UNPKG

document-extraction-service

Version:
232 lines (196 loc) 6.46 kB
const FormData = require('form-data');
const { v4: uuidv4 } = require('uuid');

/** Strategy batch ID used whenever the caller does not supply one. */
const DEFAULT_STRATEGY_BATCH_ID = 'fstrat-test';

/** Request timeout handed to the HTTP client: 2 days, in milliseconds. */
const REQUEST_TIMEOUT_MS = 2 * 24 * 60 * 60 * 1000;

/**
 * Validates extraction-service configuration, builds multipart request
 * parameters for a document, and matches responses back to the request that
 * produced them by trace ID.
 */
class CustomExtractionRequestValidator {
  #config;
  #requestQueue;

  /**
   * @param {Object} config - Configuration object
   * @param {string} config.endpoint - URL the request is sent to
   * @param {Object} config.headers - Extra headers; must contain `callback_url_pattern`
   * @param {Object} [config.requestBody] - When present, must contain `doc_id`
   * @throws {Error} If `config` is missing or fails validation
   */
  constructor(config) {
    if (!config) {
      throw new Error('Configuration is required');
    }

    this.#validateConfig(config);

    // NOTE: this writes the default into the caller's own object (and the
    // `config` getter hands that same object back), so the default is visible
    // to whoever passed the config in.
    if (config.requestBody && !config.requestBody.strategies_batch_id) {
      config.requestBody.strategies_batch_id = DEFAULT_STRATEGY_BATCH_ID;
    }

    this.#config = config;
    this.#requestQueue = new Map();

    // Freezing blocks adding/changing public properties on the instance;
    // the private queue above is still mutated by the methods below.
    Object.freeze(this);
  }

  /**
   * Get the current configuration
   * @returns {Object} The configuration object passed to the constructor
   */
  get config() {
    return this.#config;
  }

  /**
   * Get pending requests count
   * @returns {number} Number of prepared requests that have not been handled or cleared
   */
  get pendingRequestsCount() {
    return this.#requestQueue.size;
  }

  /**
   * Check that the configuration carries everything the other methods read.
   * @param {Object} config - Configuration object
   * @throws {Error} If `endpoint`, `headers.callback_url_pattern`, or a
   *   required `requestBody` field is missing
   */
  #validateConfig(config) {
    if (!config.endpoint) {
      throw new Error('Endpoint is required');
    }

    if (!config.headers?.callback_url_pattern) {
      throw new Error('Required header missing: callback_url_pattern');
    }

    // requestBody is optional, but when supplied it must be complete.
    if (config.requestBody) {
      const requiredFields = ['doc_id'];
      for (const field of requiredFields) {
        if (!(field in config.requestBody)) {
          throw new Error(`Required request body field missing: ${field}`);
        }
      }
    }
  }

  /**
   * Fill the `{{docId}}` and `{{streamId}}` placeholders of the configured
   * callback URL pattern.
   *
   * A single global pass with a function replacer is used so that every
   * occurrence is substituted, `$`-sequences inside the IDs are inserted
   * literally rather than treated as replacement patterns, and text inserted
   * for one placeholder is never re-scanned for the other.
   *
   * @param {string} docId - Document identifier
   * @param {string} streamId - Stream identifier
   * @returns {string} Callback URL with the placeholders substituted
   */
  #buildCallbackUrl(docId, streamId) {
    const values = { docId, streamId };
    return this.#config.headers.callback_url_pattern.replace(
      /\{\{(docId|streamId)\}\}/g,
      (_match, key) => String(values[key]),
    );
  }

  /**
   * Convert document content into a value that can be appended to the form.
   * @param {Buffer|Object|*} content - Document content (already known to be truthy)
   * @returns {Buffer|string} Buffers unchanged, other objects as JSON, anything else as a string
   */
  #serializeContent(content) {
    if (Buffer.isBuffer(content)) {
      return content;
    }
    if (typeof content === 'object') {
      return JSON.stringify(content);
    }
    return String(content);
  }

  /**
   * Prepare request parameters for document processing and register the
   * request in the pending queue under a freshly generated trace ID.
   *
   * @param {string} docId - Document identifier
   * @param {string|Object|Buffer} content - Document content
   * @param {string} streamId - Stream identifier
   * @param {string} [strategyBatchId="fstrat-test"] - Strategy batch identifier
   * @returns {Object} Request parameters (`url`, `method`, `timeout`,
   *   `headers`, `data`, `maxBodyLength`, `maxContentLength`); the trace ID
   *   needed by `handleResponse` is in `headers['X-Trace-ID']`
   * @throws {Error} If `docId`, `content`, or `streamId` is falsy
   */
  prepareRequest(docId, content, streamId, strategyBatchId = DEFAULT_STRATEGY_BATCH_ID) {
    if (!docId || !content || !streamId) {
      throw new Error('Missing required parameters');
    }

    // One timestamp per request keeps the trace ID, the metadata and the
    // queue entry consistent with each other.
    const now = Date.now();
    const traceId = `trace-${now}-${uuidv4().slice(0, 8)}`;
    const callbackUrl = this.#buildCallbackUrl(docId, streamId);

    const fields = [
      ['document_meta', this.#serializeContent(content)],
      ['strategyBatchId', strategyBatchId],
      ['docId', docId],
      ['streamId', streamId],
      ['metadata', JSON.stringify({ traceId, timestamp: now })],
    ];

    const formData = new FormData();
    for (const [name, value] of fields) {
      formData.append(name, value);
    }

    // Add entries method for testing: yields the appended name/value pairs
    // in order, as fresh arrays on every iteration.
    formData.entries = function* entries() {
      for (const [name, value] of fields) {
        yield [name, value];
      }
    };

    // Prefer the boundary FormData reports; generate one only if it has none.
    const formDataBoundary =
      formData.getBoundary?.() ||
      `----FormBoundary${now}${Math.random().toString(36).slice(2)}`;

    const requestParams = {
      url: this.#config.endpoint,
      method: 'POST',
      timeout: REQUEST_TIMEOUT_MS,
      headers: {
        'X-Document-ID': docId,
        'X-Trace-ID': traceId,
        'X-Callback-URL': callbackUrl,
        'Content-Type': `multipart/form-data; boundary=${formDataBoundary}`,
        // Spread last on purpose: configured headers win over the ones above.
        ...this.#config.headers,
      },
      data: formData,
      maxBodyLength: Infinity,
      maxContentLength: Infinity,
    };

    // When the form exposes its boundary directly, the Content-Type must match
    // it, so this takes precedence even over a configured Content-Type.
    if (formData._boundary) {
      requestParams.headers['Content-Type'] = `multipart/form-data; boundary=${formData._boundary}`;
    }

    // Only what handleResponse reads is kept; holding on to `content` would pin
    // potentially large payloads in memory until the response is handled.
    this.#requestQueue.set(traceId, { docId, timestamp: now });

    return requestParams;
  }

  /**
   * Handle API response: classify it as success or failure and remove the
   * matching request from the pending queue.
   *
   * @param {Object} response - API response; `status` (number) and `data` (object) are inspected
   * @param {string} traceId - Trace identifier generated by `prepareRequest`
   * @returns {{success: boolean, docId: string, traceId: string, error?: string}}
   *   Processed response; `error` is present only when `success` is false
   * @throws {Error} If `response` is falsy, `traceId` is not a non-empty
   *   string, or no pending request matches `traceId`
   */
  handleResponse(response, traceId) {
    if (!response || !traceId || typeof traceId !== 'string') {
      throw new Error('Invalid response or traceId');
    }

    const requestData = this.#requestQueue.get(traceId);
    if (!requestData) {
      throw new Error('No matching request found for traceId');
    }

    const { docId } = requestData;

    try {
      const { status, data } = response;

      // A usable status is a non-zero, non-NaN number.
      const hasValidStatus = typeof status === 'number' && Boolean(status);
      const hasValidData = typeof data === 'object' && data !== null;
      const isSuccess = hasValidStatus && status >= 200 && status < 300;

      // Most fundamental problem first: malformed status, then malformed
      // data, then a well-formed response reporting failure.
      let error;
      if (!hasValidStatus) {
        error = 'Invalid response format';
      } else if (!hasValidData) {
        error = 'Missing or invalid response data';
      } else if (!isSuccess) {
        error = data.message || `Request failed with status ${status}`;
      }

      if (error === undefined) {
        return { success: true, docId, traceId };
      }
      return { success: false, docId, traceId, error };
    } catch (error) {
      // Reading the response failed (e.g. a throwing accessor); report it as
      // a failed request rather than propagating.
      return { success: false, docId, traceId, error: error.message };
    } finally {
      // The request is settled either way, so it leaves the queue.
      this.#requestQueue.delete(traceId);
    }
  }

  /**
   * Clear request queue
   * @returns {boolean} Always `true`
   */
  clearQueue() {
    this.#requestQueue.clear();
    return true;
  }

  /**
   * Validate the arguments of a request without needing an instance.
   * @param {string} docId - Document identifier
   * @param {*} content - Document content
   * @param {string} streamId - Stream identifier
   * @returns {boolean} `true` when all arguments are acceptable
   * @throws {Error} If `docId` or `streamId` is not a non-empty string, or `content` is falsy
   */
  static validateRequest(docId, content, streamId) {
    if (!docId || typeof docId !== 'string') {
      throw new Error('Invalid document ID');
    }
    if (!content) {
      throw new Error('Content is required');
    }
    if (!streamId || typeof streamId !== 'string') {
      throw new Error('Invalid stream ID');
    }
    return true;
  }

  /**
   * Validate request headers without needing an instance.
   * @param {Object} headers - Headers object
   * @returns {boolean} `true` when `callback_url_pattern` is present
   * @throws {Error} If `headers` is null/undefined or lacks `callback_url_pattern`
   */
  static validateRequestHeaders(headers) {
    // Optional chaining so a missing headers object yields the same
    // descriptive error as a missing field, not a TypeError.
    if (!headers?.callback_url_pattern) {
      throw new Error('Callback URL pattern is required in headers');
    }
    return true;
  }
}

module.exports = CustomExtractionRequestValidator;