UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

628 lines 24.1 kB
/**
 * Base File Processor Abstract Class
 *
 * Provides common functionality for downloading, validating, and processing files
 * from any source (URLs, buffers, cloud storage, etc.)
 *
 * This class uses the Template Method pattern to provide a consistent processing
 * pipeline while allowing subclasses to customize specific steps.
 *
 * Key features:
 * - Support for both URL downloads and direct buffer input
 * - Configurable retry with exponential backoff
 * - Gzip decompression support
 * - Structured error handling with user-friendly messages
 * - File type validation by MIME type and extension
 * - Size limit enforcement
 *
 * @module processors/base/BaseFileProcessor
 *
 * @example
 * ```typescript
 * class ImageProcessor extends BaseFileProcessor<ProcessedImage> {
 *   constructor() {
 *     super({
 *       maxSizeMB: 10,
 *       timeoutMs: 30000,
 *       supportedMimeTypes: ['image/jpeg', 'image/png'],
 *       supportedExtensions: ['.jpg', '.jpeg', '.png'],
 *       fileTypeName: 'image',
 *       defaultFilename: 'image.jpg',
 *     });
 *   }
 *
 *   protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedImage {
 *     return {
 *       buffer,
 *       mimetype: fileInfo.mimetype,
 *       size: buffer.length,
 *       filename: this.getFilename(fileInfo),
 *       // ... additional image-specific fields
 *     };
 *   }
 * }
 * ```
 */
import { promisify } from "util";
import { gunzip } from "zlib";
import { SIZE_LIMITS } from "../config/index.js";
import { isAbortError } from "../../utils/errorHandling.js";
import { withSpan } from "../../telemetry/withSpan.js";
import { tracers } from "../../telemetry/tracers.js";
import { createFileError, extractHttpStatus, FileErrorCode, isRetryableError, } from "../errors/index.js";
import { DEFAULT_RETRY_CONFIG } from "../../types/index.js";
const gunzipAsync = promisify(gunzip);
/**
 * Abstract base class for file processors.
 * Provides common download, validation, and error handling functionality.
 *
 * Subclasses are expected to supply `buildProcessedResult(buffer, fileInfo)`;
 * this class calls it from `buildProcessedResultWithResult` but does not
 * define it.
 *
 * @typeParam T - The type of processed result, must extend ProcessedFileBase
 */
export class BaseFileProcessor {
    /** Processor configuration */
    config;
    /**
     * Creates a new file processor with the given configuration.
     *
     * @param config - Processor configuration
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Get the processor configuration.
     * Gives external consumers (e.g., ProcessorRegistry, FileProcessorIntegration)
     * access to the config without reaching into the field directly.
     *
     * Note: this returns the same object that was passed to the constructor,
     * not a copy, so callers should treat it as read-only.
     *
     * @returns The processor configuration
     */
    getConfig() {
        return this.config;
    }
    // ===========================================================================
    // PUBLIC API
    // ===========================================================================
    /**
     * Process a single file.
     * Main entry point - implements the Template Method pattern.
     *
     * Pipeline: validate declared type/size, obtain the bytes (direct buffer or
     * URL download with retry), enforce the size limit on the actual bytes,
     * run post-download validation, then build the processed result.
     * Unexpected exceptions inside the pipeline are converted into an
     * UNKNOWN_ERROR result instead of being thrown.
     *
     * @param fileInfo - File information (can include URL or buffer)
     * @param options - Optional processing options (auth headers, timeout, retry config)
     * @returns Processing result with success flag and either data or error
     *
     * @example
     * ```typescript
     * const result = await processor.processFile(fileInfo, {
     *   authHeaders: { 'Authorization': 'Bearer token' },
     *   timeout: 60000,
     * });
     *
     * if (result.success) {
     *   console.log('Processed:', result.data.filename);
     * } else {
     *   console.error('Failed:', result.error.userMessage);
     * }
     * ```
     */
    async processFile(fileInfo, options) {
        return withSpan({
            name: "neurolink.file.process",
            tracer: tracers.file,
            attributes: {
                "file.processor": this.constructor.name,
                "file.type": this.config.fileTypeName,
                "file.mimetype": fileInfo.mimetype ?? "unknown",
                "file.name": fileInfo.name ?? "unknown",
            },
        }, async (_span) => {
            try {
                // Step 1: Validate declared file type and size
                const validationResult = this.validateFileWithResult(fileInfo);
                if (!validationResult.success) {
                    return {
                        success: false,
                        error: validationResult.error,
                    };
                }
                // Step 2: Get file buffer (from direct buffer or download from URL)
                let buffer;
                if (fileInfo.buffer) {
                    // Direct buffer provided - skip download
                    buffer = fileInfo.buffer;
                }
                else if (fileInfo.url) {
                    // Download from URL
                    const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
                    if (!downloadResult.success) {
                        return {
                            success: false,
                            error: downloadResult.error,
                        };
                    }
                    if (!downloadResult.data) {
                        return {
                            success: false,
                            error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                                reason: "Download succeeded but returned no data",
                            }),
                        };
                    }
                    buffer = downloadResult.data;
                }
                else {
                    // No buffer or URL provided
                    return {
                        success: false,
                        error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                            reason: "No buffer or URL provided for file",
                        }),
                    };
                }
                // Validate the ACTUAL byte size against the limit. This runs for both
                // downloaded and directly supplied buffers, because the declared
                // fileInfo.size checked in step 1 may not match the real content.
                if (!this.validateFileSize(buffer.length)) {
                    return {
                        success: false,
                        error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
                            sizeMB: this.formatSizeMB(buffer.length),
                            maxMB: this.config.maxSizeMB,
                            type: this.config.fileTypeName,
                        }),
                    };
                }
                // Step 3: Post-download validation (subclasses can override)
                const postValidationResult = await this.validateDownloadedFileWithResult(buffer, fileInfo);
                if (!postValidationResult.success) {
                    return {
                        success: false,
                        error: postValidationResult.error,
                    };
                }
                // Step 4: Build processed result using template method
                return await this.buildProcessedResultWithResult(buffer, fileInfo);
            }
            catch (error) {
                // Catch any unexpected errors
                return {
                    success: false,
                    error: this.createError(FileErrorCode.UNKNOWN_ERROR, { error: error instanceof Error ? error.message : String(error) }, error instanceof Error ? error : undefined),
                };
            }
        }); // end withSpan
    }
    /**
     * Process multiple files with detailed summary.
     *
     * Files are processed sequentially, in the order given. Every file ID ends
     * up in exactly one of processedFiles, failedFiles, or skippedFiles.
     * An exception thrown by `getFileInfo` is not caught here and propagates
     * to the caller.
     *
     * @param fileIds - Array of file IDs to process
     * @param getFileInfo - Function to retrieve file info by ID
     * @param options - Optional processing options
     * @returns Summary with processed, failed, and skipped files
     *
     * @example
     * ```typescript
     * const summary = await processor.processFiles(
     *   ['file1', 'file2', 'file3'],
     *   async (id) => await fetchFileInfo(id),
     *   { authHeaders: { 'Authorization': 'Bearer token' } }
     * );
     *
     * console.log(`Success: ${summary.processedFiles.length}`);
     * console.log(`Failed: ${summary.failedFiles.length}`);
     * ```
     */
    async processFiles(fileIds, getFileInfo, options) {
        const results = [];
        const processedFiles = [];
        const failedFiles = [];
        const skippedFiles = [];
        // Currently never populated; returned as part of the summary shape.
        const warnings = [];
        for (const fileId of fileIds) {
            const fileInfo = await getFileInfo(fileId);
            if (!fileInfo) {
                failedFiles.push({
                    fileId,
                    filename: "unknown",
                    mimetype: "unknown",
                    size: 0,
                    error: this.createError(FileErrorCode.FILE_NOT_FOUND),
                });
                continue;
            }
            const result = await this.processFile(fileInfo, options);
            if (result.success && result.data) {
                results.push(result.data);
                processedFiles.push({
                    fileId: fileInfo.id,
                    filename: fileInfo.name || "unknown",
                    mimetype: fileInfo.mimetype,
                    size: fileInfo.size,
                    processorType: this.config.fileTypeName,
                });
                continue;
            }
            // A result that carries neither data nor an error (e.g. a subclass
            // built an empty result) must still be accounted for, otherwise the
            // file would silently vanish from the summary.
            const error = result.error ??
                this.createError(FileErrorCode.PROCESSING_FAILED, {
                    fileType: this.config.fileTypeName,
                });
            // Unsupported types are reported as "skipped" rather than hard failures
            if (error.code === FileErrorCode.UNSUPPORTED_TYPE) {
                skippedFiles.push({
                    fileId: fileInfo.id,
                    filename: fileInfo.name || "unknown",
                    mimetype: fileInfo.mimetype,
                    size: fileInfo.size,
                    reason: error.message,
                });
            }
            else {
                failedFiles.push({
                    fileId: fileInfo.id,
                    filename: fileInfo.name || "unknown",
                    mimetype: fileInfo.mimetype,
                    size: fileInfo.size,
                    error,
                });
            }
        }
        return {
            totalFiles: fileIds.length,
            processedFiles,
            failedFiles,
            skippedFiles,
            warnings,
            results,
        };
    }
    /**
     * Check if a file is supported by this processor.
     * A file is supported when EITHER its MIME type OR its filename extension
     * matches the configuration.
     *
     * @param mimetype - MIME type of the file
     * @param filename - Filename (for extension-based detection)
     * @returns true if the file type is supported
     *
     * @example
     * ```typescript
     * if (processor.isFileSupported('image/jpeg', 'photo.jpg')) {
     *   // Process the file
     * }
     * ```
     */
    isFileSupported(mimetype, filename) {
        return (this.isSupportedMimeType(mimetype) || this.isSupportedExtension(filename));
    }
    // ===========================================================================
    // PROTECTED METHODS - Can be overridden by subclasses
    // ===========================================================================
    /**
     * Validate downloaded file buffer.
     * Override for custom post-download validation (e.g., magic bytes).
     *
     * @param _buffer - Downloaded file content
     * @param _fileInfo - Original file information
     * @returns null if valid, error message if invalid
     */
    async validateDownloadedFile(_buffer, _fileInfo) {
        return null; // No validation by default
    }
    /**
     * Validate downloaded file buffer with structured error result.
     * Override for custom post-download validation with detailed errors.
     *
     * The default implementation delegates to `validateDownloadedFile` and
     * wraps any returned message in an INVALID_FORMAT error.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Success result or error result
     */
    async validateDownloadedFileWithResult(buffer, fileInfo) {
        // Call the legacy validation method for backward compatibility
        const errorMessage = await this.validateDownloadedFile(buffer, fileInfo);
        if (errorMessage) {
            return {
                success: false,
                error: this.createError(FileErrorCode.INVALID_FORMAT, {
                    reason: errorMessage,
                }),
            };
        }
        return { success: true, data: undefined };
    }
    /**
     * Build processed result with structured error handling.
     * Override for custom result building that can fail with errors.
     *
     * The default implementation calls `buildProcessedResult` and converts any
     * thrown exception into a PROCESSING_FAILED error result.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Success result with data or error result
     */
    async buildProcessedResultWithResult(buffer, fileInfo) {
        try {
            const result = await this.buildProcessedResult(buffer, fileInfo);
            return { success: true, data: result };
        }
        catch (error) {
            return {
                success: false,
                error: this.createError(FileErrorCode.PROCESSING_FAILED, { fileType: this.config.fileTypeName }, error instanceof Error ? error : undefined),
            };
        }
    }
    /**
     * Get filename with default fallback.
     *
     * @param fileInfo - File information
     * @returns Filename or default if not available
     */
    getFilename(fileInfo) {
        return fileInfo.name || this.config.defaultFilename;
    }
    // ===========================================================================
    // DOWNLOAD METHODS
    // ===========================================================================
    /**
     * Download file from URL with authentication.
     *
     * @param url - URL to download from
     * @param authHeaders - Optional authentication headers
     * @param timeout - Optional timeout override in milliseconds
     *   (defaults to `config.timeoutMs`)
     * @returns Downloaded file content as Buffer
     * @throws Error if the response is not OK, is an HTML page, cannot be
     *   decompressed, or the request is aborted by the timeout
     */
    async downloadFile(url, authHeaders, timeout) {
        // Note: We intentionally use AbortController + setTimeout here rather than the shared
        // withTimeout utility. AbortController.signal cancels the actual HTTP request via
        // fetch's signal option, while withTimeout only races promises and would leave
        // the fetch running in the background, consuming network resources.
        const controller = new AbortController();
        const effectiveTimeout = timeout ?? this.config.timeoutMs;
        const timeoutId = setTimeout(() => {
            controller.abort();
        }, effectiveTimeout);
        try {
            const headers = {
                ...authHeaders,
            };
            const response = await fetch(url, {
                headers,
                signal: controller.signal,
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            // Reject HTML responses - likely an error page or redirect.
            // Media types are case-insensitive, so compare in lower case.
            const contentType = response.headers.get("Content-Type");
            if (contentType && contentType.toLowerCase().includes("text/html")) {
                throw new Error(`Received HTML response instead of file content (Content-Type: ${contentType}). This usually means the download URL returned an error page.`);
            }
            const arrayBuffer = await response.arrayBuffer();
            let buffer = Buffer.from(arrayBuffer);
            // Check for gzip encoding and decompress if needed.
            // Only decompress if the data actually starts with gzip magic bytes (0x1f 0x8b);
            // the header alone is not trusted because the body may already be decoded.
            const contentEncoding = response.headers.get("Content-Encoding");
            const isActuallyGzipped = buffer.length >= 2 && buffer[0] === 0x1f && buffer[1] === 0x8b;
            if (contentEncoding?.toLowerCase().includes("gzip") && isActuallyGzipped) {
                try {
                    buffer = Buffer.from(await gunzipAsync(buffer));
                }
                catch (gzipError) {
                    throw new Error(`Failed to decompress gzip response: ${gzipError instanceof Error ? gzipError.message : String(gzipError)}`, { cause: gzipError });
                }
            }
            return buffer;
        }
        finally {
            // Always release the timer, whether the download succeeded or failed
            clearTimeout(timeoutId);
        }
    }
    /**
     * Download file with retry logic for transient failures.
     *
     * At least one attempt is always made. A caller-supplied retry config is
     * merged over DEFAULT_RETRY_CONFIG, so a partially specified config falls
     * back to the defaults for the fields it omits.
     *
     * @param fileInfo - File information with URL
     * @param options - Processing options including auth headers and retry config
     * @returns Success result with buffer or error result
     */
    async downloadFileWithRetry(fileInfo, options) {
        const url = fileInfo.url;
        if (!url) {
            return {
                success: false,
                error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                    reason: "No URL provided for download",
                }),
            };
        }
        const retryConfig = { ...DEFAULT_RETRY_CONFIG, ...options?.retryConfig };
        // Clamp so that a negative, NaN, or missing maxRetries still yields one attempt
        const maxRetries = Math.max(0, Number(retryConfig.maxRetries) || 0);
        let lastError;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            try {
                const buffer = await this.downloadFile(url, options?.authHeaders, options?.timeout);
                return { success: true, data: buffer };
            }
            catch (error) {
                lastError = error instanceof Error ? error : new Error(String(error));
                // Check if we should retry
                const shouldRetry = attempt < maxRetries &&
                    (retryConfig.retryOn
                        ? retryConfig.retryOn(lastError)
                        : isRetryableError(lastError));
                if (shouldRetry) {
                    // Calculate delay with exponential backoff, capped at maxDelayMs
                    const delay = Math.min(retryConfig.baseDelayMs * 2 ** attempt, retryConfig.maxDelayMs);
                    await this.sleep(delay);
                    continue;
                }
                // No more retries, return error
                break;
            }
        }
        // Classify the final error, reporting the timeout that was actually in effect
        return {
            success: false,
            error: this.classifyDownloadError(lastError, options?.timeout ?? this.config.timeoutMs),
        };
    }
    // ===========================================================================
    // VALIDATION METHODS
    // ===========================================================================
    /**
     * Validate file type and size with structured error result.
     * Uses the DECLARED metadata on `fileInfo` (mimetype, name, size); the
     * actual content is not inspected here.
     *
     * @param fileInfo - File information to validate
     * @returns Success result or error result
     */
    validateFileWithResult(fileInfo) {
        // Validate file type
        if (!this.isFileSupported(fileInfo.mimetype, fileInfo.name || "")) {
            return {
                success: false,
                error: this.createError(FileErrorCode.UNSUPPORTED_TYPE, {
                    format: fileInfo.mimetype ||
                        fileInfo.name?.split(".").pop() ||
                        "unknown",
                    supportedFormats: this.config.supportedMimeTypes.length > 0
                        ? this.config.supportedMimeTypes.join(", ")
                        : this.config.supportedExtensions.join(", "),
                    type: this.config.fileTypeName,
                }),
            };
        }
        // Validate size
        if (!this.validateFileSize(fileInfo.size)) {
            const sizeMB = this.formatSizeMB(fileInfo.size);
            return {
                success: false,
                error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
                    sizeMB,
                    maxMB: this.config.maxSizeMB,
                    type: this.config.fileTypeName,
                }),
            };
        }
        return { success: true, data: undefined };
    }
    /**
     * Validate file size against configured maximum.
     * A size that is not a number (undefined, NaN) fails the comparison and is
     * therefore reported as exceeding the limit.
     *
     * @param sizeBytes - File size in bytes
     * @returns true if size is within limits
     */
    validateFileSize(sizeBytes) {
        const maxBytes = this.config.maxSizeMB * 1024 * 1024;
        return sizeBytes <= maxBytes;
    }
    /**
     * Check if file matches supported MIME types.
     * The input is lower-cased before an exact match against the configured list.
     *
     * @param mimetype - MIME type to check
     * @returns true if MIME type is supported
     */
    isSupportedMimeType(mimetype) {
        if (!mimetype) {
            return false;
        }
        return this.config.supportedMimeTypes.includes(mimetype.toLowerCase());
    }
    /**
     * Check if file matches supported extensions.
     * The filename is lower-cased and tested as a suffix against each
     * configured extension.
     *
     * @param filename - Filename to check
     * @returns true if extension is supported
     */
    isSupportedExtension(filename) {
        if (!filename) {
            return false;
        }
        const lowerFilename = filename.toLowerCase();
        return this.config.supportedExtensions.some((ext) => lowerFilename.endsWith(ext));
    }
    // ===========================================================================
    // UTILITY METHODS
    // ===========================================================================
    /**
     * Format file size in MB with 2 decimal places.
     *
     * @param sizeBytes - Size in bytes
     * @returns Formatted size string
     */
    formatSizeMB(sizeBytes) {
        return (sizeBytes / (1024 * 1024)).toFixed(2);
    }
    /**
     * Create a structured file processing error.
     * Thin wrapper around `createFileError`.
     *
     * @param code - Error code
     * @param details - Additional error details
     * @param originalError - Original error that caused this
     * @returns Structured error object
     */
    createError(code, details, originalError) {
        return createFileError(code, details, originalError);
    }
    /**
     * Classify a download error into appropriate error code.
     *
     * Order of checks: abort (timeout), HTTP status carried in the message,
     * decompression failure, then a generic download failure.
     *
     * @param error - The error to classify
     * @param timeoutMs - Timeout that was in effect for the download; reported
     *   in DOWNLOAD_TIMEOUT details. Defaults to `config.timeoutMs`.
     * @returns Structured file processing error
     */
    classifyDownloadError(error, timeoutMs = this.config.timeoutMs) {
        if (!error) {
            // Defensive: nothing was captured, so there is nothing to inspect
            return this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                reason: "Download failed without an error being reported",
            });
        }
        if (isAbortError(error)) {
            return this.createError(FileErrorCode.DOWNLOAD_TIMEOUT, { timeoutMs }, error);
        }
        const message = typeof error.message === "string" ? error.message : String(error);
        if (message.includes("HTTP")) {
            const status = extractHttpStatus(error);
            if (status === 404) {
                return this.createError(FileErrorCode.FILE_NOT_FOUND, {}, error);
            }
            if (status === 401 || status === 403) {
                return this.createError(FileErrorCode.DOWNLOAD_AUTH_FAILED, { httpStatus: status }, error);
            }
            if (status === 429) {
                return this.createError(FileErrorCode.RATE_LIMITED, {}, error);
            }
            return this.createError(FileErrorCode.NETWORK_ERROR, { httpStatus: status }, error);
        }
        if (message.includes("decompress")) {
            return this.createError(FileErrorCode.DECOMPRESSION_FAILED, {}, error);
        }
        return this.createError(FileErrorCode.DOWNLOAD_FAILED, {}, error);
    }
    /**
     * Sleep for specified milliseconds.
     *
     * @param ms - Milliseconds to sleep
     * @returns Promise that resolves after the delay
     */
    sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}
// ===========================================================================
// UTILITY FUNCTIONS
// ===========================================================================
/**
 * Get the default text file download timeout.
 *
 * @returns Timeout in milliseconds
 */
export function getDefaultTextTimeout() {
    // Return a sensible default since we don't have env config
    return 30000;
}
/**
 * Get the default image download timeout.
 *
 * @returns Timeout in milliseconds
 */
export function getDefaultImageTimeout() {
    return 30000;
}
/**
 * Get the default text file max size in MB.
 *
 * @returns Max size in megabytes
 */
export function getDefaultTextMaxSizeMB() {
    return SIZE_LIMITS.TEXT_MAX_MB;
}
/**
 * Get the default image max size in MB.
 *
 * @returns Max size in megabytes
 */
export function getDefaultImageMaxSizeMB() {
    return SIZE_LIMITS.IMAGE_MAX_MB;
}
//# sourceMappingURL=BaseFileProcessor.js.map