UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

715 lines (714 loc) 28.7 kB
/** * Processor Registry * * Central registry for file processors with priority-based selection. * Uses singleton pattern to ensure a single source of truth for processor registration. * * All 16 BaseFileProcessor-based processors are auto-registered on first access * via getProcessorRegistry(). Legacy processors (CSV, Image, PDF, PPTX) that use * static methods and don't extend BaseFileProcessor are excluded — they continue * to be routed via the switch/case in FileDetector.processFile(). * * Key features: * - Priority-based processor selection (lower number = higher priority) * - Confidence scoring for match quality * - Alias support for alternative processor names * - Auto-detection and processing of files * - Testing utilities (clear, resetInstance) * * @module processors/registry/ProcessorRegistry * * @example * ```typescript * import { ProcessorRegistry, getProcessorRegistry, PROCESSOR_PRIORITIES } from "./registry/index.js"; * * const registry = await getProcessorRegistry(); * * // Register a processor * registry.register({ * name: "image", * priority: PROCESSOR_PRIORITIES.IMAGE, * processor: new ImageProcessor(), * isSupported: (mimetype, filename) => mimetype.startsWith("image/"), * description: "Processes images for AI vision", * }); * * // Find and use a processor * const match = registry.findProcessor("image/jpeg", "photo.jpg"); * if (match) { * const result = await match.processor.processFile(fileInfo); * } * * // Auto-process a file * const result = await registry.processFile(fileInfo); * ``` */ import { withSpan } from "../../telemetry/withSpan.js"; import { tracers } from "../../telemetry/tracers.js"; // ============================================================================= // UTILITY FUNCTIONS // ============================================================================= /** * Get file extension from filename. 
/**
 * Extract the extension of a filename.
 *
 * Only the last path segment is inspected, so a dot inside a directory name
 * (e.g. "release.v2/README") is not mistaken for an extension. A dotfile such
 * as ".env" is reported as its own extension; a trailing dot ("file.") yields
 * no extension.
 *
 * @param filename - The filename (optionally including a path) to inspect
 * @returns Lowercase extension with leading dot, or empty string if none
 */
function getFileExtension(filename) {
  if (typeof filename !== "string" || filename.length === 0) {
    return "";
  }
  // Index of the first character after the last "/" or "\" (0 when there is no path).
  const basenameStart =
    Math.max(filename.lastIndexOf("/"), filename.lastIndexOf("\\")) + 1;
  const lastDot = filename.lastIndexOf(".");
  // No dot in the basename (also covers lastDot === -1), or nothing after the dot.
  if (lastDot < basenameStart || lastDot === filename.length - 1) {
    return "";
  }
  return filename.substring(lastDot).toLowerCase();
}

/**
 * Extension-specific hints returned by getSuggestionForFile().
 * Each entry pairs a list of lowercase extensions with the hint text;
 * the first entry containing the extension wins.
 */
const EXTENSION_SUGGESTIONS = [
  [[".heic", ".heif"], "Convert HEIC images to PNG or JPEG format before uploading."],
  [[".tiff", ".tif"], "Convert TIFF images to PNG or JPEG format before uploading."],
  [[".bmp"], "Convert BMP images to PNG or JPEG format before uploading."],
  [[".ico"], "Convert ICO files to PNG format before uploading."],
  [
    [".zip", ".rar", ".7z", ".tar", ".gz"],
    "Archive files are now supported. NeuroLink will list contents and extract metadata.",
  ],
  [
    [".mp4", ".avi", ".mov", ".mkv", ".wmv"],
    "Video files are now supported. NeuroLink will extract metadata and keyframes.",
  ],
  [
    [".mp3", ".wav", ".aac", ".ogg", ".flac"],
    "Audio files are now supported. NeuroLink will extract metadata and tags.",
  ],
  [[".psd", ".ai", ".sketch"], "Export design files to PNG, PDF, or SVG format before uploading."],
  [
    [".exe", ".dll", ".bat", ".sh", ".msi"],
    "Executable files are not supported for security reasons.",
  ],
  [
    [".db", ".sqlite", ".mdb", ".accdb"],
    "Export database data to CSV or JSON format before uploading.",
  ],
];

/**
 * Build the structured failure value returned by processWithResult().
 *
 * @param type - Result type ("unsupported" or the matched processor name)
 * @param code - Machine-readable error code
 * @param message - Human-readable error message
 * @param fileInfo - File information the error refers to
 * @param suggestion - Hint telling the user how to proceed
 * @param supportedTypes - Names of all registered processors
 * @returns Result object with `data: null` and a populated `error`
 */
function buildErrorResult(type, code, message, fileInfo, suggestion, supportedTypes) {
  return {
    type,
    data: null,
    error: {
      code,
      message,
      filename: fileInfo.name || "unknown",
      mimetype: fileInfo.mimetype || "unknown",
      suggestion,
      supportedTypes,
    },
  };
}

// =============================================================================
// PROCESSOR REGISTRY CLASS
// =============================================================================
/**
 * Central registry for file processors.
 * Uses singleton pattern and priority-based selection.
 *
 * Priority system: lower number = higher priority
 * - 5: SVG (before image, processed as text since AI providers often don't support SVG format)
 * - 10: Image (AI vision)
 * - 20: PDF (document)
 * - 30: CSV (tabular data)
 * - ...
 * - 130: Config files
 *
 * @example
 * ```typescript
 * const registry = ProcessorRegistry.getInstance();
 *
 * registry.register({
 *   name: "pdf",
 *   priority: 20,
 *   processor: pdfProcessor,
 *   isSupported: isPdfFile,
 * });
 *
 * const match = registry.findProcessor("application/pdf", "document.pdf");
 * ```
 */
export class ProcessorRegistry {
  /** Singleton instance (null until getInstance() is first called, or after resetInstance()) */
  static instance = null;
  /** Map of processor name (lowercase) to registration */
  processors = new Map();
  /** Map of alias (lowercase) to canonical name (lowercase) */
  aliases = new Map();
  /** Flag indicating if default processors have been initialized */
  initialized = false;

  /**
   * Intended to be used through getInstance(); direct construction yields an
   * independent, empty registry (handy in tests).
   */
  constructor() {}

  // ===========================================================================
  // SINGLETON MANAGEMENT
  // ===========================================================================

  /**
   * Get the singleton registry instance, creating it on first use.
   *
   * @returns The ProcessorRegistry singleton
   */
  static getInstance() {
    ProcessorRegistry.instance ??= new ProcessorRegistry();
    return ProcessorRegistry.instance;
  }

  /**
   * Drop the singleton so the next getInstance() call creates a fresh registry.
   * Useful for testing to ensure a clean state.
   */
  static resetInstance() {
    ProcessorRegistry.instance = null;
  }

  // ===========================================================================
  // REGISTRATION
  // ===========================================================================

  /**
   * Register a file processor.
   *
   * The registration is validated before any existing state is touched, so a
   * rejected call never alters the registry.
   *
   * @param registration - Processor registration details
   *   (name, priority, processor, isSupported, optional description/aliases)
   * @param options - Registration options (allowDuplicates, overwriteExisting)
   * @throws Error if a required field is missing or invalid
   * @throws Error if a processor with the same name exists and neither
   *   overwriteExisting nor allowDuplicates is set
   *
   * @example
   * ```typescript
   * registry.register({
   *   name: "image",
   *   priority: 10,
   *   processor: imageProcessor,
   *   isSupported: (mimetype, filename) => mimetype.startsWith("image/"),
   *   description: "Processes images for AI vision",
   *   aliases: ["img", "picture"],
   * });
   * ```
   */
  register(registration, options) {
    // The name must be checked first: it is needed to look up duplicates.
    if (!registration || typeof registration.name !== "string" || registration.name === "") {
      throw new Error("Processor name is required");
    }
    const normalizedName = registration.name.toLowerCase();
    const alreadyRegistered = this.processors.has(normalizedName);

    if (alreadyRegistered && !options?.overwriteExisting) {
      if (options?.allowDuplicates) {
        // Silently ignore duplicate registration - don't overwrite existing
        return;
      }
      throw new Error(`Processor "${registration.name}" is already registered. Use overwriteExisting option to replace it.`);
    }

    if (typeof registration.priority !== "number" || Number.isNaN(registration.priority)) {
      throw new Error("Processor priority must be a number");
    }
    if (!registration.processor) {
      throw new Error("Processor instance is required");
    }
    if (typeof registration.isSupported !== "function") {
      throw new Error("isSupported function is required");
    }

    // Only now that the replacement is known to be valid may the old aliases go.
    if (alreadyRegistered) {
      this.removeAliasesForProcessor(normalizedName);
    }

    this.processors.set(normalizedName, registration);

    for (const alias of registration.aliases ?? []) {
      const normalizedAlias = alias.toLowerCase();
      // An alias equal to the canonical name would be redundant.
      if (normalizedAlias !== normalizedName) {
        this.aliases.set(normalizedAlias, normalizedName);
      }
    }
  }

  /**
   * Unregister a processor by name, together with every alias pointing to it.
   *
   * @param name - Name of the processor to unregister
   * @returns true if processor was found and removed, false otherwise
   */
  unregister(name) {
    const normalizedName = name.toLowerCase();
    this.removeAliasesForProcessor(normalizedName);
    return this.processors.delete(normalizedName);
  }

  /**
   * Remove all aliases pointing to a processor.
   *
   * @param normalizedName - Lowercase processor name
   */
  removeAliasesForProcessor(normalizedName) {
    // Snapshot the entries so the map is not mutated while being iterated.
    for (const [alias, target] of [...this.aliases]) {
      if (target === normalizedName) {
        this.aliases.delete(alias);
      }
    }
  }

  // ===========================================================================
  // PROCESSOR LOOKUP
  // ===========================================================================

  /**
   * Find the best matching processor for a file.
   * Uses priority-based selection when multiple processors match.
   *
   * @param mimetype - MIME type of the file
   * @param filename - Filename (for extension-based detection)
   * @returns Best matching processor or null if none found
   */
  findProcessor(mimetype, filename) {
    const [best = null] = this.findAllProcessors(mimetype, filename);
    return best;
  }

  /**
   * Find all matching processors sorted by priority and confidence.
   *
   * @param mimetype - MIME type of the file
   * @param filename - Filename (for extension-based detection)
   * @returns Array of `{ name, processor, priority, confidence }` matches,
   *   sorted by priority (ascending) then confidence (descending)
   */
  findAllProcessors(mimetype, filename) {
    const matches = [];
    for (const [name, reg] of this.processors) {
      try {
        if (reg.isSupported(mimetype, filename)) {
          matches.push({
            name,
            processor: reg.processor,
            priority: reg.priority,
            confidence: this.calculateConfidence(mimetype, filename, reg),
          });
        }
      } catch {
        // Deliberate best-effort: a processor whose isSupported()/getConfig()
        // throws is skipped so one faulty processor cannot break detection.
      }
    }
    // Lower priority number first; ties broken by higher confidence.
    return matches.sort(
      (a, b) => a.priority - b.priority || b.confidence - a.confidence,
    );
  }

  /**
   * Get a specific processor by name or alias.
   *
   * @param name - Processor name or alias (case-insensitive)
   * @returns Processor registration or undefined if not found
   */
  getProcessor(name) {
    const normalizedName = name.toLowerCase();
    const direct = this.processors.get(normalizedName);
    if (direct) {
      return direct;
    }
    const canonicalName = this.aliases.get(normalizedName);
    return canonicalName ? this.processors.get(canonicalName) : undefined;
  }

  /**
   * List all registered processors.
   *
   * @returns Array of all processor registrations
   */
  listProcessors() {
    return [...this.processors.values()];
  }

  /**
   * Check if a processor is registered.
   *
   * @param name - Processor name or alias to check (case-insensitive)
   * @returns true if processor is registered
   */
  hasProcessor(name) {
    const normalizedName = name.toLowerCase();
    return this.processors.has(normalizedName) || this.aliases.has(normalizedName);
  }

  /**
   * Get list of supported file types/processor names.
   *
   * @returns Array of (lowercase) processor names
   */
  getSupportedTypes() {
    return [...this.processors.keys()];
  }

  // ===========================================================================
  // FILE PROCESSING
  // ===========================================================================

  /**
   * Auto-detect and process a file using the best matching processor.
   * The work is wrapped in a "neurolink.processor.processFile" telemetry span.
   *
   * @param fileInfo - File information including content/URL
   * @param options - Processing options, passed through to the processor
   * @returns Whatever the matched processor's processFile() returns,
   *   or null if no processor found
   */
  async processFile(fileInfo, options) {
    return withSpan(
      {
        name: "neurolink.processor.processFile",
        tracer: tracers.processor,
        attributes: {
          "processor.filename": fileInfo.name ?? "unknown",
          "processor.mimetype": fileInfo.mimetype ?? "unknown",
        },
      },
      async () => {
        const match = this.findProcessor(fileInfo.mimetype, fileInfo.name);
        if (!match) {
          return null;
        }
        return match.processor.processFile(fileInfo, options);
      },
    );
  }

  /**
   * Process a file with detailed result including error information.
   * Never rejects because of a processor failure: errors are reported in the
   * returned object.
   *
   * @param fileInfo - File information including content/URL
   * @param options - Processing options, passed through to the processor
   * @returns `{ type, data }` on success, or `{ type, data: null, error }`
   *   where `error` carries code, message, filename, mimetype, suggestion and
   *   supportedTypes
   */
  async processWithResult(fileInfo, options) {
    const displayName = fileInfo.name || "file";
    const match = this.findProcessor(fileInfo.mimetype, fileInfo.name);

    if (!match) {
      const extension = getFileExtension(fileInfo.name);
      return buildErrorResult(
        "unsupported",
        "NO_PROCESSOR_FOUND",
        `Unable to process "${displayName}": No processor available for this file type.`,
        fileInfo,
        this.getSuggestionForFile(fileInfo.mimetype, extension),
        this.getSupportedTypes(),
      );
    }

    try {
      const result = await match.processor.processFile(fileInfo, options);
      // `result?.` so a processor resolving to null/undefined is reported as
      // "no data" instead of surfacing a property-access TypeError message.
      if (result?.success && result.data) {
        return { type: match.name, data: result.data };
      }
      return buildErrorResult(
        match.name,
        "PROCESSING_FAILED",
        `Failed to process "${displayName}": ${result?.error?.message || "Processor returned no data."}`,
        fileInfo,
        "The file may be corrupted or in an unexpected format. Try re-uploading or converting to a standard format.",
        this.getSupportedTypes(),
      );
    } catch (error) {
      return buildErrorResult(
        match.name,
        "PROCESSING_FAILED",
        `Failed to process "${displayName}": ${error instanceof Error ? error.message : "Unknown error"}`,
        fileInfo,
        "Please check if the file is valid and not corrupted.",
        this.getSupportedTypes(),
      );
    }
  }

  // ===========================================================================
  // UTILITY METHODS
  // ===========================================================================

  /**
   * Clear all registrations and aliases and mark the registry uninitialized.
   * Useful for testing to reset state between tests.
   */
  clear() {
    this.processors.clear();
    this.aliases.clear();
    this.initialized = false;
  }

  /**
   * Check if the registry has been initialized with default processors.
   *
   * @returns true if initialized
   */
  isInitialized() {
    return this.initialized;
  }

  /**
   * Mark the registry as initialized.
   * Called after default processors have been registered.
   */
  markInitialized() {
    this.initialized = true;
  }

  /**
   * Calculate confidence score for a processor match.
   *
   * Tolerates a missing MIME type and processors that expose no getConfig(),
   * so a processor that reported support is never dropped merely because its
   * confidence could not be refined.
   *
   * @param mimetype - MIME type of the file (may be undefined)
   * @param filename - Filename
   * @param reg - Processor registration
   * @returns 100 for a MIME type listed in the processor config, 80 when the
   *   MIME category equals the processor name, 60 for a listed extension,
   *   40 otherwise
   */
  calculateConfidence(mimetype, filename, reg) {
    const processor = reg.processor;
    const config =
      (typeof processor?.getConfig === "function" ? processor.getConfig() : null) ?? {};
    const normalizedMime = typeof mimetype === "string" ? mimetype.toLowerCase() : "";

    // Exact MIME type match in processor config
    if (
      normalizedMime &&
      Array.isArray(config.supportedMimeTypes) &&
      config.supportedMimeTypes.some((mime) => String(mime).toLowerCase() === normalizedMime)
    ) {
      return 100;
    }

    // MIME type category match, e.g. "image/png" for a processor named "image"
    const mimePrefix = normalizedMime.split("/")[0];
    if (mimePrefix && String(reg.name).toLowerCase() === mimePrefix) {
      return 80;
    }

    // Extension match
    if (Array.isArray(config.supportedExtensions)) {
      const ext = getFileExtension(filename);
      if (ext && config.supportedExtensions.some((e) => String(e).toLowerCase() === ext)) {
        return 60;
      }
    }

    // Generic match (isSupported returned true but we don't know why)
    return 40;
  }

  /**
   * Get a helpful suggestion based on the file type.
   *
   * @param mimetype - MIME type of the file (currently not consulted)
   * @param extension - File extension including the leading dot
   * @returns Suggestion string for the user
   */
  getSuggestionForFile(mimetype, extension) {
    const ext = (extension ?? "").toLowerCase();
    const known = EXTENSION_SUGGESTIONS.find(([extensions]) => extensions.includes(ext));
    if (known) {
      return known[1];
    }
    // Generic suggestion with supported formats
    const supportedTypes = this.getSupportedTypes();
    return `Supported formats include: ${supportedTypes.join(", ")}. Please convert your file to a supported format.`;
  }
}
// =============================================================================
// DEFAULT PROCESSOR REGISTRATION
// =============================================================================
/**
 * Register all 16 BaseFileProcessor-based processors with the registry.
 *
 * Legacy processors (CSV, Image, PDF, PPTX) that use static methods and don't
 * extend BaseFileProcessor are excluded — they continue to be routed via the
 * switch/case in FileDetector.processFile().
 *
 * Uses dynamic imports to avoid circular dependencies and enable tree-shaking.
 * All modules are loaded concurrently in a single Promise.all. Each processor
 * is registered with its priority from PROCESSOR_PRIORITIES and delegates
 * detection to the processor's own isFileSupported() method. Registration uses
 * `allowDuplicates`, so a processor name that is already present is left
 * untouched. The registry is marked initialized once every processor has been
 * registered.
 *
 * @param registry - The ProcessorRegistry instance to register processors into
 * @returns Promise that resolves when registration is complete; rejects if any
 *   of the dynamic imports fails
 */
async function initializeDefaultProcessors(registry) {
  // The modules are independent of each other, so load them all in parallel
  // (the video module included, rather than awaiting it separately afterwards).
  const [markup, code, data, documents, audio, video, archive, priorities] =
    await Promise.all([
      import("../markup/index.js"),
      import("../code/index.js"),
      import("../data/index.js"),
      import("../document/index.js"),
      import("../media/AudioProcessor.js"),
      import("../media/VideoProcessor.js"),
      import("../archive/ArchiveProcessor.js"),
      import("../../types/processor.js"),
    ]);
  const { PROCESSOR_PRIORITIES: P } = priorities;

  // Registration helper — wraps register() with allowDuplicates to be idempotent
  const reg = (name, priority, processor, description, aliases) => {
    registry.register(
      {
        name,
        priority,
        processor,
        isSupported: (mimetype, filename) => processor.isFileSupported(mimetype, filename),
        description,
        aliases,
      },
      { allowDuplicates: true },
    );
  };

  // ── Markup processors ────────────────────────────────────────────────────
  reg("svg", P.SVG, markup.svgProcessor, "SVG vector graphics (processed as text, not image)", ["svgz"]);
  reg("html", P.HTML, markup.htmlProcessor, "HTML web content with OWASP-compliant sanitization", ["htm", "xhtml"]);
  reg("markdown", P.MARKDOWN, markup.markdownProcessor, "Markdown structured text", ["md", "mdx"]);
  reg("text", P.TEXT, markup.textProcessor, "Plain text files", ["txt", "log"]);

  // ── Code processors ──────────────────────────────────────────────────────
  reg("source_code", P.SOURCE_CODE, code.sourceCodeProcessor, "Source code files (50+ languages)", ["ts", "js", "py", "java", "go", "rs", "cpp"]);
  reg("config", P.CONFIG, code.configProcessor, "Configuration files (.env, .ini, .toml, .cfg)", ["env", "ini", "toml", "cfg"]);

  // ── Data processors ──────────────────────────────────────────────────────
  reg("json", P.JSON, data.jsonProcessor, "JSON data files", ["json", "jsonl", "geojson"]);
  reg("yaml", P.YAML, data.yamlProcessor, "YAML configuration and data files", ["yaml", "yml"]);
  reg("xml", P.XML, data.xmlProcessor, "XML data files", ["xml", "xsd", "xsl"]);

  // ── Document processors ──────────────────────────────────────────────────
  reg("excel", P.EXCEL, documents.excelProcessor, "Excel spreadsheets with sheet extraction", ["xlsx", "xls"]);
  reg("word", P.WORD, documents.wordProcessor, "Word documents with text extraction", ["docx"]);
  reg("rtf", P.RTF, documents.rtfProcessor, "RTF documents", ["rtf"]);
  reg("opendocument", P.OPENDOCUMENT, documents.openDocumentProcessor, "OpenDocument format files", ["odt", "ods", "odp"]);

  // ── Media processors ─────────────────────────────────────────────────────
  reg("audio", P.AUDIO, audio.audioProcessor, "Audio files with metadata and tag extraction", ["mp3", "wav", "ogg", "flac", "aac", "m4a", "wma"]);
  reg("video", P.VIDEO, video.videoProcessor, "Video files with metadata and keyframe extraction", ["mp4", "mkv", "webm", "avi", "mov", "m4v"]);

  // ── Archive processors ───────────────────────────────────────────────────
  reg("archive", P.ARCHIVE, archive.archiveProcessor, "Archive files with content listing", ["zip", "tar", "gz", "tgz"]);

  registry.markInitialized();
}

// =============================================================================
// CONVENIENCE EXPORT
// =============================================================================
/** Promise tracking the ongoing initialization (prevents double-init races) */
let initPromise = null;
/**
 * Registry instance that the in-flight `initPromise` belongs to.
 * Lets getProcessorRegistry() detect that the singleton was replaced (via
 * ProcessorRegistry.resetInstance()) while an initialization was running.
 */
let initPromiseOwner = null;

/**
 * Get the ProcessorRegistry singleton instance.
 * Auto-initializes it with all 16 default processors whenever it is not
 * initialized — on first call, and again after `resetInstance()` or `clear()`.
 * Concurrent callers share a single in-flight initialization.
 * Convenience function for shorter imports.
 *
 * @returns The ProcessorRegistry singleton (auto-initialized with default processors)
 * @throws Whatever initializeDefaultProcessors() rejects with; the next call retries
 *
 * @example
 * ```typescript
 * import { getProcessorRegistry } from "./registry/index.js";
 *
 * const registry = await getProcessorRegistry();
 * const match = registry.findProcessor("image/svg+xml", "icon.svg");
 * ```
 */
export const getProcessorRegistry = async () => {
  const registry = ProcessorRegistry.getInstance();
  if (registry.isInitialized()) {
    return registry;
  }
  // Start a new initialization unless one is already running for THIS instance.
  // A promise left over from a previous instance must not be reused: awaiting
  // it would hand back a registry that never had its processors registered.
  if (!initPromise || initPromiseOwner !== registry) {
    const pending = initializeDefaultProcessors(registry).finally(() => {
      // Clear on success and on failure, so a later clear()/resetInstance()
      // or a failed attempt leads to a fresh initialization. Guarded so an
      // older initialization settling late cannot wipe out a newer one.
      if (initPromise === pending) {
        initPromise = null;
        initPromiseOwner = null;
      }
    });
    initPromise = pending;
    initPromiseOwner = registry;
  }
  await initPromise;
  return registry;
};

/**
 * Get the ProcessorRegistry singleton instance synchronously (without auto-initialization).
 * Use this when you know the registry is already initialized, or when you only
 * need the raw registry instance (e.g., for manual registration in tests).
 *
 * @returns The ProcessorRegistry singleton (may be empty if not yet initialized)
 */
export const getProcessorRegistrySync = () => ProcessorRegistry.getInstance();