@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

github.com/juspay/neurolink

juspay/neurolink

172 lines (171 loc) • 6.35 kB

TypeScript

/**
 * File Type Detection Utility
 *
 * Centralized file detection for all multimodal file types.
 * Uses a multi-strategy approach for reliable type identification.
 */
import type { FileDetectorOptions, FileInput, FileProcessingResult } from "../types/index.js";
/**
 * Centralized file type detection and processing.
 *
 * This is an ambient declaration: only `detectAndProcess` and the two
 * timeout constants are public. Private static members are listed by name
 * only, so their parameter and return types are not visible here.
 *
 * @example
 * ```typescript
 * // Auto-detect and process any file
 * const result = await FileDetector.detectAndProcess("data.csv");
 * logger.info(result.type); // 'csv'
 * ```
 */
export declare class FileDetector {
    /**
     * Default timeout for network operations.
     * NOTE(review): presumably milliseconds (30 s) — confirm against the implementation.
     */
    static readonly DEFAULT_NETWORK_TIMEOUT = 30000;
    /**
     * Default timeout for HEAD requests.
     * NOTE(review): presumably milliseconds (5 s) — confirm against the implementation.
     */
    static readonly DEFAULT_HEAD_TIMEOUT = 5000;
    /**
     * Auto-detect the file type and process the file in one call.
     *
     * Runs detection strategies in priority order:
     * 1. MagicBytesStrategy (95% confidence) - Binary file headers
     * 2. MimeTypeStrategy (85% confidence) - HTTP Content-Type for URLs
     * 3. ExtensionStrategy (70% confidence) - File extension
     * 4. ContentHeuristicStrategy (75% confidence) - Content analysis
     *
     * NOTE(review): the listed confidences are not monotonic (step 3 is 70%,
     * step 4 is 75%); the list is stated to be in priority order — confirm
     * the intended ordering against the implementation.
     *
     * @param input - File path, URL, Buffer, or data URI
     * @param options - Detection and processing options
     * @returns Processed file result with type and content
     */
    static detectAndProcess(input: FileInput, options?: FileDetectorOptions): Promise<FileProcessingResult>;
    /**
     * Set span attributes and log after file processing completes.
     */
    private static setFileResultSpanAttributes;
    /**
     * Derive a human-readable filename from FileInput for tracing.
     */
    private static deriveInputFilename;
    /**
     * Derive byte size from FileInput for tracing.
     */
    private static deriveInputSize;
    /**
     * Classify a FileInput into the FileSource enum used by downstream
     * loaders. Keeps the mimetype-hint short-circuit in detect() able to
     * produce a valid FileDetectionResult without re-implementing the
     * source-inference rules scattered across loadContent().
     */
    private static deriveInputSource;
    /**
     * Try fallback parsing for a specific file type.
     * Used when file detection returns "unknown" but parsing should be
     * attempted anyway.
     */
    private static tryFallbackParsing;
    /**
     * Check whether content is valid text (UTF-8, mostly printable).
     */
    private static isValidText;
    /**
     * Guess the MIME type for text content based on content patterns.
     */
    private static guessTextMimeType;
    /**
     * Strict YAML detection for guessTextMimeType.
     * Similar to ContentHeuristicStrategy but requires at least 2 indicators
     * to avoid false positives from simple `key: value` patterns.
     */
    private static looksLikeYAMLStrict;
    /**
     * Strict XML detection for guessTextMimeType.
     * Ensures content has a proper XML declaration or a valid tag structure
     * with closing tags. Prevents false positives from arbitrary content
     * starting with `<`.
     */
    private static looksLikeXMLStrict;
    /**
     * Detect the file type using the multi-strategy approach.
     * Stops at the first strategy with confidence >= threshold (default: 80%).
     */
    private static detect;
    /**
     * Load file content from various sources.
     */
    private static loadContent;
    /**
     * SDK-8: Format an informative placeholder when a file processor fails.
     * Instead of bare "[Video file: name]" strings, include size, format, and
     * the reason for failure so the LLM can acknowledge the attachment.
     */
    private static formatInformativePlaceholder;
    /**
     * Extract metadata and printable strings from an unrecognized binary file.
     * This is the "extract what you can" path for unknown file types.
     *
     * Extracts:
     * - File size (human-readable)
     * - MIME type / detected format
     * - First N bytes as hex dump (for identification)
     * - Printable ASCII/UTF-8 strings found in the binary (like `strings` command)
     * - Known file signatures that we don't have full processors for
     *
     * @param content Raw file buffer
     * @param detection Detection result (may be "unknown")
     * @param filename Original filename (if known)
     * @returns Formatted text summary suitable for LLM consumption
     */
    private static extractBinaryMetadata;
    /**
     * Identify known binary file signatures beyond what we can process.
     * Returns a human-readable description, or null if unrecognized.
     */
    private static identifyBinarySignature;
    /**
     * Extract printable ASCII strings from a binary buffer.
     * Similar to the Unix `strings` utility.
     *
     * @param buf Buffer to scan
     * @param minLength Minimum string length to include (default 4)
     * @param maxStrings Maximum number of strings to return (default 50)
     * @returns Array of printable strings found in the binary
     */
    private static extractPrintableStrings;
    /**
     * Route to the appropriate processor.
     */
    private static processFile;
    /**
     * Process video file: extract metadata, keyframes, and subtitles via VideoProcessor.
     */
    private static processVideoFile;
    /**
     * Process audio file: extract metadata, tags, and cover art via AudioProcessor.
     */
    private static processAudioFile;
    /**
     * Process archive file: list contents and extract metadata via ArchiveProcessor.
     */
    private static processArchiveFile;
    /**
     * Process Excel/OpenDocument spreadsheet file via ExcelProcessor or OpenDocumentProcessor.
     */
    private static processXlsxFile;
    /**
     * Process Word/OpenDocument/RTF document via WordProcessor, OpenDocumentProcessor, or RtfProcessor.
     */
    private static processDocxFile;
    /**
     * Process PowerPoint/OpenDocument presentation via PptxProcessor.
     */
    private static processPptxFile;
    /**
     * Process SVG file as text content.
     * Uses SvgProcessor for security sanitization (removes XSS vectors).
     * Returns sanitized SVG markup as text for AI analysis.
     */
    private static processSvgAsText;
    /**
     * Load file from URL with automatic retry on transient network errors.
     */
    private static loadFromURL;
    /**
     * Load file from filesystem path.
     */
    private static loadFromPath;
    /**
     * Load file from data URI.
     */
    private static loadFromDataURI;
}