@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications
172 lines (171 loc) • 6.35 kB
TypeScript
/**
* File Type Detection Utility
* Centralized file detection for all multimodal file types
* Uses multi-strategy approach for reliable type identification
*/
import type { FileDetectorOptions, FileInput, FileProcessingResult } from "../types/index.js";
/**
* Centralized file type detection and processing.
*
* The public surface visible in this declaration is the single entry point
* {@link FileDetector.detectAndProcess} plus two timeout constants. Every
* other member is a private static helper; the declaration emit strips their
* signatures, so only their names and doc comments are visible here.
*
* @example
* ```typescript
* // Auto-detect and process any file
* // (assumes a `logger` is already in scope)
* const result = await FileDetector.detectAndProcess("data.csv");
* logger.info(result.type); // 'csv'
* ```
*/
export declare class FileDetector {
/**
* Default timeout for network operations.
* NOTE(review): presumably milliseconds (30000 = 30 s) and applied when
* loading file content from a URL - confirm against the implementation.
*/
static readonly DEFAULT_NETWORK_TIMEOUT = 30000;
/**
* Default timeout for HEAD requests.
* NOTE(review): presumably milliseconds (5000 = 5 s) and used for the
* HTTP HEAD probe made during URL type detection - confirm against the
* implementation.
*/
static readonly DEFAULT_HEAD_TIMEOUT = 5000;
/**
* Auto-detect file type and process in one call.
*
* Runs detection strategies in priority order:
* 1. MagicBytesStrategy (95% confidence) - Binary file headers
* 2. MimeTypeStrategy (85% confidence) - HTTP Content-Type for URLs
* 3. ExtensionStrategy (70% confidence) - File extension
* 4. ContentHeuristicStrategy (75% confidence) - Content analysis
*
* Note that the list above is ordered by priority, not by confidence:
* ExtensionStrategy (70%) is listed before ContentHeuristicStrategy (75%).
*
* @param input - File path, URL, Buffer, or data URI
* @param options - Optional detection and processing options
* @returns Promise resolving to the processed file result with type and content
*/
static detectAndProcess(input: FileInput, options?: FileDetectorOptions): Promise<FileProcessingResult>;
/**
* Set span attributes and log after file processing completes.
*/
private static setFileResultSpanAttributes;
/**
* Derive a human-readable filename from a FileInput, for tracing.
*/
private static deriveInputFilename;
/**
* Derive the byte size of a FileInput, for tracing.
*/
private static deriveInputSize;
/**
* Classify a FileInput into the FileSource enum used by downstream
* loaders. This lets the mimetype-hint short-circuit in detect() produce
* a valid FileDetectionResult without re-implementing the
* source-inference rules scattered across loadContent().
*/
private static deriveInputSource;
/**
* Try fallback parsing for a specific file type.
* Used when file detection returns "unknown" but we want to try parsing anyway.
*/
private static tryFallbackParsing;
/**
* Check whether content is valid text (UTF-8, mostly printable).
*/
private static isValidText;
/**
* Guess the MIME type for text content based on content patterns.
*/
private static guessTextMimeType;
/**
* Strict YAML detection for guessTextMimeType.
* Similar to ContentHeuristicStrategy, but requires at least 2 indicators
* to avoid false positives from simple `key: value` patterns.
*/
private static looksLikeYAMLStrict;
/**
* Strict XML detection for guessTextMimeType.
* Requires a proper XML declaration or a valid tag structure with closing
* tags, which prevents false positives from arbitrary content that merely
* starts with `<`.
*/
private static looksLikeXMLStrict;
/**
* Detect file type using the multi-strategy approach.
* Stops at the first strategy whose confidence is >= the threshold
* (default: 80%).
*/
private static detect;
/**
* Load file content from various sources.
* (The sibling loaders declared on this class cover URL, filesystem path,
* and data URI inputs.)
*/
private static loadContent;
/**
* SDK-8: Format an informative placeholder when a file processor fails.
* Instead of bare "[Video file: name]" strings, include size, format, and
* the reason for failure so the LLM can acknowledge the attachment.
*/
private static formatInformativePlaceholder;
/**
* Extract metadata and printable strings from an unrecognized binary file.
* This is the "extract what you can" path for unknown file types.
*
* Extracts:
* - File size (human-readable)
* - MIME type / detected format
* - First N bytes as hex dump (for identification)
* - Printable ASCII/UTF-8 strings found in the binary (like the `strings` command)
* - Known file signatures that we don't have full processors for
*
* @param content Raw file buffer
* @param detection Detection result (may be "unknown")
* @param filename Original filename (if known)
* @returns Formatted text summary suitable for LLM consumption
*/
private static extractBinaryMetadata;
/**
* Identify known binary file signatures beyond what we can process.
* Returns a human-readable description, or null if unrecognized.
*/
private static identifyBinarySignature;
/**
* Extract printable ASCII strings from a binary buffer.
* Similar to the Unix `strings` utility.
*
* @param buf Buffer to scan
* @param minLength Minimum string length to include (default 4)
* @param maxStrings Maximum number of strings to return (default 50)
* @returns Array of printable strings found in the binary
*/
private static extractPrintableStrings;
/**
* Route the file to the appropriate processor.
*/
private static processFile;
/**
* Process a video file: extract metadata, keyframes, and subtitles via VideoProcessor.
*/
private static processVideoFile;
/**
* Process an audio file: extract metadata, tags, and cover art via AudioProcessor.
*/
private static processAudioFile;
/**
* Process an archive file: list contents and extract metadata via ArchiveProcessor.
*/
private static processArchiveFile;
/**
* Process an Excel/OpenDocument spreadsheet via ExcelProcessor or OpenDocumentProcessor.
*/
private static processXlsxFile;
/**
* Process a Word/OpenDocument/RTF document via WordProcessor, OpenDocumentProcessor, or RtfProcessor.
*/
private static processDocxFile;
/**
* Process a PowerPoint/OpenDocument presentation via PptxProcessor.
*/
private static processPptxFile;
/**
* Process an SVG file as text content.
* Uses SvgProcessor for security sanitization (removes XSS vectors) and
* returns the sanitized SVG markup as text for AI analysis.
*/
private static processSvgAsText;
/**
* Load a file from a URL, with automatic retry on transient network errors.
*/
private static loadFromURL;
/**
* Load a file from a filesystem path.
*/
private static loadFromPath;
/**
* Load a file from a data URI.
*/
private static loadFromDataURI;
}