UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

244 lines (243 loc) 8.43 kB
/**
 * File Reference Architecture Types (canonical location)
 *
 * Type definitions for the lazy, on-demand file processing system.
 * A file is first registered as a lightweight reference (metadata plus a
 * preview); its full content is processed only when the LLM asks for it
 * through tools.
 */
import type { FileSource, FileType } from "./file.js";

/**
 * Size tier of a file; it selects the processing strategy.
 *
 * - tiny:      Inline in prompt (current behavior)
 * - small:     Full load, truncate to budget
 * - medium:    Outline + on-demand sections
 * - large:     Stream + chunked summarization
 * - huge:      Reference only + tool-based access
 * - oversized: Reject with informative message
 */
export type SizeTier =
  | "tiny"
  | "small"
  | "medium"
  | "large"
  | "huge"
  | "oversized";

/**
 * Processing status of a file reference.
 */
export type FileReferenceStatus =
  | "registered"
  | "previewed"
  | "processing"
  | "processed"
  | "error";

/**
 * One entry of a file outline (used for code, PDFs, spreadsheets).
 */
export type OutlineSection = {
  /** Heading or name of the section (e.g., function name, class name, sheet name). */
  name: string;

  /** Section type (function, class, import, sheet, page, heading). */
  kind: string;

  /** First line of the section (1-indexed). */
  startLine: number;

  /** Last line of the section (1-indexed). */
  endLine: number;

  /** Nesting depth; 0 means top-level. */
  depth: number;

  /** Nested sections (e.g., the methods inside a class). */
  children?: Array<OutlineSection>;
};

/**
 * Lightweight reference to a file registered for on-demand processing.
 *
 * Registration is fast (~1ms): only stat + magic bytes + first 1KB preview.
 * Full processing is postponed until the LLM requests it via tools.
 */
export type FileReference = {
  /** Unique identifier (UUID v4). */
  id: string;

  /** How the file was provided. */
  source: FileSource;

  /** Original file path or URL. */
  originalPath?: string;

  /** Display name. */
  filename: string;

  /** Size of the original file, in bytes. */
  sizeBytes: number;

  /** File type detected from magic bytes / extension. */
  detectedType: FileType;

  /** Detected MIME type. */
  mimeType: string;

  /** Size tier that determines the processing strategy. */
  sizeTier: SizeTier;

  /** Estimated token count after processing (type-aware). */
  estimatedTokens: number;

  /** Lightweight preview: the first ~500 tokens of content. */
  preview: string;

  /** Current processing status. */
  status: FileReferenceStatus;

  /** LLM-generated summary (filled in lazily via the summarize_file tool). */
  summary?: string;

  /** Structural outline for code/docs (filled in lazily). */
  outlineSections?: Array<OutlineSection>;

  /** Location in the temp directory where the buffer is persisted. */
  tempPath?: string;

  /** Provider file API ID (for Anthropic Files API, Gemini File API, etc.). */
  providerId?: string;

  /** Fully processed content (cached after the first full processing). */
  processedContent?: string;

  /** Extracted images (e.g., video keyframes, PPTX slide images). */
  extractedImages?: Array<Buffer>;

  /** Timestamp of registration. */
  registeredAt: number;

  /** Timestamp of the most recent access (for LRU eviction). */
  lastAccessedAt: number;

  /** Total line count (text files; populated on first read). */
  totalLines?: number;

  /** File extension (e.g., 'py', 'xlsx', 'mp4'). */
  extension?: string;
};

/**
 * Options accepted when registering a file.
 */
export type FileRegistrationOptions = {
  /** Overrides filename detection. */
  filename?: string;

  /** Overrides file type detection. */
  fileType?: FileType;

  /**
   * MIME type hint supplied by the caller (e.g. "text/plain",
   * "application/json").
   *
   * Used when the filename has no extension and magic-byte detection cannot
   * identify the content (common for Slack/Curator-style buffers where the
   * original extension was stripped). The hint is honored during type
   * detection, mimeType assignment, and filename-extension synthesis.
   * An explicit `fileType` override still takes precedence over it.
   */
  mimetype?: string;

  /** Upper bound on preview length, in characters. */
  maxPreviewChars?: number;

  /** When set, skips persisting the buffer to the temp directory. */
  skipTempPersist?: boolean;
};

/**
 * Result of reading a section of a file.
 */
export type FileReadResult = {
  /** The content that was read. */
  content: string;

  /** First line returned (1-indexed). */
  startLine: number;

  /** Last line returned (1-indexed). */
  endLine: number;

  /** Total number of lines in the file. */
  totalLines: number;

  /** Whether the content was cut short to fit the token budget. */
  truncated: boolean;

  /** Number of tokens in the returned content. */
  estimatedTokens: number;
};

/**
 * Result of a search inside a file.
 */
export type FileSearchResult = {
  /** Matching lines, each with its context. */
  matches: Array<FileSearchMatch>;

  /** Total number of matches found. */
  totalMatches: number;

  /** Whether the results were truncated. */
  truncated: boolean;
};

/**
 * One search match inside a file.
 */
export type FileSearchMatch = {
  /** Line number of the match (1-indexed). */
  lineNumber: number;

  /** Content of the matching line. */
  line: string;

  /** Context lines preceding the match. */
  contextBefore: Array<string>;

  /** Context lines following the match. */
  contextAfter: Array<string>;
};

/**
 * Options for the streaming reader.
 */
export type StreamingReaderOptions = {
  /** Maximum tokens to read (reading stops once the budget is exhausted). */
  tokenBudget?: number;

  /** First line to read (1-indexed, default 1). */
  startLine?: number;

  /** Last line to read (1-indexed, default EOF). */
  endLine?: number;

  /** Encoding (default 'utf-8'). */
  encoding?: BufferEncoding;

  /** Provider name, used for the token estimation multiplier. */
  provider?: string;
};

/**
 * Parameters for targeted content extraction via the extract_file_content
 * tool. Each file type uses its own subset of these parameters.
 */
export type FileExtractionParams = {
  /** File ID (UUID) or filename. */
  file_id: string;

  /** Start timestamp in seconds (video). */
  start_time?: number;

  /** End timestamp in seconds (video). */
  end_time?: number;

  /** Number of frames to extract in the range (video, default: 5). */
  frame_count?: number;

  /** Specific page/slide numbers (1-indexed). */
  pages?: Array<number>;

  /** Page range (1-indexed, inclusive). */
  page_range?: {
    start: number;
    end: number;
  };

  /** Sheet name or 0-based index. */
  sheet?: string | number;

  /** Row range (1-indexed). */
  row_range?: {
    start: number;
    end: number;
  };

  /** Specific columns (e.g., ["A", "B", "D"]). */
  columns?: Array<string>;

  /** Path of the file within the archive. */
  entry_path?: string;

  /** Output format hint. */
  format?: "text" | "detailed" | "summary";
};

/**
 * Result of a targeted content extraction.
 * Depending on the extraction type it may carry text, images, or both.
 */
export type FileExtractionResult = {
  /** Whether the extraction succeeded. */
  success: boolean;

  /** Extracted text content. */
  text?: string;

  /** Extracted images as JPEG buffers (e.g., video frames, slide renders). */
  images?: Array<Buffer>;

  /** Metadata about the extraction. */
  metadata?: Record<string, unknown>;

  /** Error message, present if the extraction failed. */
  error?: string;
};

/**
 * Options for the file reference registry.
 */
export type FileRegistryOptions = {
  /** Directory used to persist file buffers (default: os.tmpdir()/neurolink-files/). */
  tempDir?: string;

  /** Maximum number of file references kept (LRU eviction, default: 100). */
  maxFiles?: number;

  /** Maximum total bytes persisted to temp (default: 1GB). */
  maxTempBytes?: number;

  /** Default preview length in characters (default: 2000). */
  defaultPreviewChars?: number;
};

/**
 * Upper byte bounds for each size tier. Only the shape is declared here;
 * the per-field comments give the documented range of each tier.
 */
export declare const SIZE_TIER_THRESHOLDS: {
  /** < 10 KB: inline in prompt */
  readonly TINY_MAX: number;

  /** 10 KB – 100 KB: full load with truncation */
  readonly SMALL_MAX: number;

  /** 100 KB – 5 MB: outline + on-demand */
  readonly MEDIUM_MAX: number;

  /** 5 MB – 100 MB: streaming + chunked summarization */
  readonly LARGE_MAX: number;

  /** 100 MB – 2 GB: reference only */
  readonly HUGE_MAX: number;
};