UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

199 lines (198 loc) 5.94 kB
/**
 * MDocument - Main Document Processing Class
 *
 * Provides a fluent interface for document processing using the Factory + Registry pattern.
 * Supports various document types, chunking strategies, and metadata extraction.
 *
 * Note on the example: per the declarations below, the static `from*` factories
 * return an `MDocument` synchronously, while `chunk`, `extractMetadata` and `embed`
 * return `Promise<MDocument>`; the processed chunks are read back with `getChunks()`.
 *
 * @example
 * ```typescript
 * const doc = MDocument.fromText(content);
 * await doc.chunk({
 *   strategy: 'recursive',
 *   config: { maxSize: 1000, overlap: 200 }
 * });
 * await doc.extractMetadata({
 *   title: true,
 *   summary: true,
 *   keywords: true
 * });
 * const chunks = doc.getChunks();
 * ```
 */
import type { Chunk, ChunkParams, DocumentType, ExtractParams, MDocumentConfig } from "../../types/index.js";
/**
 * MDocument class for comprehensive document processing
 *
 * Provides a chainable API for:
 * - Loading documents from various sources
 * - Chunking with multiple strategies
 * - Metadata extraction using LLMs
 * - Embedding generation
 *
 * This is a declaration only; the behavior described in the member comments
 * comes from the accompanying implementation, which is not visible here.
 */
export declare class MDocument {
    /** Internal document state (untyped in this declaration; shape not visible here). */
    private state;
    /** Internal document identifier; presumably the value returned by `getId()` (confirm in implementation). */
    private documentId;
    /**
     * Create a new MDocument instance
     * @param content - Document content
     * @param config - Optional document configuration
     */
    constructor(content: string, config?: MDocumentConfig);
    /**
     * Create MDocument from plain text
     * @param text - Plain text content
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromText(text: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Create MDocument from markdown content
     * @param markdown - Markdown content
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromMarkdown(markdown: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Create MDocument from HTML content
     * @param html - HTML content
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromHTML(html: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Create MDocument from JSON content
     *
     * Not to be confused with `fromJSON`, which takes a serialized document
     * object rather than JSON that is itself the document content.
     *
     * @param json - JSON string or object
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromJSONContent(json: string | object, metadata?: Record<string, unknown>): MDocument;
    /**
     * Create MDocument from LaTeX content
     * @param latex - LaTeX content
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromLaTeX(latex: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Create MDocument from CSV content
     * @param csv - CSV content
     * @param metadata - Optional metadata
     * @returns MDocument instance (returned synchronously)
     */
    static fromCSV(csv: string, metadata?: Record<string, unknown>): MDocument;
    /**
     * Chunk the document using specified strategy
     * @param params - Optional chunking parameters
     * @returns Promise resolving to this MDocument instance (for chaining)
     */
    chunk(params?: ChunkParams): Promise<MDocument>;
    /**
     * Extract metadata from chunks using LLM
     * @param params - Extraction parameters
     * @param options - Optional extractor options (`provider` and `modelName`, both optional)
     * @returns Promise resolving to this MDocument instance (for chaining)
     */
    extractMetadata(params: ExtractParams, options?: {
        provider?: string;
        modelName?: string;
    }): Promise<MDocument>;
    /**
     * Generate embeddings for all chunks
     * @param provider - Optional embedding provider name
     * @param modelName - Optional embedding model name
     * @returns Promise resolving to this MDocument instance (for chaining)
     */
    embed(provider?: string, modelName?: string): Promise<MDocument>;
    /**
     * Get document ID
     */
    getId(): string;
    /**
     * Get raw document content
     */
    getContent(): string;
    /**
     * Get document type
     */
    getType(): DocumentType;
    /**
     * Get document metadata
     */
    getMetadata(): Record<string, unknown>;
    /**
     * Get processed chunks
     */
    getChunks(): Chunk[];
    /**
     * Get chunk embeddings as an array of numeric vectors
     */
    getEmbeddings(): number[][];
    /**
     * Get processing history as a list of strings
     */
    getHistory(): string[];
    /**
     * Check if document has been chunked
     */
    isChunked(): boolean;
    /**
     * Check if document has embeddings
     */
    hasEmbeddings(): boolean;
    /**
     * Get chunk count
     */
    getChunkCount(): number;
    /**
     * Set document metadata
     * @param key - Metadata key
     * @param value - Metadata value
     * @returns This MDocument instance (for chaining)
     */
    setMetadata(key: string, value: unknown): MDocument;
    /**
     * Merge metadata into document
     * @param metadata - Metadata to merge
     * @returns This MDocument instance (for chaining)
     */
    mergeMetadata(metadata: Record<string, unknown>): MDocument;
    /**
     * Filter chunks based on predicate
     * @param predicate - Filter function, called with a chunk and returning a boolean
     * @returns New MDocument with filtered chunks
     */
    filterChunks(predicate: (chunk: Chunk) => boolean): MDocument;
    /**
     * Map transformation over chunks
     * @param transform - Transform function, called with a chunk and returning a chunk
     * @returns New MDocument with transformed chunks
     */
    mapChunks(transform: (chunk: Chunk) => Chunk): MDocument;
    /**
     * Convert to plain object for serialization
     * @returns Object with `id`, `content`, `type`, `metadata`, `chunks` and `history`
     */
    toJSON(): {
        id: string;
        content: string;
        type: DocumentType;
        metadata: Record<string, unknown>;
        chunks: Chunk[];
        history: string[];
    };
    /**
     * Create MDocument from serialized JSON
     *
     * The accepted shape has the same fields as the `toJSON()` return type, with
     * `id`, `metadata`, `chunks` and `history` optional. Presumably the inverse
     * of `toJSON()` (confirm in implementation).
     *
     * @param json - Serialized document data
     * @returns MDocument instance
     */
    static fromJSON(json: {
        id?: string;
        content: string;
        type: DocumentType;
        metadata?: Record<string, unknown>;
        chunks?: Chunk[];
        history?: string[];
    }): MDocument;
    /**
     * Get default chunking strategy based on document type
     */
    private getDefaultStrategy;
}