@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
199 lines (198 loc) • 5.94 kB
TypeScript
/**
* MDocument - Main Document Processing Class
*
* Provides a fluent interface for document processing using the Factory + Registry pattern.
* Supports various document types, chunking strategies, and metadata extraction.
*
* @example
* ```typescript
* const doc = MDocument.fromText(content); // synchronous factory: no await needed
* const chunks = await doc.chunk({
* strategy: 'recursive',
* config: { maxSize: 1000, overlap: 200 }
* });
* const enriched = await doc.extractMetadata({
* title: true,
* summary: true,
* keywords: true
* });
* ```
*/
import type { Chunk, ChunkParams, DocumentType, ExtractParams, MDocumentConfig } from "../../types/index.js";
/**
* MDocument class for comprehensive document processing
*
* Provides a chainable API for:
* - Loading documents from various sources
* - Chunking with multiple strategies
* - Metadata extraction using LLMs
* - Embedding generation
*
* Chaining notes (derived from the signatures below):
* - The static `from*` factories and the metadata/chunk helpers
*   (`setMetadata`, `mergeMetadata`, `filterChunks`, `mapChunks`) are
*   synchronous and return an `MDocument` directly.
* - `chunk`, `extractMetadata` and `embed` are asynchronous and return
*   `Promise<MDocument>`, so each of those steps must be awaited before the
*   next call can be chained onto the result.
*
* This is an ambient declaration (`declare class`): it describes the public
* type surface only; the implementation lives in the compiled JavaScript.
*/
export declare class MDocument {
// Internal processing state. The type is erased in this declaration, so its
// shape is not visible to consumers.
private state;
// Internal document identifier (type erased here).
// NOTE(review): presumably the value exposed by getId() — confirm against the implementation.
private documentId;
/**
* Create a new MDocument instance
* @param content - Document content as a string
* @param config - Optional document configuration
*/
constructor(content: string, config?: MDocumentConfig);
/**
* Create MDocument from plain text (synchronous factory)
* @param text - Plain text content
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromText(text: string, metadata?: Record<string, unknown>): MDocument;
/**
* Create MDocument from markdown content (synchronous factory)
* @param markdown - Markdown content
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromMarkdown(markdown: string, metadata?: Record<string, unknown>): MDocument;
/**
* Create MDocument from HTML content (synchronous factory)
* @param html - HTML content
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromHTML(html: string, metadata?: Record<string, unknown>): MDocument;
/**
* Create MDocument from JSON content (synchronous factory)
*
* Not to be confused with {@link MDocument.fromJSON}: this factory treats the
* JSON as the document's content, whereas `fromJSON` takes a serialized
* document record (`content`, `type`, optional `chunks`, ...).
*
* @param json - JSON string or object
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromJSONContent(json: string | object, metadata?: Record<string, unknown>): MDocument;
/**
* Create MDocument from LaTeX content (synchronous factory)
* @param latex - LaTeX content
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromLaTeX(latex: string, metadata?: Record<string, unknown>): MDocument;
/**
* Create MDocument from CSV content (synchronous factory)
* @param csv - CSV content
* @param metadata - Optional metadata
* @returns MDocument instance
*/
static fromCSV(csv: string, metadata?: Record<string, unknown>): MDocument;
/**
* Chunk the document using specified strategy
*
* Asynchronous: the result must be awaited before chaining further calls.
*
* @param params - Optional chunking parameters.
*   NOTE(review): when omitted, a default strategy is presumably chosen per
*   document type (see the private `getDefaultStrategy`) — confirm against
*   the implementation.
* @returns Promise resolving to this MDocument instance (for chaining)
*/
chunk(params?: ChunkParams): Promise<MDocument>;
/**
* Extract metadata from chunks using LLM
*
* Asynchronous: the result must be awaited before chaining further calls.
*
* @param params - Extraction parameters (required)
* @param options - Optional extractor options; both `provider` and
*   `modelName` are individually optional
* @returns Promise resolving to this MDocument instance (for chaining)
*/
extractMetadata(params: ExtractParams, options?: {
provider?: string;
modelName?: string;
}): Promise<MDocument>;
/**
* Generate embeddings for all chunks
*
* Asynchronous: the result must be awaited before chaining further calls.
*
* @param provider - Optional embedding provider name
* @param modelName - Optional embedding model name
* @returns Promise resolving to this MDocument instance (for chaining)
*/
embed(provider?: string, modelName?: string): Promise<MDocument>;
/**
* Get document ID
* @returns The document identifier as a string
*/
getId(): string;
/**
* Get raw document content
* @returns The document content as a string
*/
getContent(): string;
/**
* Get document type
* @returns The document's `DocumentType`
*/
getType(): DocumentType;
/**
* Get document metadata
* @returns Metadata as a string-keyed record of unknown values; callers must
*   narrow individual values before use
*/
getMetadata(): Record<string, unknown>;
/**
* Get processed chunks
* @returns Array of chunks
*   NOTE(review): presumably empty until `chunk()` has run — confirm.
*/
getChunks(): Chunk[];
/**
* Get chunk embeddings
* @returns Array of numeric vectors
*   NOTE(review): presumably one vector per chunk, in chunk order — confirm
*   against the implementation.
*/
getEmbeddings(): number[][];
/**
* Get processing history
* @returns History entries as strings
*/
getHistory(): string[];
/**
* Check if document has been chunked
* @returns `true` if the document has been chunked
*/
isChunked(): boolean;
/**
* Check if document has embeddings
* @returns `true` if the document has embeddings
*/
hasEmbeddings(): boolean;
/**
* Get chunk count
* @returns Number of chunks
*/
getChunkCount(): number;
/**
* Set document metadata (synchronous)
* @param key - Metadata key
* @param value - Metadata value
* @returns This MDocument instance (for chaining)
*/
setMetadata(key: string, value: unknown): MDocument;
/**
* Merge metadata into document (synchronous)
* @param metadata - Metadata to merge
* @returns This MDocument instance (for chaining)
*/
mergeMetadata(metadata: Record<string, unknown>): MDocument;
/**
* Filter chunks based on predicate (synchronous)
* @param predicate - Filter function; called with a chunk and returns a
*   boolean (presumably `true` keeps the chunk — confirm)
* @returns New MDocument with filtered chunks
*/
filterChunks(predicate: (chunk: Chunk) => boolean): MDocument;
/**
* Map transformation over chunks (synchronous)
* @param transform - Transform function; receives a chunk and returns the
*   replacement chunk
* @returns New MDocument with transformed chunks
*/
mapChunks(transform: (chunk: Chunk) => Chunk): MDocument;
/**
* Convert to plain object for serialization
*
* The returned shape carries id, content, type, metadata, chunks and
* history. It has no embeddings field, so embeddings are not part of this
* serialized form.
*
* @returns Plain object describing the document
*/
toJSON(): {
id: string;
content: string;
type: DocumentType;
metadata: Record<string, unknown>;
chunks: Chunk[];
history: string[];
};
/**
* Create MDocument from serialized JSON
*
* Accepts the shape produced by `toJSON()`, with `id`, `metadata`, `chunks`
* and `history` optional; only `content` and `type` are required.
*
* @param json - Serialized document data
* @returns MDocument instance
*/
static fromJSON(json: {
id?: string;
content: string;
type: DocumentType;
metadata?: Record<string, unknown>;
chunks?: Chunk[];
history?: string[];
}): MDocument;
/**
* Get default chunking strategy based on document type
*
* Private member; its signature is erased in this declaration.
*/
private getDefaultStrategy;
}