UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

104 lines (103 loc) 5.25 kB
/**
 * RAG Document Processing Module
 *
 * Public entry point for the Retrieval-Augmented Generation (RAG) features.
 * Everything below is re-exported from the individual sub-modules:
 *
 * - Document loading (text, markdown, HTML, JSON, CSV, PDF, web)
 * - `MDocument` class for fluent document processing
 * - 10 chunking strategies (character, recursive, sentence, token, markdown,
 *   html, json, latex, semantic, semantic-markdown)
 * - LLM-powered metadata extraction (title, summary, keywords, Q&A)
 * - Vector query tools with metadata filtering and reranking
 * - Hybrid search (BM25 + vector fusion)
 * - Graph RAG for knowledge graph-based retrieval
 * - RAG pipeline orchestration
 * - Context assembly and formatting
 * - `ChunkerFactory` and `ChunkerRegistry` patterns for extensibility
 * - Error handling and resilience (`CircuitBreaker`, `RetryHandler`)
 *
 * @example
 * ```typescript
 * import {
 *   MDocument,
 *   loadDocument,
 *   RAGPipeline,
 *   ChunkerRegistry,
 *   ChunkerFactory,
 *   CircuitBreaker
 * } from '@juspay/neurolink';
 *
 * // Load and process a document
 * const doc = await loadDocument('/path/to/document.md');
 * await doc.chunk({ strategy: 'markdown', config: { maxSize: 1000 } });
 * await doc.embed('openai', 'text-embedding-3-small');
 *
 * // Or use the full RAG pipeline
 * const pipeline = new RAGPipeline({
 *   embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
 *   generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
 * });
 * await pipeline.ingest(['/path/to/docs/*.md']);
 * const response = await pipeline.query('What are the key features?');
 *
 * // Use factory pattern for chunker creation
 * const chunker = await ChunkerFactory.createChunker('semantic', { maxSize: 500 });
 * const chunks = await chunker.chunk(text);
 * ```
 */

// Types referenced by the function signatures declared at the bottom of this file.
import type { Chunk, ChunkingStrategy, ExtractParams } from "../types/index.js";

// ---------------------------------------------------------------------------
// Wildcard re-exports
// ---------------------------------------------------------------------------
export * from "../types/index.js";
export * from "./chunkers/index.js";
export * from "./errors/index.js";
export * from "./resilience/index.js";

// ---------------------------------------------------------------------------
// Chunking
// ---------------------------------------------------------------------------

// Factory helpers. `getAvailableStrategies` is exposed as `getFactoryStrategies`
// because this module declares its own `getAvailableStrategies` below;
// `getDefaultConfig` is exposed under the factory-specific name
// `getFactoryDefaultConfig`.
export {
  ChunkerFactory,
  chunkerFactory,
  createChunker,
  getAvailableStrategies as getFactoryStrategies,
  getDefaultConfig as getFactoryDefaultConfig,
} from "./ChunkerFactory.js";

// Registry from ./ChunkerRegistry.js, exposed as `ChunkerRegistryV2` so it does
// not collide with the `ChunkerRegistry` re-exported from ./chunking/index.js.
export {
  ChunkerRegistry as ChunkerRegistryV2,
  chunkerRegistry,
  getAvailableChunkers,
  getChunker,
  getChunkerMetadata,
} from "./ChunkerRegistry.js";

// Chunker implementations, the `ChunkerRegistry` from ./chunking, and `chunkText`.
export {
  CharacterChunker,
  HTMLChunker,
  JSONChunker,
  LaTeXChunker,
  MarkdownChunker,
  RecursiveChunker,
  SemanticChunker,
  SentenceChunker,
  TokenChunker,
  ChunkerRegistry,
  chunkText,
} from "./chunking/index.js";

// ---------------------------------------------------------------------------
// Documents and loaders
// ---------------------------------------------------------------------------
export {
  MDocument,
  CSVLoader,
  HTMLLoader,
  JSONLoader,
  MarkdownLoader,
  PDFLoader,
  TextLoader,
  WebLoader,
  loadDocument,
  loadDocuments,
} from "./document/index.js";

// ---------------------------------------------------------------------------
// Graph RAG
// ---------------------------------------------------------------------------
export { GraphRAG } from "./graphRag/index.js";

// ---------------------------------------------------------------------------
// Metadata extraction
// ---------------------------------------------------------------------------
export {
  LLMMetadataExtractor,
  MetadataExtractorFactory,
  MetadataExtractorRegistry,
  metadataExtractorFactory,
  metadataExtractorRegistry,
  createMetadataExtractor,
  extractMetadata,
  getAvailableExtractors,
  getAvailableExtractorTypes,
  getExtractor,
  getExtractorDefaultConfig,
  getExtractorMetadata,
  getRegisteredExtractorMetadata,
} from "./metadata/index.js";

// ---------------------------------------------------------------------------
// Pipeline orchestration and context assembly
// ---------------------------------------------------------------------------
export {
  RAGPipeline,
  createRAGPipeline,
  assembleContext,
  createContextWindow,
  extractKeySentences,
  formatContextWithCitations,
  orderByDocumentStructure,
  summarizeContext,
} from "./pipeline/index.js";

export { prepareRAGTool } from "./ragIntegration.js";

// ---------------------------------------------------------------------------
// Reranking
// ---------------------------------------------------------------------------
export {
  CohereRelevanceScorer,
  CrossEncoderReranker,
  RerankerFactory,
  RerankerRegistry,
  rerankerFactory,
  rerankerRegistry,
  batchRerank,
  createReranker,
  getAvailableRerankers,
  getAvailableRerankerTypes,
  getRegisteredRerankerMetadata,
  getReranker,
  getRerankerDefaultConfig,
  getRerankerMetadata,
  rerank,
  simpleRerank,
} from "./reranker/index.js";

// ---------------------------------------------------------------------------
// Retrieval (vector store, BM25 index, hybrid search and rank fusion)
// ---------------------------------------------------------------------------
export {
  InMemoryBM25Index,
  InMemoryVectorStore,
  createHybridSearch,
  createVectorQueryTool,
  linearCombination,
  reciprocalRankFusion,
} from "./retrieval/index.js";

// ---------------------------------------------------------------------------
// Convenience functions declared by this module
// ---------------------------------------------------------------------------

/**
 * Run a piece of text through the full RAG document pipeline.
 *
 * @param text - The document text to process.
 * @param options - Optional processing settings; every field may be omitted.
 * @returns A promise resolving to the processed chunks, with metadata when
 *   extraction was requested.
 */
export declare function processDocument(
  text: string,
  options?: {
    /** Which chunking strategy to apply (defaults to `recursive`). */
    strategy?: ChunkingStrategy;
    /** Upper bound on the size of a single chunk. */
    maxSize?: number;
    /** Amount of overlap between chunks. */
    overlap?: number;
    /** Settings controlling metadata extraction. */
    extract?: ExtractParams;
    /** Provider used for metadata extraction. */
    provider?: string;
    /** Model used for metadata extraction. */
    model?: string;
    /** Additional caller-supplied metadata to add. */
    metadata?: Record<string, unknown>;
  },
): Promise<Chunk[]>;

/**
 * Suggest a chunking strategy for a given kind of content.
 *
 * @param contentType - A MIME type or a file extension.
 * @returns The recommended chunking strategy.
 */
export declare function getRecommendedStrategy(
  contentType: string,
): ChunkingStrategy;

/**
 * List the chunking strategies that are available.
 *
 * @returns An array of the available strategy names.
 */
export declare function getAvailableStrategies(): ChunkingStrategy[];

/**
 * Look up the default configuration of a chunking strategy.
 *
 * @param strategy - Name of the chunking strategy.
 * @returns The default configuration object for that strategy.
 */
export declare function getDefaultChunkerConfig(
  strategy: ChunkingStrategy,
): Record<string, unknown>;