@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
142 lines (141 loc) • 6.31 kB
JavaScript
/**
* RAG Document Processing Module
*
* Provides comprehensive RAG (Retrieval-Augmented Generation) capabilities:
* - Document loading (text, markdown, HTML, JSON, CSV, PDF, web)
* - MDocument class for fluent document processing
* - 10 chunking strategies (character, recursive, sentence, token, markdown, html, json, latex, semantic, semantic-markdown)
* - LLM-powered metadata extraction (title, summary, keywords, Q&A)
* - Vector query tools with metadata filtering and reranking
* - Hybrid search (BM25 + vector fusion)
* - Graph RAG for knowledge graph-based retrieval
* - RAG pipeline orchestration
* - Context assembly and formatting
* - ChunkerFactory and ChunkerRegistry patterns for extensibility
* - Error handling and resilience (CircuitBreaker, RetryHandler)
*
* @example
* ```typescript
* import {
* MDocument,
* loadDocument,
* RAGPipeline,
* ChunkerRegistry,
* ChunkerFactory,
* CircuitBreaker
* } from '@juspay/neurolink';
*
* // Load and process a document
* const doc = await loadDocument('/path/to/document.md');
* await doc.chunk({ strategy: 'markdown', config: { maxSize: 1000 } });
* await doc.embed('openai', 'text-embedding-3-small');
*
* // Or use the full RAG pipeline
* const pipeline = new RAGPipeline({
* embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
* generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
* });
* await pipeline.ingest(['/path/to/docs/*.md']);
* const response = await pipeline.query('What are the key features?');
*
* // Use factory pattern for chunker creation
* const chunker = await ChunkerFactory.createChunker('semantic', { maxSize: 500 });
* const chunks = await chunker.chunk(text);
* ```
*/
// Chunker factory. `getAvailableStrategies` is re-exported as `getFactoryStrategies`
// because this module declares its own `getAvailableStrategies` export further down;
// `getDefaultConfig` is likewise renamed to `getFactoryDefaultConfig`.
export { ChunkerFactory, chunkerFactory, createChunker, getAvailableStrategies as getFactoryStrategies, getDefaultConfig as getFactoryDefaultConfig, } from "./ChunkerFactory.js";
// Chunker registry from ./ChunkerRegistry.js. Its `ChunkerRegistry` is exposed as
// `ChunkerRegistryV2` because the name `ChunkerRegistry` is already used by the
// export from ./chunking/index.js below.
export { ChunkerRegistry as ChunkerRegistryV2, chunkerRegistry, getAvailableChunkers, getChunker, getChunkerMetadata, } from "./ChunkerRegistry.js";
// Base chunker and chunker implementations
// NOTE(review): a name exported by more than one `export *` source in this module is
// silently omitted under ES module rules — confirm chunkers/errors/resilience/types do not overlap.
export * from "./chunkers/index.js";
// Chunking (this `ChunkerRegistry` is the one the convenience functions in this module call)
export { CharacterChunker, ChunkerRegistry, chunkText, HTMLChunker, JSONChunker, LaTeXChunker, MarkdownChunker, RecursiveChunker, SemanticChunker, SentenceChunker, TokenChunker, } from "./chunking/index.js";
// Document Processing: loaders plus the MDocument class
export { CSVLoader, HTMLLoader, JSONLoader, loadDocument, loadDocuments, MarkdownLoader, MDocument, PDFLoader, TextLoader, WebLoader, } from "./document/index.js";
// Error handling
export * from "./errors/index.js";
// Graph RAG
export { GraphRAG } from "./graphRag/index.js";
// Metadata Extraction
export { createMetadataExtractor, extractMetadata, getAvailableExtractors, getAvailableExtractorTypes, getExtractor, getExtractorDefaultConfig, getExtractorMetadata, getRegisteredExtractorMetadata, LLMMetadataExtractor,
// Factory pattern
MetadataExtractorFactory,
// Registry pattern
MetadataExtractorRegistry, metadataExtractorFactory, metadataExtractorRegistry, } from "./metadata/index.js";
// Pipeline: RAGPipeline plus context assembly/formatting helpers
export { assembleContext, createContextWindow, createRAGPipeline, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, RAGPipeline, summarizeContext, } from "./pipeline/index.js";
// RAG Integration (for generate/stream)
export { prepareRAGTool } from "./ragIntegration.js";
// Reranker
export { batchRerank, CohereRelevanceScorer, CrossEncoderReranker, createReranker, getAvailableRerankers, getAvailableRerankerTypes, getRegisteredRerankerMetadata, getReranker, getRerankerDefaultConfig, getRerankerMetadata,
// Factory pattern
RerankerFactory,
// Registry pattern
RerankerRegistry, rerank, rerankerFactory, rerankerRegistry, simpleRerank, } from "./reranker/index.js";
// Resilience patterns
export * from "./resilience/index.js";
// Retrieval: hybrid search, vector query tool, in-memory stores and score-fusion helpers
export { createHybridSearch, createVectorQueryTool, InMemoryBM25Index, InMemoryVectorStore, linearCombination, reciprocalRankFusion, } from "./retrieval/index.js";
// Types
export * from "../types/index.js";
// Convenience functions
import { ChunkerRegistry } from "./chunking/index.js";
import { LLMMetadataExtractor } from "./metadata/index.js";
/**
 * Chunk a document and optionally enrich each chunk with LLM-extracted metadata.
 *
 * The text is split by the chunker registered in ChunkerRegistry under
 * `options.strategy`. When `options.extract` is truthy, an LLMMetadataExtractor
 * is run over the chunks and every truthy `title`, `summary` and `keywords`
 * value it returns is copied onto the metadata of the chunk at the same index.
 * Chunks without a `metadata` object get one created on demand, and missing
 * (null/undefined) chunk or result entries are skipped instead of throwing.
 *
 * @param text - Document text to process
 * @param options - Processing options (optional; a falsy value selects all defaults)
 * @param options.strategy - Chunking strategy name looked up in ChunkerRegistry (default "recursive")
 * @param options.maxSize - `maxSize` setting passed to the chunker (default 1000)
 * @param options.overlap - `overlap` setting passed to the chunker (default 200)
 * @param options.extract - Extraction parameters forwarded to the extractor; extraction is skipped when falsy
 * @param options.provider - Provider handed to the metadata extractor
 * @param options.model - Model name handed to the metadata extractor as `modelName`
 * @param options.metadata - Base metadata passed to the chunker (default {})
 * @returns The chunks produced by the chunker, mutated in place with any extracted metadata
 */
export async function processDocument(text, options) {
    const { strategy = "recursive", maxSize = 1000, overlap = 200, extract, provider, model, metadata = {}, } = options || {};
    // Chunk the document with the chunker registered for the requested strategy
    const chunker = ChunkerRegistry.get(strategy);
    const chunks = await chunker.chunk(text, { maxSize, overlap, metadata });
    // Nothing more to do unless metadata extraction was requested
    if (!extract) {
        return chunks;
    }
    const extractor = new LLMMetadataExtractor({ provider, modelName: model });
    const results = await extractor.extract(chunks, extract);
    // Only these extractor fields are merged back onto the chunks
    const mergedFields = ["title", "summary", "keywords"];
    // The result list may be shorter or longer than the chunk list; merge the common prefix only
    const mergeCount = Math.min(chunks.length, results.length);
    for (let i = 0; i < mergeCount; i++) {
        const chunk = chunks[i];
        const result = results[i];
        // Skip missing entries rather than throwing on property access
        if (!chunk || !result) {
            continue;
        }
        for (const field of mergedFields) {
            const value = result[field];
            if (!value) {
                continue;
            }
            // Create the metadata holder lazily so untouched chunks keep their original shape
            if (!chunk.metadata) {
                chunk.metadata = {};
            }
            chunk.metadata[field] = value;
        }
    }
    return chunks;
}
/**
 * Look up the chunking strategy recommended for a kind of content.
 *
 * Thin convenience wrapper that delegates to ChunkerRegistry.getRecommendedStrategy.
 *
 * @param contentType - MIME type or file extension
 * @returns Whatever strategy the registry recommends for that content type
 */
export function getRecommendedStrategy(contentType) {
    const recommended = ChunkerRegistry.getRecommendedStrategy(contentType);
    return recommended;
}
/**
 * List the chunking strategies known to the chunking registry.
 *
 * Thin convenience wrapper that delegates to ChunkerRegistry.getAvailableStrategies.
 *
 * @returns Array of available strategy names, as reported by the registry
 */
export function getAvailableStrategies() {
    const strategyNames = ChunkerRegistry.getAvailableStrategies();
    return strategyNames;
}
/**
 * Fetch the default configuration the chunking registry holds for a strategy.
 *
 * Thin convenience wrapper that delegates to ChunkerRegistry.getDefaultConfig.
 *
 * @param strategy - Chunking strategy name
 * @returns Default configuration object for that strategy, as reported by the registry
 */
export function getDefaultChunkerConfig(strategy) {
    const defaults = ChunkerRegistry.getDefaultConfig(strategy);
    return defaults;
}