@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
104 lines (103 loc) • 5.25 kB
TypeScript
/**
* RAG Document Processing Module
*
* Provides comprehensive RAG (Retrieval-Augmented Generation) capabilities:
* - Document loading (text, markdown, HTML, JSON, CSV, PDF, web)
* - MDocument class for fluent document processing
* - 10 chunking strategies (character, recursive, sentence, token, markdown, html, json, latex, semantic, semantic-markdown)
* - LLM-powered metadata extraction (title, summary, keywords, Q&A)
* - Vector query tools with metadata filtering and reranking
* - Hybrid search (BM25 + vector fusion)
* - Graph RAG for knowledge graph-based retrieval
* - RAG pipeline orchestration
* - Context assembly and formatting
* - ChunkerFactory and ChunkerRegistry patterns for extensibility
* - Error handling and resilience (CircuitBreaker, RetryHandler)
*
* @example
* ```typescript
* import {
* MDocument,
* loadDocument,
* RAGPipeline,
* ChunkerRegistry,
* ChunkerFactory,
* CircuitBreaker
* } from '@juspay/neurolink';
*
* // Load and process a document
* const doc = await loadDocument('/path/to/document.md');
* await doc.chunk({ strategy: 'markdown', config: { maxSize: 1000 } });
* await doc.embed('openai', 'text-embedding-3-small');
*
* // Or use the full RAG pipeline
* const pipeline = new RAGPipeline({
* embeddingModel: { provider: 'openai', modelName: 'text-embedding-3-small' },
* generationModel: { provider: 'openai', modelName: 'gpt-4o-mini' }
* });
* await pipeline.ingest(['/path/to/docs/*.md']);
* const response = await pipeline.query('What are the key features?');
*
* // Use factory pattern for chunker creation
* const chunker = await ChunkerFactory.createChunker('semantic', { maxSize: 500 });
* const chunks = await chunker.chunk(text);
* ```
*/
// Chunker factory. `getAvailableStrategies` is re-exported as `getFactoryStrategies`
// because this module declares its own `getAvailableStrategies` further down;
// `getDefaultConfig` is likewise given a `Factory`-prefixed name
// (presumably to keep it distinct from `getDefaultChunkerConfig` — confirm).
export { ChunkerFactory, chunkerFactory, createChunker, getAvailableStrategies as getFactoryStrategies, getDefaultConfig as getFactoryDefaultConfig, } from "./ChunkerFactory.js";
// Chunker registry. The class is exposed as `ChunkerRegistryV2` because the name
// `ChunkerRegistry` is already taken by the export from "./chunking/index.js" below.
export { ChunkerRegistry as ChunkerRegistryV2, chunkerRegistry, getAvailableChunkers, getChunker, getChunkerMetadata, } from "./ChunkerRegistry.js";
// Wildcard re-export; the exported names are not visible from this file.
export * from "./chunkers/index.js";
// Chunker classes, the `ChunkerRegistry` from the chunking package, and the `chunkText` helper.
export { CharacterChunker, ChunkerRegistry, chunkText, HTMLChunker, JSONChunker, LaTeXChunker, MarkdownChunker, RecursiveChunker, SemanticChunker, SentenceChunker, TokenChunker, } from "./chunking/index.js";
// Document loaders and the MDocument class.
export { CSVLoader, HTMLLoader, JSONLoader, loadDocument, loadDocuments, MarkdownLoader, MDocument, PDFLoader, TextLoader, WebLoader, } from "./document/index.js";
// Wildcard re-export of the errors package; the exported names are not visible from this file.
export * from "./errors/index.js";
// Graph RAG.
export { GraphRAG } from "./graphRag/index.js";
// Metadata extraction: extractor class, factory, registry and their helper functions.
export { createMetadataExtractor, extractMetadata, getAvailableExtractors, getAvailableExtractorTypes, getExtractor, getExtractorDefaultConfig, getExtractorMetadata, getRegisteredExtractorMetadata, LLMMetadataExtractor, MetadataExtractorFactory, MetadataExtractorRegistry, metadataExtractorFactory, metadataExtractorRegistry, } from "./metadata/index.js";
// RAG pipeline and context assembly/formatting helpers.
export { assembleContext, createContextWindow, createRAGPipeline, extractKeySentences, formatContextWithCitations, orderByDocumentStructure, RAGPipeline, summarizeContext, } from "./pipeline/index.js";
// RAG tool integration helper.
export { prepareRAGTool } from "./ragIntegration.js";
// Rerankers: implementations, factory, registry and their helper functions.
export { batchRerank, CohereRelevanceScorer, CrossEncoderReranker, createReranker, getAvailableRerankers, getAvailableRerankerTypes, getRegisteredRerankerMetadata, getReranker, getRerankerDefaultConfig, getRerankerMetadata, RerankerFactory, RerankerRegistry, rerank, rerankerFactory, rerankerRegistry, simpleRerank, } from "./reranker/index.js";
// Wildcard re-export of the resilience package
// (presumably where CircuitBreaker and RetryHandler come from — confirm).
export * from "./resilience/index.js";
// Retrieval: hybrid search, vector query tool, in-memory indexes and rank-fusion helpers.
export { createHybridSearch, createVectorQueryTool, InMemoryBM25Index, InMemoryVectorStore, linearCombination, reciprocalRankFusion, } from "./retrieval/index.js";
// Wildcard re-export of the shared type definitions.
export * from "../types/index.js";
import type { Chunk, ChunkingStrategy, ExtractParams } from "../types/index.js";
/**
* Process a document through the full RAG pipeline
*
* Accepts the raw document text and resolves to an array of chunks. The
* `options` argument, and every field inside it, is optional.
*
* NOTE(review): only the type declaration is visible here. The defaults applied
* to `maxSize` and `overlap`, and whether metadata extraction is skipped when
* `extract` is omitted, should be confirmed against the implementation.
*
* @example
* ```typescript
* const chunks = await processDocument(text, {
*   strategy: 'markdown',
*   maxSize: 1000,
* });
* ```
*
* @param text - Document text to process
* @param options - Processing options
* @returns Promise resolving to the processed chunks, with optional metadata
*/
export declare function processDocument(text: string, options?: {
/** Chunking strategy (default: recursive) */
strategy?: ChunkingStrategy;
/** Maximum chunk size (unit is not stated in this declaration — TODO confirm) */
maxSize?: number;
/** Chunk overlap (unit is not stated in this declaration — TODO confirm) */
overlap?: number;
/** Metadata extraction options */
extract?: ExtractParams;
/** Provider for metadata extraction */
provider?: string;
/** Model for metadata extraction */
model?: string;
/** Custom metadata to add (arbitrary string keys, values typed as `unknown`) */
metadata?: Record<string, unknown>;
}): Promise<Chunk[]>;
/**
* Get recommended chunking strategy based on content type
*
* The return type is a non-optional `ChunkingStrategy`, so per the signature a
* strategy is always returned.
*
* NOTE(review): what is returned for an unrecognised content type is not
* visible from this declaration — confirm against the implementation.
*
* @param contentType - MIME type or file extension
* @returns Recommended chunking strategy
*/
export declare function getRecommendedStrategy(contentType: string): ChunkingStrategy;
/**
* Get available chunking strategies
*
* This is the function declared by this module itself. The function of the same
* name exported by "./ChunkerFactory.js" is re-exported from this module under
* the alias `getFactoryStrategies`.
*
* @returns Array of available strategy names
*/
export declare function getAvailableStrategies(): ChunkingStrategy[];
/**
* Get default configuration for a chunking strategy
*
* The result is typed as `Record<string, unknown>`, so callers must narrow
* individual values before using them. A separate factory-level helper is
* re-exported from this module as `getFactoryDefaultConfig`.
*
* @param strategy - Chunking strategy name
* @returns Default configuration object
*/
export declare function getDefaultChunkerConfig(strategy: ChunkingStrategy): Record<string, unknown>;