@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
132 lines (131 loc) • 3.91 kB
TypeScript
/**
* Document Loaders
*
* Provides loaders for various document formats including:
* - Text files
* - Markdown files
* - HTML files and web pages
* - JSON files
* - CSV files
* - PDF files
*
* @example
* ```typescript
* import { loadDocument, WebLoader, PDFLoader } from 'neurolink/rag';
*
* // Load from file path
* const doc = await loadDocument('/path/to/document.md');
*
* // Load from URL
* const webDoc = await new WebLoader().load('https://example.com/article');
*
* // Load PDF
* const pdfDoc = await new PDFLoader().load('/path/to/document.pdf');
* ```
*/
import type { LoaderOptions, WebLoaderOptions, PDFLoaderOptions, CSVLoaderOptions, DocumentLoader } from "../../types/index.js";
import { MDocument } from "./MDocument.js";
/**
* Text file loader
*
* Implements the `DocumentLoader` contract and is the base class that the
* Markdown, HTML, JSON and CSV loaders in this file extend.
*
* NOTE(review): this is a declaration only. Behavioural notes below that go
* beyond the signatures are assumptions to confirm against the implementation.
*/
export declare class TextLoader implements DocumentLoader {
/**
* Load `source` and resolve to the resulting document.
*
* @param source - What to load (presumably a file path - confirm)
* @param options - Optional loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (the matching rule is
* not visible here - presumably extension based, confirm)
*/
canHandle(source: string): boolean;
/**
* Protected hook, callable from subclasses, that resolves to the source's
* content as a string.
*
* @param source - What to read
* @param encoding - Optional buffer encoding (the default is not visible here)
* @returns Promise resolving to the content string
*/
protected loadContent(source: string, encoding?: BufferEncoding): Promise<string>;
/**
* Protected hook, callable from subclasses, that derives a name string from
* `source` (presumably a display/file name - confirm).
*
* @param source - Source string to derive the name from
* @returns The derived name
*/
protected getSourceName(source: string): string;
}
/**
* Markdown file loader
*
* Extends `TextLoader` and declares its own `load` and `canHandle`; the
* protected `loadContent` / `getSourceName` members are inherited.
*/
export declare class MarkdownLoader extends TextLoader {
/**
* Load a Markdown source and resolve to the resulting document.
*
* @param source - What to load (presumably a Markdown file path - confirm)
* @param options - Optional loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
}
/**
* HTML file loader
*
* Extends `TextLoader` and declares its own `load` and `canHandle`; the
* protected `loadContent` / `getSourceName` members are inherited.
*/
export declare class HTMLLoader extends TextLoader {
/**
* Load an HTML source and resolve to the resulting document.
*
* @param source - What to load (presumably an HTML file path - confirm)
* @param options - Optional loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
}
/**
* JSON file loader
*
* Extends `TextLoader` and declares its own `load` and `canHandle`; the
* protected `loadContent` / `getSourceName` members are inherited.
*/
export declare class JSONLoader extends TextLoader {
/**
* Load a JSON source and resolve to the resulting document.
*
* @param source - What to load (presumably a JSON file path - confirm)
* @param options - Optional loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
}
/**
* CSV file loader
*
* Extends `TextLoader`. Unlike the other text-based loaders in this file, its
* `load` accepts `CSVLoaderOptions` rather than the generic `LoaderOptions`.
*/
export declare class CSVLoader extends TextLoader {
/**
* Load a CSV source and resolve to the resulting document.
*
* @param source - What to load (presumably a CSV file path - confirm)
* @param options - Optional CSV-specific loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: CSVLoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
/**
* Private helpers. Their signatures are not emitted in this declaration; going
* by the names they presumably split one CSV line into fields and render the
* parsed rows as a Markdown table or a plain-text table - confirm against the
* implementation.
*/
private parseCSVLine;
private toMarkdownTable;
private toTextTable;
}
/**
* PDF file loader
*
* Implements the `DocumentLoader` contract directly (it does not extend
* `TextLoader`).
*
* Note: Requires external PDF processing library for full functionality.
* Falls back to placeholder implementation if pdf-parse is not available.
*/
export declare class PDFLoader implements DocumentLoader {
/**
* Load a PDF source and resolve to the resulting document.
*
* @param source - What to load (presumably a PDF file path - confirm)
* @param options - Optional PDF-specific loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: PDFLoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
/**
* Private helpers. Their signatures are not emitted in this declaration; going
* by the names they presumably obtain the optional PDF parsing library and
* interpret a page-range option - confirm against the implementation.
*/
private loadPdfParser;
private parsePageRange;
}
/**
* Web page loader
*
* Fetches and extracts content from web pages.
* Supports basic HTML parsing without external dependencies.
*
* Implements the `DocumentLoader` contract directly (it does not extend
* `TextLoader`).
*/
export declare class WebLoader implements DocumentLoader {
/**
* Private member whose type and value are not emitted here; by its name,
* presumably the User-Agent used when the caller supplies none - confirm.
*/
private defaultUserAgent;
/**
* Load a web page and resolve to the resulting document.
*
* @param source - What to load (presumably a URL - confirm)
* @param options - Optional web-specific loader settings
* @returns Promise resolving to MDocument
*/
load(source: string, options?: WebLoaderOptions): Promise<MDocument>;
/**
* Report whether this loader accepts the given source.
*
* @param source - Candidate source string
* @returns `true` when the loader can handle `source` (matching rule not
* visible in this declaration)
*/
canHandle(source: string): boolean;
/**
* Extract main content from HTML
*
* Private; the signature is not emitted in this declaration.
*/
private extractMainContent;
/**
* Convert HTML to plain text
*
* Private; the signature is not emitted in this declaration.
*/
private htmlToText;
}
/**
* Load document from file path, URL, or content
*
* Automatically detects the document type and uses the appropriate loader.
*
* @param source - File path, URL, or raw content
* @param options - Loader options
* @returns Promise resolving to MDocument
*
* @example
* ```typescript
* // Load from file
* const doc = await loadDocument('/path/to/document.md');
*
* // Load from URL
* const webDoc = await loadDocument('https://example.com/article');
*
* // Load with options
* // NOTE(review): `options` is typed as `LoaderOptions`; confirm that type
* // accepts `pageRange`, otherwise this object literal will not type-check.
* const pdfDoc = await loadDocument('/path/to/doc.pdf', {
* pageRange: '1-5',
* metadata: { project: 'research' }
* });
* ```
*/
export declare function loadDocument(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
* Load multiple documents
*
* Takes several sources at once and resolves to one MDocument per load; the
* same `options` object is used for every source.
*
* NOTE(review): ordering of the result relative to `sources`, and what happens
* when a single source fails, are not visible in this declaration - confirm
* against the implementation.
*
* @param sources - Array of file paths, URLs, or content
* @param options - Loader options (applied to all)
* @returns Promise resolving to array of MDocuments
*/
export declare function loadDocuments(sources: string[], options?: LoaderOptions): Promise<MDocument[]>;