UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

132 lines (131 loc) 3.91 kB
/**
 * Document Loaders
 *
 * Provides loaders for various document formats including:
 * - Text files
 * - Markdown files
 * - HTML files and web pages
 * - JSON files
 * - CSV files
 * - PDF files
 *
 * Every loader class declares `load` and `canHandle` as instance methods, so a
 * loader must be instantiated before use. `loadDocument` and `loadDocuments`
 * are plain functions.
 *
 * This file contains type declarations only; the behaviour described in the
 * comments below comes from the original documentation and the declared
 * signatures, not from visible implementation code.
 *
 * @example
 * ```typescript
 * // NOTE(review): import specifier kept from the original docs; confirm it
 * // matches the package's published entry points.
 * import { loadDocument, WebLoader, PDFLoader } from 'neurolink/rag';
 *
 * // Load from file path
 * const doc = await loadDocument('/path/to/document.md');
 *
 * // Load from URL (`load` is an instance method, so construct the loader first)
 * const webDoc = await new WebLoader().load('https://example.com/article');
 *
 * // Load PDF
 * const pdfDoc = await new PDFLoader().load('/path/to/document.pdf');
 * ```
 */
import type { LoaderOptions, WebLoaderOptions, PDFLoaderOptions, CSVLoaderOptions, DocumentLoader } from "../../types/index.js";
import { MDocument } from "./MDocument.js";
/**
 * Text file loader
 *
 * Base class for the Markdown, HTML, JSON and CSV loaders declared below,
 * which all extend it.
 */
export declare class TextLoader implements DocumentLoader {
    /**
     * Load `source` and resolve to an `MDocument`.
     *
     * @param source - Identifier of the content to load
     * @param options - Optional loader options
     * @returns Promise resolving to the loaded `MDocument`
     */
    load(source: string, options?: LoaderOptions): Promise<MDocument>;
    /**
     * Report whether this loader accepts `source`.
     * The matching criteria live in the implementation and are not visible here.
     *
     * @param source - Identifier of the content to test
     */
    canHandle(source: string): boolean;
    /**
     * Resolve the raw string content for `source`; available to subclasses.
     *
     * @param source - Identifier of the content to read
     * @param encoding - Optional buffer encoding (the default is not visible here)
     */
    protected loadContent(source: string, encoding?: BufferEncoding): Promise<string>;
    /**
     * Derive a name string for `source`; available to subclasses.
     * NOTE(review): presumably used for document metadata; confirm in the implementation.
     */
    protected getSourceName(source: string): string;
}
/**
 * Markdown file loader
 *
 * Extends `TextLoader` and redeclares `load` and `canHandle` with the same signatures.
 */
export declare class MarkdownLoader extends TextLoader {
    /** Load `source` and resolve to an `MDocument`. */
    load(source: string, options?: LoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
}
/**
 * HTML file loader
 *
 * Extends `TextLoader` and redeclares `load` and `canHandle` with the same signatures.
 */
export declare class HTMLLoader extends TextLoader {
    /** Load `source` and resolve to an `MDocument`. */
    load(source: string, options?: LoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
}
/**
 * JSON file loader
 *
 * Extends `TextLoader` and redeclares `load` and `canHandle` with the same signatures.
 */
export declare class JSONLoader extends TextLoader {
    /** Load `source` and resolve to an `MDocument`. */
    load(source: string, options?: LoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
}
/**
 * CSV file loader
 *
 * Extends `TextLoader`; `load` takes `CSVLoaderOptions` instead of the base
 * `LoaderOptions`.
 */
export declare class CSVLoader extends TextLoader {
    /**
     * Load `source` and resolve to an `MDocument`.
     *
     * @param source - Identifier of the CSV content to load
     * @param options - Optional CSV-specific loader options
     */
    load(source: string, options?: CSVLoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
    // Private members are listed by name only: declaration files omit the
    // types and signatures of private members.
    private parseCSVLine;
    private toMarkdownTable;
    private toTextTable;
}
/**
 * PDF file loader
 *
 * Note: Requires external PDF processing library for full functionality.
 * Falls back to placeholder implementation if pdf-parse is not available.
 *
 * Implements `DocumentLoader` directly rather than extending `TextLoader`.
 */
export declare class PDFLoader implements DocumentLoader {
    /**
     * Load `source` and resolve to an `MDocument`.
     *
     * @param source - Identifier of the PDF to load
     * @param options - Optional PDF-specific loader options
     */
    load(source: string, options?: PDFLoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
    // Private members are listed by name only: declaration files omit the
    // types and signatures of private members.
    private loadPdfParser;
    private parsePageRange;
}
/**
 * Web page loader
 *
 * Fetches and extracts content from web pages.
 * Supports basic HTML parsing without external dependencies.
 *
 * Implements `DocumentLoader` directly rather than extending `TextLoader`.
 */
export declare class WebLoader implements DocumentLoader {
    // NOTE(review): presumably the User-Agent value used when none is supplied
    // in the options; confirm in the implementation.
    private defaultUserAgent;
    /**
     * Load `source` and resolve to an `MDocument`.
     *
     * @param source - Identifier of the web page to load (a URL in the module example)
     * @param options - Optional web-specific loader options
     */
    load(source: string, options?: WebLoaderOptions): Promise<MDocument>;
    /** Report whether this loader accepts `source`. */
    canHandle(source: string): boolean;
    /**
     * Extract main content from HTML
     */
    private extractMainContent;
    /**
     * Convert HTML to plain text
     */
    private htmlToText;
}
/**
 * Load document from file path, URL, or content
 *
 * Automatically detects the document type and uses the appropriate loader.
 *
 * @param source - File path, URL, or raw content
 * @param options - Loader options
 * @returns Promise resolving to MDocument
 *
 * @example
 * ```typescript
 * // Load from file
 * const doc = await loadDocument('/path/to/document.md');
 *
 * // Load from URL
 * const webDoc = await loadDocument('https://example.com/article');
 *
 * // Load with options
 * // NOTE(review): `options` is typed as LoaderOptions here; confirm that
 * // type accepts `pageRange` (presumably a PDFLoaderOptions field).
 * const pdfDoc = await loadDocument('/path/to/doc.pdf', {
 *   pageRange: '1-5',
 *   metadata: { project: 'research' }
 * });
 * ```
 */
export declare function loadDocument(source: string, options?: LoaderOptions): Promise<MDocument>;
/**
 * Load multiple documents
 *
 * @param sources - Array of file paths, URLs, or content
 * @param options - Loader options (applied to all)
 * @returns Promise resolving to array of MDocuments
 */
export declare function loadDocuments(sources: string[], options?: LoaderOptions): Promise<MDocument[]>;