UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

251 lines (250 loc) 8.48 kB
/**
 * Excel Processor
 *
 * Handles downloading, validating, and processing Excel files (.xlsx, .xls).
 * Uses exceljs library for parsing with streaming support for large files.
 *
 * Key features:
 * - Supports both .xlsx and legacy .xls formats
 * - Extracts worksheet data with headers
 * - Handles complex cell types (formulas, rich text, dates)
 * - Respects configurable row and sheet limits
 * - Provides truncation metadata when limits are exceeded
 *
 * This file contains type declarations only (`export declare ...`); no
 * implementation is visible here.
 *
 * @module processors/document/ExcelProcessor
 *
 * @example
 * ```typescript
 * import { excelProcessor, processExcel, isExcelFile } from "./ExcelProcessor.js";
 *
 * // Check if a file is an Excel file
 * if (isExcelFile(fileInfo.mimetype, fileInfo.name)) {
 *   // Process the Excel file
 *   const result = await processExcel(fileInfo, {
 *     authHeaders: { Authorization: "Bearer token" },
 *   });
 *
 *   if (result.success) {
 *     console.log(`Processed ${result.data.sheetCount} sheets`);
 *     console.log(`Total rows: ${result.data.totalRows}`);
 *
 *     for (const sheet of result.data.worksheets) {
 *       console.log(`Sheet: ${sheet.name}, Rows: ${sheet.rowCount}`);
 *     }
 *   }
 * }
 * ```
 */
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import type { FileInfo, ProcessOptions, ProcessedExcel, ProcessorFileProcessingResult } from "../../types/index.js";
/**
 * Excel Processor - handles .xlsx and .xls files.
 * Uses exceljs library for parsing with support for large files.
 *
 * Features:
 * - ZIP format validation (XLSX files are ZIP archives)
 * - Sheet count limiting (MAX_EXCEL_SHEETS)
 * - Row count limiting per sheet (MAX_EXCEL_ROWS)
 * - Cell type handling (text, numbers, formulas, dates, rich text)
 *
 * NOTE(review): private members below are declared without signatures, so
 * their parameter and return documentation cannot be checked against this
 * declaration file - confirm against the implementation.
 *
 * @example
 * ```typescript
 * const processor = new ExcelProcessor();
 *
 * // Process a file
 * const result = await processor.processFile(fileInfo, {
 *   authHeaders: { Authorization: "Bearer token" },
 * });
 *
 * if (result.success) {
 *   console.log(`Sheets: ${result.data.sheetCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export declare class ExcelProcessor extends BaseFileProcessor<ProcessedExcel> {
    /** Creates a processor instance; takes no arguments. */
    constructor();
    /**
     * Validate downloaded Excel file has correct format.
     * XLSX files are ZIP archives starting with PK signature.
     *
     * NOTE(review): legacy .xls files are OLE2 compound documents, not ZIP
     * archives, so a PK-signature check alone would not match them. How .xls
     * input is validated is not visible from this declaration - confirm
     * against the implementation.
     *
     * @param buffer - Downloaded file content
     * @param _fileInfo - Original file information (unused but required by interface)
     * @returns Promise that resolves to null if valid, or to an error message if invalid
     */
    protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
    /**
     * Build processed result stub.
     * Note: This is a synchronous stub - actual parsing happens in processFile override.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Empty ProcessedExcel structure (populated by processFile)
     */
    protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedExcel;
    /**
     * Override processFile for async Excel parsing with exceljs.
     * This override is necessary because exceljs uses async parsing.
     *
     * @param fileInfo - File information (can include URL or buffer)
     * @param options - Optional processing options (auth headers, timeout, etc.)
     * @returns Promise that resolves to a processing result with parsed Excel data or error
     */
    processFile(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedExcel>>;
    /**
     * Parse Excel buffer into workbook using exceljs.
     *
     * @param buffer - Excel file content
     * @returns Parsed ExcelJS Workbook
     */
    private parseWorkbook;
    /**
     * Extract worksheet data from workbook with row and sheet limits.
     *
     * @param workbook - Parsed ExcelJS Workbook
     * @returns Extracted worksheets with truncation metadata
     */
    private extractWorksheets;
    /**
     * Convert an Excel cell value to a primitive type.
     * Handles various cell types including formulas, rich text, and dates.
     *
     * @param cell - ExcelJS cell value (can be various types)
     * @returns Primitive value (string, number, boolean, or null)
     */
    private getCellValue;
    /**
     * Extract text from rich text cell format.
     * Rich text cells contain an array of text fragments with formatting.
     *
     * @param richText - Array of rich text fragments
     * @returns Concatenated plain text
     */
    private extractRichText;
    /**
     * Extract a specific range from a spreadsheet.
     *
     * Called by the `extract_file_content` tool for targeted data access.
     * Returns TSV-formatted text for the specified sheet, row range, and columns.
     *
     * Every parameter except `buffer` is optional.
     *
     * @param buffer - Excel file buffer
     * @param sheet - Sheet name or 0-based index (default: first sheet)
     * @param rowStart - Starting row (1-indexed, default: 1)
     * @param rowEnd - Ending row (1-indexed, default: all rows)
     * @param columns - Specific column letters to include (e.g., ["A", "B", "D"])
     * @returns Promise that resolves to a TSV-formatted string with the extracted data
     */
    extractSheetRange(buffer: Buffer, sheet?: string | number, rowStart?: number, rowEnd?: number, columns?: string[]): Promise<string>;
}
/**
 * Singleton Excel processor instance.
 * Use this for standard Excel processing operations.
 *
 * @example
 * ```typescript
 * import { excelProcessor } from "./ExcelProcessor.js";
 *
 * const result = await excelProcessor.processFile(fileInfo);
 * ```
 */
export declare const excelProcessor: ExcelProcessor;
/**
 * Check if a file is an Excel file.
 * Matches by MIME type or file extension.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is an Excel file
 *
 * @example
 * ```typescript
 * if (isExcelFile("application/vnd.ms-excel", "data.xls")) {
 *   // Process as Excel
 * }
 *
 * if (isExcelFile("", "report.xlsx")) {
 *   // Also matches by extension
 * }
 * ```
 */
export declare function isExcelFile(mimetype: string, filename: string): boolean;
/**
 * Validate Excel file size against configured limit.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if size is within the Excel file limit
 *
 * @example
 * ```typescript
 * if (!validateExcelSize(fileInfo.size)) {
 *   console.error(`File too large: max ${getExcelMaxSizeMB()}MB`);
 * }
 * ```
 */
export declare function validateExcelSize(sizeBytes: number): boolean;
/**
 * Process a single Excel file.
 * Convenience function that uses the singleton processor.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, etc.)
 * @returns Promise that resolves to a processing result with parsed Excel data or error
 *
 * @example
 * ```typescript
 * import { processExcel } from "./ExcelProcessor.js";
 *
 * const result = await processExcel(fileInfo, {
 *   authHeaders: { Authorization: "Bearer token" },
 *   timeout: 120000, // 2 minutes for large files
 * });
 *
 * if (result.success) {
 *   const { worksheets, totalRows, truncated } = result.data;
 *   console.log(`Extracted ${totalRows} rows from ${worksheets.length} sheets`);
 *
 *   if (truncated) {
 *     console.warn("Some data was truncated due to size limits");
 *   }
 * } else {
 *   console.error(`Processing failed: ${result.error?.userMessage}`);
 * }
 * ```
 */
export declare function processExcel(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedExcel>>;
/**
 * Get Excel max size in MB.
 *
 * @returns Maximum Excel file size in megabytes
 *
 * @example
 * ```typescript
 * const maxSize = getExcelMaxSizeMB(); // e.g. 10
 * console.log(`Maximum Excel file size: ${maxSize}MB`);
 * ```
 */
export declare function getExcelMaxSizeMB(): number;
/**
 * Get Excel max rows per sheet.
 *
 * @returns Maximum rows to process per worksheet
 *
 * @example
 * ```typescript
 * const maxRows = getExcelMaxRows(); // e.g. 5000
 * console.log(`Maximum rows per sheet: ${maxRows}`);
 * ```
 */
export declare function getExcelMaxRows(): number;
/**
 * Get Excel max sheets to process.
 *
 * @returns Maximum number of worksheets to process
 *
 * @example
 * ```typescript
 * const maxSheets = getExcelMaxSheets(); // e.g. 10
 * console.log(`Maximum sheets to process: ${maxSheets}`);
 * ```
 */
export declare function getExcelMaxSheets(): number;