@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
251 lines (250 loc) • 8.48 kB
TypeScript
/**
* Excel Processor
*
* Handles downloading, validating, and processing Excel files (.xlsx, .xls).
* Uses exceljs library for parsing with streaming support for large files.
*
* Key features:
* - Supports both .xlsx and legacy .xls formats
* - Extracts worksheet data with headers
* - Handles complex cell types (formulas, rich text, dates)
* - Respects configurable row and sheet limits
* - Provides truncation metadata when limits are exceeded
*
* @module processors/document/ExcelProcessor
*
* @example
* ```typescript
* import { excelProcessor, processExcel, isExcelFile } from "./ExcelProcessor.js";
*
* // Check if a file is an Excel file
* if (isExcelFile(fileInfo.mimetype, fileInfo.name)) {
*   // Process the Excel file
*   const result = await processExcel(fileInfo, {
*     authHeaders: { Authorization: "Bearer token" },
*   });
*
*   if (result.success) {
*     console.log(`Processed ${result.data.sheetCount} sheets`);
*     console.log(`Total rows: ${result.data.totalRows}`);
*
*     for (const sheet of result.data.worksheets) {
*       console.log(`Sheet: ${sheet.name}, Rows: ${sheet.rowCount}`);
*     }
*   }
* }
* ```
*/
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import type { FileInfo, ProcessOptions, ProcessedExcel, ProcessorFileProcessingResult } from "../../types/index.js";
/**
* Excel Processor - handles .xlsx and .xls files.
* Extends BaseFileProcessor, specialised to produce ProcessedExcel results.
* Uses exceljs library for parsing with support for large files.
*
* Features:
* - ZIP format validation (XLSX files are ZIP archives)
* - Sheet count limiting (MAX_EXCEL_SHEETS)
* - Row count limiting per sheet (MAX_EXCEL_ROWS)
* - Cell type handling (text, numbers, formulas, dates, rich text)
* - Targeted range extraction as TSV text (see extractSheetRange)
*
* @example
* ```typescript
* const processor = new ExcelProcessor();
*
* // Process a file
* const result = await processor.processFile(fileInfo, {
*   authHeaders: { Authorization: "Bearer token" },
* });
*
* if (result.success) {
*   console.log(`Sheets: ${result.data.sheetCount}`);
*   console.log(`Truncated: ${result.data.truncated}`);
* }
* ```
*/
export declare class ExcelProcessor extends BaseFileProcessor<ProcessedExcel> {
/**
* Create a new Excel processor. Takes no arguments.
*/
constructor();
/**
* Validate downloaded Excel file has correct format.
* XLSX files are ZIP archives starting with PK signature.
*
* NOTE(review): legacy .xls files are OLE2 compound documents rather than
* ZIP archives, so they do not start with the PK signature. This declaration
* does not show how .xls input is treated here - confirm against the
* implementation before relying on .xls support.
*
* @param buffer - Downloaded file content
* @param _fileInfo - Original file information (unused but required by interface)
* @returns Promise resolving to null if valid, or to an error message if invalid
*/
protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
/**
* Build processed result stub.
* Note: This is a synchronous stub - actual parsing happens in processFile override.
* Callers that need parsed worksheet data should therefore go through
* processFile rather than use this method's return value directly.
*
* @param buffer - Downloaded file content
* @param fileInfo - Original file information
* @returns Empty ProcessedExcel structure (populated by processFile)
*/
protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedExcel;
/**
* Override processFile for async Excel parsing with exceljs.
* This override is necessary because exceljs uses async parsing.
*
* @param fileInfo - File information (can include URL or buffer)
* @param options - Optional processing options (auth headers, timeout, etc.)
* @returns Promise resolving to a processing result with parsed Excel data or error;
* check `result.success` before reading `result.data`
*/
processFile(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedExcel>>;
/**
* Parse Excel buffer into workbook using exceljs.
*
* Declared without a signature because TypeScript omits the types of
* private members from declaration output.
*
* @param buffer - Excel file content
* @returns Parsed ExcelJS Workbook
*/
private parseWorkbook;
/**
* Extract worksheet data from workbook with row and sheet limits.
*
* @param workbook - Parsed ExcelJS Workbook
* @returns Extracted worksheets with truncation metadata
*/
private extractWorksheets;
/**
* Convert an Excel cell value to a primitive type.
* Handles various cell types including formulas, rich text, and dates.
*
* @param cell - ExcelJS cell value (can be various types)
* @returns Primitive value (string, number, boolean, or null)
*/
private getCellValue;
/**
* Extract text from rich text cell format.
* Rich text cells contain an array of text fragments with formatting.
*
* @param richText - Array of rich text fragments
* @returns Concatenated plain text
*/
private extractRichText;
/**
* Extract a specific range from a spreadsheet.
*
* Called by the `extract_file_content` tool for targeted data access.
* Returns TSV-formatted text for the specified sheet, row range, and columns.
* Every parameter except `buffer` is optional.
*
* NOTE(review): `sheet` is documented as a 0-based index while `rowStart` /
* `rowEnd` are 1-indexed - confirm this mixed convention against the
* implementation.
*
* @param buffer - Excel file buffer
* @param sheet - Sheet name or 0-based index (default: first sheet)
* @param rowStart - Starting row (1-indexed, default: 1)
* @param rowEnd - Ending row (1-indexed, default: all rows)
* @param columns - Specific column letters to include (e.g., ["A", "B", "D"])
* @returns Promise resolving to a TSV-formatted string with the extracted data
*/
extractSheetRange(buffer: Buffer, sheet?: string | number, rowStart?: number, rowEnd?: number, columns?: string[]): Promise<string>;
}
/**
* Singleton Excel processor instance (an ExcelProcessor).
* Use this for standard Excel processing operations; the processExcel
* convenience function is documented as using this same instance.
*
* @example
* ```typescript
* import { excelProcessor } from "./ExcelProcessor.js";
*
* const result = await excelProcessor.processFile(fileInfo);
* ```
*/
export declare const excelProcessor: ExcelProcessor;
/**
* Check if a file is an Excel file.
* Matches by MIME type or file extension, so a match on either one is enough
* (see the second example, where the MIME type is empty).
*
* @param mimetype - MIME type of the file
* @param filename - Filename (for extension-based detection)
* @returns true if the file is an Excel file, false otherwise
*
* @example
* ```typescript
* if (isExcelFile("application/vnd.ms-excel", "data.xls")) {
*   // Process as Excel
* }
*
* if (isExcelFile("", "report.xlsx")) {
*   // Also matches by extension
* }
* ```
*/
export declare function isExcelFile(mimetype: string, filename: string): boolean;
/**
* Validate Excel file size against configured limit.
*
* @param sizeBytes - File size in bytes
* @returns true if size is within the Excel file limit, false otherwise
*
* @example
* ```typescript
* // Assumes getExcelMaxSizeMB() reports the same limit this check enforces -
* // TODO confirm against the implementation.
* if (!validateExcelSize(fileInfo.size)) {
*   console.error(`File too large: max ${getExcelMaxSizeMB()}MB`);
* }
* ```
*/
export declare function validateExcelSize(sizeBytes: number): boolean;
/**
* Process a single Excel file.
* Convenience function that uses the singleton processor.
*
* NOTE(review): presumably delegates to `excelProcessor.processFile` with the
* same arguments - confirm against the implementation.
*
* @param fileInfo - File information (can include URL or buffer)
* @param options - Optional processing options (auth headers, timeout, etc.)
* @returns Promise resolving to a processing result with parsed Excel data or error;
* check `result.success` before reading `result.data`
*
* @example
* ```typescript
* import { processExcel } from "./ExcelProcessor.js";
*
* const result = await processExcel(fileInfo, {
*   authHeaders: { Authorization: "Bearer token" },
*   timeout: 120000, // 2 minutes for large files
* });
*
* if (result.success) {
*   const { worksheets, totalRows, truncated } = result.data;
*   console.log(`Extracted ${totalRows} rows from ${worksheets.length} sheets`);
*
*   if (truncated) {
*     console.warn("Some data was truncated due to size limits");
*   }
* } else {
*   console.error(`Processing failed: ${result.error?.userMessage}`);
* }
* ```
*/
export declare function processExcel(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedExcel>>;
/**
* Get Excel max size in MB.
*
* @returns Maximum Excel file size in megabytes
*
* @example
* ```typescript
* // The value shown is illustrative; the actual limit is not visible in this declaration.
* const maxSize = getExcelMaxSizeMB(); // 10
* console.log(`Maximum Excel file size: ${maxSize}MB`);
* ```
*/
export declare function getExcelMaxSizeMB(): number;
/**
* Get Excel max rows per sheet.
*
* NOTE(review): presumably the MAX_EXCEL_ROWS limit named in the
* ExcelProcessor class documentation - confirm against the implementation.
*
* @returns Maximum rows to process per worksheet
*
* @example
* ```typescript
* // The value shown is illustrative; the actual limit is not visible in this declaration.
* const maxRows = getExcelMaxRows(); // 5000
* console.log(`Maximum rows per sheet: ${maxRows}`);
* ```
*/
export declare function getExcelMaxRows(): number;
/**
* Get Excel max sheets to process.
*
* NOTE(review): presumably the MAX_EXCEL_SHEETS limit named in the
* ExcelProcessor class documentation - confirm against the implementation.
*
* @returns Maximum number of worksheets to process
*
* @example
* ```typescript
* // The value shown is illustrative; the actual limit is not visible in this declaration.
* const maxSheets = getExcelMaxSheets(); // 10
* console.log(`Maximum sheets to process: ${maxSheets}`);
* ```
*/
export declare function getExcelMaxSheets(): number;