UNPKG

@lobehub/chat

Version:

Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.

145 lines (127 loc) 5.2 kB
import debug from 'debug'; import { readFile } from 'node:fs/promises'; import * as xlsx from 'xlsx'; import type { DocumentPage, FileLoaderInterface } from '../../types'; import { promptTemplate } from './prompt'; const log = debug('file-loaders:excel'); /** * Converts sheet data (array of objects) to a Markdown table string. * Handles empty sheets and escapes pipe characters. */ function sheetToMarkdownTable(jsonData: Record<string, any>[]): string { log('Converting sheet data to Markdown table, rows:', jsonData?.length || 0); if (!jsonData || jsonData.length === 0) { log('Sheet is empty, returning placeholder message'); return '*Sheet is empty or contains no data.*'; } // Ensure all rows have the same keys based on the first row, handle potentially sparse data const headers = Object.keys(jsonData[0] || {}); log('Sheet headers:', headers); if (headers.length === 0) { log('Sheet has no headers, returning placeholder message'); return '*Sheet has headers but no data.*'; } const headerRow = `| ${headers.join(' | ')} |`; const separatorRow = `| ${headers.map(() => '---').join(' | ')} |`; log('Building data rows for Markdown table'); const dataRows = jsonData .map((row) => { const cells = headers.map((header) => { const value = row[header]; // Handle null/undefined and escape pipe characters within cells const cellContent = value === null || value === undefined ? '' : String(value).replaceAll('|', '\\|'); return cellContent.trim(); // Trim whitespace from cells }); return `| ${cells.join(' | ')} |`; }) .join('\n'); const result = `${headerRow}\n${separatorRow}\n${dataRows}`; log('Markdown table created, length:', result.length); return result; } /** * Loads Excel files (.xlsx, .xls) using the 'xlsx' library. * Each sheet becomes a DocumentPage containing a Markdown table generated by sheetToMarkdownTable. */ export class ExcelLoader implements FileLoaderInterface { async loadPages(filePath: string): Promise<DocumentPage[]> { log('Loading Excel file:', filePath); const pages: DocumentPage[] = []; try { // Use readFile for async operation compatible with other loaders log('Reading Excel file as buffer'); const dataBuffer = await readFile(filePath); log('Excel file read successfully, size:', dataBuffer.length, 'bytes'); log('Parsing Excel workbook'); const workbook = xlsx.read(dataBuffer, { type: 'buffer' }); log('Excel workbook parsed successfully, sheets:', workbook.SheetNames.length); for (const sheetName of workbook.SheetNames) { log(`Processing sheet: ${sheetName}`); const worksheet = workbook.Sheets[sheetName]; // Use sheet_to_json to get array of objects for our custom markdown function const jsonData = xlsx.utils.sheet_to_json<Record<string, any>>(worksheet, { // Get formatted strings, not raw values defval: '', raw: false, // Use empty string for blank cells }); log(`Sheet ${sheetName} converted to JSON, rows:`, jsonData.length); // Convert to markdown using YOUR helper function const tableMarkdown = sheetToMarkdownTable(jsonData); const lines = tableMarkdown.split('\n'); const lineCount = lines.length; const charCount = tableMarkdown.length; log(`Sheet ${sheetName} converted to Markdown, lines: ${lineCount}, chars: ${charCount}`); pages.push({ // Trim whitespace charCount, lineCount, metadata: { sheetName: sheetName, }, pageContent: tableMarkdown.trim(), }); log(`Added sheet ${sheetName} as page`); } if (pages.length === 0) { log('Excel file contains no sheets, creating empty page with error'); pages.push({ charCount: 0, lineCount: 0, metadata: { error: 'Excel file contains no sheets.', }, pageContent: '', }); } log('Excel loading completed, total pages:', pages.length); return pages; } catch (e) { const error = e as Error; log('Error encountered while loading Excel file'); console.error(`Error loading Excel file ${filePath}: ${error.message}`); const errorPage: DocumentPage = { charCount: 0, lineCount: 0, metadata: { error: `Failed to load Excel file: ${error.message}`, }, pageContent: '', }; log('Created error page for failed Excel loading'); return [errorPage]; } } /** * Aggregates content from Excel sheets (Markdown tables). * Adds the sheet name as a header before each table. * @param pages Array of DocumentPage objects from loadPages. * @returns Aggregated content as a string. */ async aggregateContent(pages: DocumentPage[]): Promise<string> { log('Aggregating content from', pages.length, 'Excel pages'); const result = promptTemplate(pages); log('Excel content aggregated successfully, length:', result.length); return result; } }