@lobehub/chat
Version:
Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.
145 lines (127 loc) • 5.2 kB
text/typescript
import debug from 'debug';
import { readFile } from 'node:fs/promises';
import * as xlsx from 'xlsx';
import type { DocumentPage, FileLoaderInterface } from '../../types';
import { promptTemplate } from './prompt';
const log = debug('file-loaders:excel');
/**
* Converts sheet data (array of objects) to a Markdown table string.
* Handles empty sheets and escapes pipe characters.
*/
function sheetToMarkdownTable(jsonData: Record<string, any>[]): string {
log('Converting sheet data to Markdown table, rows:', jsonData?.length || 0);
if (!jsonData || jsonData.length === 0) {
log('Sheet is empty, returning placeholder message');
return '*Sheet is empty or contains no data.*';
}
// Ensure all rows have the same keys based on the first row, handle potentially sparse data
const headers = Object.keys(jsonData[0] || {});
log('Sheet headers:', headers);
if (headers.length === 0) {
log('Sheet has no headers, returning placeholder message');
return '*Sheet has headers but no data.*';
}
const headerRow = `| ${headers.join(' | ')} |`;
const separatorRow = `| ${headers.map(() => '---').join(' | ')} |`;
log('Building data rows for Markdown table');
const dataRows = jsonData
.map((row) => {
const cells = headers.map((header) => {
const value = row[header];
// Handle null/undefined and escape pipe characters within cells
const cellContent =
value === null || value === undefined ? '' : String(value).replaceAll('|', '\\|');
return cellContent.trim(); // Trim whitespace from cells
});
return `| ${cells.join(' | ')} |`;
})
.join('\n');
const result = `${headerRow}\n${separatorRow}\n${dataRows}`;
log('Markdown table created, length:', result.length);
return result;
}
/**
* Loads Excel files (.xlsx, .xls) using the 'xlsx' library.
* Each sheet becomes a DocumentPage containing a Markdown table generated by sheetToMarkdownTable.
*/
export class ExcelLoader implements FileLoaderInterface {
async loadPages(filePath: string): Promise<DocumentPage[]> {
log('Loading Excel file:', filePath);
const pages: DocumentPage[] = [];
try {
// Use readFile for async operation compatible with other loaders
log('Reading Excel file as buffer');
const dataBuffer = await readFile(filePath);
log('Excel file read successfully, size:', dataBuffer.length, 'bytes');
log('Parsing Excel workbook');
const workbook = xlsx.read(dataBuffer, { type: 'buffer' });
log('Excel workbook parsed successfully, sheets:', workbook.SheetNames.length);
for (const sheetName of workbook.SheetNames) {
log(`Processing sheet: ${sheetName}`);
const worksheet = workbook.Sheets[sheetName];
// Use sheet_to_json to get array of objects for our custom markdown function
const jsonData = xlsx.utils.sheet_to_json<Record<string, any>>(worksheet, {
// Get formatted strings, not raw values
defval: '',
raw: false, // Use empty string for blank cells
});
log(`Sheet ${sheetName} converted to JSON, rows:`, jsonData.length);
// Convert to markdown using YOUR helper function
const tableMarkdown = sheetToMarkdownTable(jsonData);
const lines = tableMarkdown.split('\n');
const lineCount = lines.length;
const charCount = tableMarkdown.length;
log(`Sheet ${sheetName} converted to Markdown, lines: ${lineCount}, chars: ${charCount}`);
pages.push({
// Trim whitespace
charCount,
lineCount,
metadata: {
sheetName: sheetName,
},
pageContent: tableMarkdown.trim(),
});
log(`Added sheet ${sheetName} as page`);
}
if (pages.length === 0) {
log('Excel file contains no sheets, creating empty page with error');
pages.push({
charCount: 0,
lineCount: 0,
metadata: {
error: 'Excel file contains no sheets.',
},
pageContent: '',
});
}
log('Excel loading completed, total pages:', pages.length);
return pages;
} catch (e) {
const error = e as Error;
log('Error encountered while loading Excel file');
console.error(`Error loading Excel file ${filePath}: ${error.message}`);
const errorPage: DocumentPage = {
charCount: 0,
lineCount: 0,
metadata: {
error: `Failed to load Excel file: ${error.message}`,
},
pageContent: '',
};
log('Created error page for failed Excel loading');
return [errorPage];
}
}
/**
* Aggregates content from Excel sheets (Markdown tables).
* Adds the sheet name as a header before each table.
* @param pages Array of DocumentPage objects from loadPages.
* @returns Aggregated content as a string.
*/
async aggregateContent(pages: DocumentPage[]): Promise<string> {
log('Aggregating content from', pages.length, 'Excel pages');
const result = promptTemplate(pages);
log('Excel content aggregated successfully, length:', result.length);
return result;
}
}