UNPKG

excel-parser-mcp

Version:

MCP server for parsing Excel documents and extracting test case data

100 lines 3.69 kB
import { fileTypeFromBuffer } from 'file-type';
import { SUPPORTED_FORMATS } from '../types/index.js';

/** MIME type that identifies an Excel 2007+ (.xlsx) workbook in a file-type result. */
const XLSX_MIME = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';

/** First eight bytes of an OLE compound file, the container checked for Excel 97-2003 (.xls). */
const OLE_CFB_SIGNATURE = Buffer.from([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]);

/** Entry names whose presence inside a ZIP container is taken to mean "this is an .xlsx". */
const XLSX_ENTRY_MARKERS = ['xl/workbook.xml', 'xl/worksheets/', 'xl/sharedStrings.xml'];

/** Buffers shorter than this many bytes are rejected by validateFileIntegrity. */
const MIN_VALID_SIZE = 512;

/**
 * Detects whether a buffer (optionally with a file name) holds an Excel
 * document, and performs basic size and integrity validation.
 */
export class FileTypeDetector {
  /**
   * Determines the Excel flavour of a buffer.
   *
   * First asks the file-type library for a MIME type; if that yields nothing
   * listed in SUPPORTED_FORMATS, falls back to inspecting magic bytes.
   * Any error raised during detection is logged and reported as 'unknown'.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @returns {Promise<'xlsx' | 'xls' | 'unknown'>} The detected type.
   */
  async detectExcelType(buffer) {
    try {
      // Method 1: MIME type detection via the file-type library.
      const fileType = await fileTypeFromBuffer(buffer);
      if (fileType && SUPPORTED_FORMATS.includes(fileType.mime)) {
        // Every supported MIME type other than the .xlsx one is reported as 'xls'.
        return fileType.mime === XLSX_MIME ? 'xlsx' : 'xls';
      }
      // Method 2: magic bytes (already yields 'unknown' when nothing matches).
      return this.detectByMagicBytes(buffer);
    } catch (error) {
      console.warn('文件类型检测失败:', error);
      return 'unknown';
    }
  }

  /**
   * Classifies a buffer by its leading signature bytes.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @returns {'xlsx' | 'xls' | 'unknown'} The detected type.
   */
  detectByMagicBytes(buffer) {
    // Excel 2007+ (.xlsx) is a ZIP file, which starts with "PK".
    if (buffer.length >= 4 && buffer[0] === 0x50 && buffer[1] === 0x4B) {
      // Only PK\x03\x04 (local file header) and PK\x05\x06 (end of central
      // directory) are accepted; mixed pairs such as PK\x03\x06 are not ZIP
      // signatures.
      const isLocalFileHeader = buffer[2] === 0x03 && buffer[3] === 0x04;
      const isEndOfCentralDir = buffer[2] === 0x05 && buffer[3] === 0x06;
      if (isLocalFileHeader || isEndOfCentralDir) {
        // Search the raw bytes for Excel-specific entry names instead of
        // decoding the whole file into a string first.
        if (XLSX_ENTRY_MARKERS.some((marker) => buffer.includes(marker))) {
          return 'xlsx';
        }
      }
    }

    // Excel 97-2003 (.xls) uses the Microsoft Office compound document format.
    if (buffer.length >= 8 && buffer.subarray(0, 8).equals(OLE_CFB_SIGNATURE)) {
      return 'xls';
    }

    return 'unknown';
  }

  /**
   * Classifies a file by the extension of its name (case-insensitive).
   *
   * A name without any '.' has no extension and is reported as 'unknown',
   * as is any value that is not a string.
   *
   * @param {string} filename - File name or path.
   * @returns {'xlsx' | 'xls' | 'unknown'} The type implied by the extension.
   */
  detectByFilename(filename) {
    if (typeof filename !== 'string') {
      return 'unknown';
    }
    const dotIndex = filename.lastIndexOf('.');
    if (dotIndex === -1) {
      return 'unknown';
    }
    const ext = filename.slice(dotIndex + 1).toLowerCase();
    return ext === 'xlsx' || ext === 'xls' ? ext : 'unknown';
  }

  /**
   * Reports whether the input looks like an Excel file.
   *
   * Content-based detection is tried first; when it fails and a file name
   * was supplied, the file name's extension decides.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @param {string} [filename] - Optional file name used as a fallback.
   * @returns {Promise<boolean>} True when either check recognises Excel.
   */
  async isExcelFile(buffer, filename) {
    const detectedType = await this.detectExcelType(buffer);
    if (detectedType !== 'unknown') {
      return true;
    }
    // Content detection failed: fall back to the file name, if there is one.
    return Boolean(filename) && this.detectByFilename(filename) !== 'unknown';
  }

  /**
   * Checks that the buffer does not exceed a size limit.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @param {number} [maxSize=52428800] - Maximum allowed size in bytes (50 MiB by default).
   * @returns {boolean} True when buffer.length is at most maxSize.
   */
  validateFileSize(buffer, maxSize = 50 * 1024 * 1024) {
    return buffer.length <= maxSize;
  }

  /**
   * Runs a basic integrity check: the buffer must be at least 512 bytes long
   * and must not be flagged by isCorrupted. Any error thrown while checking
   * (for example, a missing buffer) is treated as a failed check.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @returns {boolean} True when the buffer passes the check.
   */
  validateFileIntegrity(buffer) {
    try {
      // Written as a negated >= so that a missing or NaN length also fails.
      if (!(buffer.length >= MIN_VALID_SIZE)) {
        return false; // Too small to be a valid Excel file.
      }
      return !this.isCorrupted(buffer);
    } catch {
      return false;
    }
  }

  /**
   * Simple corruption heuristic. A buffer is considered corrupted when it is
   * empty, consists solely of zero bytes, or is longer than 100 bytes and
   * consists of a single repeated byte value.
   *
   * @param {Buffer} buffer - Raw file contents.
   * @returns {boolean} True when the buffer looks corrupted.
   */
  isCorrupted(buffer) {
    if (buffer.length === 0) {
      return true;
    }
    // One pass covers both cases: "all zeros" is "all identical" with a
    // first byte of zero.
    const firstByte = buffer[0];
    const isUniform = buffer.every((byte) => byte === firstByte);
    if (!isUniform) {
      return false;
    }
    return firstByte === 0 || buffer.length > 100;
  }
}
//# sourceMappingURL=file-detector.js.map