excel-parser-mcp
Version:
MCP server for parsing Excel documents and extracting test case data
100 lines • 3.69 kB
JavaScript
import { fileTypeFromBuffer } from 'file-type';
import { SUPPORTED_FORMATS } from '../types/index.js';
/**
 * Detects whether an in-memory file is an Excel workbook and performs basic
 * size / integrity sanity checks on it.
 *
 * Detection results are always one of the strings 'xlsx', 'xls' or 'unknown'.
 */
export class FileTypeDetector {
  /**
   * Determine the Excel flavour of a buffer.
   *
   * The file-type library is consulted first; if it does not recognise the
   * content (or throws), the method falls back to inspecting magic bytes.
   * Any MIME type listed in SUPPORTED_FORMATS other than the OOXML
   * spreadsheet type is reported as 'xls'.
   *
   * @param {Buffer} buffer - Raw file content.
   * @returns {Promise<'xlsx'|'xls'|'unknown'>} Detected type; never rejects.
   */
  async detectExcelType(buffer) {
    // Method 1: MIME sniffing via the file-type library. A failure here is
    // logged but must not prevent the magic-byte fallback from running.
    try {
      const fileType = await fileTypeFromBuffer(buffer);
      if (fileType && SUPPORTED_FORMATS.includes(fileType.mime)) {
        return fileType.mime === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
          ? 'xlsx'
          : 'xls';
      }
    } catch (error) {
      console.warn('文件类型检测失败:', error);
    }

    // Method 2: magic-byte inspection.
    try {
      return this.detectByMagicBytes(buffer);
    } catch (error) {
      console.warn('文件类型检测失败:', error);
      return 'unknown';
    }
  }

  /**
   * Classify a buffer by its leading signature bytes.
   *
   * @param {Buffer} buffer - Raw file content.
   * @returns {'xlsx'|'xls'|'unknown'} Detected type.
   */
  detectByMagicBytes(buffer) {
    // Excel 2007+ (.xlsx) is a ZIP archive, which starts with "PK".
    if (buffer.length >= 4 && buffer[0] === 0x50 && buffer[1] === 0x4b) {
      // Only the matched pairs 03 04 and 05 06 are accepted; mixed
      // combinations such as 03 06 are not ZIP signatures.
      const isLocalFileHeader = buffer[2] === 0x03 && buffer[3] === 0x04;
      const isEndOfCentralDir = buffer[2] === 0x05 && buffer[3] === 0x06;
      if (isLocalFileHeader || isEndOfCentralDir) {
        // A generic ZIP is not enough: look for entry names specific to Excel.
        // Searching the bytes directly avoids decoding the whole file into a
        // string first.
        const excelMarkers = ['xl/workbook.xml', 'xl/worksheets/', 'xl/sharedStrings.xml'];
        if (excelMarkers.some((marker) => buffer.includes(marker))) {
          return 'xlsx';
        }
      }
    }

    // Excel 97-2003 (.xls) uses the legacy Microsoft Office compound-document
    // signature. NOTE(review): this check looks at the signature only, so it
    // presumably also matches other legacy Office documents.
    if (buffer.length >= 8) {
      const signature = buffer.subarray(0, 8);
      const oleCfsSignature = Buffer.from([0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1]);
      if (signature.equals(oleCfsSignature)) {
        return 'xls';
      }
    }

    return 'unknown';
  }

  /**
   * Classify a file by its extension (case-insensitive).
   *
   * A name without any dot has no extension and is reported as 'unknown',
   * even if the whole name happens to be "xls" or "xlsx".
   *
   * @param {string} filename - File name or path.
   * @returns {'xlsx'|'xls'|'unknown'} Type implied by the extension.
   */
  detectByFilename(filename) {
    const lowered = filename.toLowerCase();
    const dotIndex = lowered.lastIndexOf('.');
    if (dotIndex === -1) {
      return 'unknown';
    }
    const ext = lowered.slice(dotIndex + 1);
    return ext === 'xlsx' || ext === 'xls' ? ext : 'unknown';
  }

  /**
   * Decide whether the given content (or, failing that, its name) looks
   * like an Excel file.
   *
   * @param {Buffer} buffer - Raw file content.
   * @param {string} [filename] - Optional name used as a fallback signal.
   * @returns {Promise<boolean>} True when either check recognises Excel.
   */
  async isExcelFile(buffer, filename) {
    const detectedType = await this.detectExcelType(buffer);
    if (detectedType !== 'unknown') {
      return true;
    }
    // Content-based detection failed: fall back to the file name, if given.
    if (filename) {
      return this.detectByFilename(filename) !== 'unknown';
    }
    return false;
  }

  /**
   * Check that the buffer does not exceed a size limit.
   *
   * @param {Buffer} buffer - Raw file content.
   * @param {number} [maxSize=52428800] - Limit in bytes (default 50 MiB).
   * @returns {boolean} True when buffer.length is at most maxSize.
   */
  validateFileSize(buffer, maxSize = 50 * 1024 * 1024) {
    return buffer.length <= maxSize;
  }

  /**
   * Basic integrity check: the buffer must be at least 512 bytes long and
   * must not look corrupted according to isCorrupted().
   *
   * @param {Buffer} buffer - Raw file content.
   * @returns {boolean} False for too-small or corrupted input, or when the
   *   check itself throws.
   */
  validateFileIntegrity(buffer) {
    try {
      // Too small to be a valid Excel file.
      if (buffer.length < 512) {
        return false;
      }
      return !this.isCorrupted(buffer);
    } catch {
      return false;
    }
  }

  /**
   * Simple corruption heuristic.
   *
   * A buffer is considered corrupted when it is empty, consists only of
   * zero bytes, or is longer than 100 bytes and consists of one repeated
   * byte value.
   *
   * @param {Buffer} buffer - Raw file content.
   * @returns {boolean} True when the buffer looks corrupted.
   */
  isCorrupted(buffer) {
    // An empty buffer is vacuously "all zeros".
    if (buffer.length === 0) {
      return true;
    }
    // One pass is enough: "all zeros" is the special case of "all the same
    // byte" where that byte is 0.
    const firstByte = buffer[0];
    const isUniform = buffer.every((byte) => byte === firstByte);
    if (!isUniform) {
      return false;
    }
    return firstByte === 0 || buffer.length > 100;
  }
}
//# sourceMappingURL=file-detector.js.map