UNPKG

requirements-analysis

Version:

简化的需求分析MCP服务 - 基于AI软件工程(优化版)6步流程

220 lines 8.02 kB
"use strict"; /** * 文档内容提取器 * 负责从文档中智能提取关键内容 */ Object.defineProperty(exports, "__esModule", { value: true }); exports.DocumentContentExtractor = void 0; const logger_1 = require("./logger"); class DocumentContentExtractor { logger = new logger_1.Logger(); /** * 从文档中提取关键内容 */ extractKeyContent(content, extractionType = 'summary') { if (!content || content.trim().length === 0) { this.logger.warn('Empty content provided for extraction'); return ''; } try { switch (extractionType) { case 'summary': return this.extractSummary(content); case 'conclusions': return this.extractConclusions(content); case 'recommendations': return this.extractRecommendations(content); case 'key_points': return this.extractKeyPoints(content); case 'full': return this.cleanupFullContent(content); default: return this.extractSummary(content); } } catch (error) { this.logger.error(`Failed to extract content with type ${extractionType}:`, undefined, undefined, error); return this.fallbackExtraction(content); } } /** * 提取摘要和要点 */ extractSummary(content) { const lines = content.split('\n'); const keyLines = lines.filter(line => { const lowerLine = line.toLowerCase().trim(); // 跳过元数据行 if (this.isMetadataLine(line)) { return false; } // 包含关键词的行 return lowerLine.includes('摘要') || lowerLine.includes('要点') || lowerLine.includes('结论') || lowerLine.includes('建议') || lowerLine.includes('===') || lowerLine.includes('【') || lowerLine.includes('】') || line.startsWith('##') || line.startsWith('###') || line.startsWith('- **') || line.startsWith('* **'); }); // 如果提取的内容太少,添加一些上下文 if (keyLines.length < 5) { return this.extractWithContext(content, keyLines); } return keyLines.join('\n').trim(); } /** * 提取结论性内容 */ extractConclusions(content) { const sections = this.splitIntoSections(content); const conclusionSections = sections.filter(section => { const sectionLower = section.toLowerCase(); return sectionLower.includes('结论') || sectionLower.includes('总结') || sectionLower.includes('验证结果') || sectionLower.includes('分析结果') || sectionLower.includes('评估结果') || sectionLower.includes('===') || sectionLower.includes('【验证结果】') || sectionLower.includes('【评估结果】'); }); return conclusionSections.join('\n\n').trim(); } /** * 提取建议性内容 */ extractRecommendations(content) { const sections = this.splitIntoSections(content); const recommendationSections = sections.filter(section => { const sectionLower = section.toLowerCase(); return sectionLower.includes('建议') || sectionLower.includes('改进') || sectionLower.includes('优化') || sectionLower.includes('后续') || sectionLower.includes('下一步') || sectionLower.includes('行动计划') || sectionLower.includes('实施'); }); return recommendationSections.join('\n\n').trim(); } /** * 提取关键要点 */ extractKeyPoints(content) { const lines = content.split('\n'); const keyPointLines = lines.filter(line => { const trimmedLine = line.trim(); // 跳过元数据 if (this.isMetadataLine(line)) { return false; } // 标题行 if (trimmedLine.startsWith('#')) { return true; } // 列表项 if (trimmedLine.startsWith('- ') || trimmedLine.startsWith('* ') || trimmedLine.match(/^\d+\./)) { return true; } // 重要标记 if (trimmedLine.includes('**') || trimmedLine.includes('【') || trimmedLine.includes('】') || trimmedLine.includes('===')) { return true; } return false; }); return keyPointLines.join('\n').trim(); } /** * 清理完整内容 */ cleanupFullContent(content) { const lines = content.split('\n'); const cleanLines = lines.filter(line => !this.isMetadataLine(line)); return cleanLines.join('\n').trim(); } /** * 检查是否为元数据行 */ isMetadataLine(line) { const trimmedLine = line.trim(); return trimmedLine.includes('生成时间:') || trimmedLine.includes('项目:') || trimmedLine.includes('步骤:') || trimmedLine.startsWith('---') || trimmedLine.startsWith('*生成时间') || (trimmedLine.startsWith('*') && trimmedLine.includes('|')); } /** * 将内容分割为段落 */ splitIntoSections(content) { // 按双换行分割段落 let sections = content.split('\n\n'); // 如果段落太少,按单换行分割 if (sections.length < 3) { sections = content.split('\n## ').map(s => s.startsWith('## ') ? s : '## ' + s); } return sections.filter(s => s.trim().length > 0); } /** * 带上下文的提取 */ extractWithContext(content, keyLines) { const lines = content.split('\n'); const result = []; for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (keyLines.includes(line)) { // 添加关键行 result.push(line); // 添加后续的相关行(最多3行) for (let j = i + 1; j < Math.min(i + 4, lines.length); j++) { const nextLine = lines[j]; if (nextLine.trim().length > 0 && !this.isMetadataLine(nextLine)) { result.push(nextLine); } else { break; } } } } return result.join('\n').trim(); } /** * 降级提取(简单截取) */ fallbackExtraction(content) { const cleanContent = this.cleanupFullContent(content); // 如果内容太长,截取前1000字符 if (cleanContent.length > 1000) { return cleanContent.substring(0, 1000) + '...\n\n[内容已截取,如需完整内容请查看原文档]'; } return cleanContent; } /** * 获取内容统计信息 */ getContentStats(content) { const lines = content.split('\n'); const sections = this.splitIntoSections(content); const keyPoints = lines.filter(line => line.trim().startsWith('- ') || line.trim().startsWith('* ') || line.trim().match(/^\d+\./)).length; return { totalLines: lines.length, totalChars: content.length, sections: sections.length, keyPoints }; } } exports.DocumentContentExtractor = DocumentContentExtractor; //# sourceMappingURL=document-content-extractor.js.map