/*
 * requirements-analysis
 * Version:
 * 简化的需求分析MCP服务 - 基于AI软件工程(优化版)6步流程
 * 220 lines • 8.02 kB
 * JavaScript
 */
"use strict";
/**
* 文档内容提取器
* 负责从文档中智能提取关键内容
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.DocumentContentExtractor = void 0;
const logger_1 = require("./logger");
/**
 * Extracts the relevant parts of a plain-text / Markdown-like document.
 *
 * Every strategy is line- or section-based and purely textual: lines and
 * sections are selected by checking for fixed marker substrings, headings,
 * list bullets and emphasis markers.
 */
class DocumentContentExtractor {
    /** Substrings that mark a line as summary-relevant wherever they occur. */
    static #SUMMARY_MARKERS = ['摘要', '要点', '结论', '建议', '===', '【', '】'];
    /** Line prefixes (checked on the untrimmed line) that mark a summary line. */
    static #SUMMARY_PREFIXES = ['##', '- **', '* **'];
    /** Substrings that mark a section as a conclusion. */
    static #CONCLUSION_MARKERS = ['结论', '总结', '验证结果', '分析结果', '评估结果', '==='];
    /** Substrings that mark a section as a recommendation. */
    static #RECOMMENDATION_MARKERS = ['建议', '改进', '优化', '后续', '下一步', '行动计划', '实施'];
    /** Substrings that mark a line as emphasised / important. */
    static #EMPHASIS_MARKERS = ['**', '【', '】', '==='];
    /** Matches the start of a numbered list item such as "1." or "12.". */
    static #NUMBERED_ITEM = /^\d+\./;
    /** Maximum number of following lines pulled in as context for a key line. */
    static #MAX_CONTEXT_LINES = 3;
    /** Maximum length (UTF-16 code units) of the fallback extract. */
    static #FALLBACK_MAX_CHARS = 1000;

    logger = new logger_1.Logger();

    /**
     * Returns true when the (already trimmed) line is a "- ", "* " or
     * numbered list item.
     *
     * Kept static so it also works on instances that were not built through
     * the constructor.
     *
     * @param {string} trimmedLine - Line with surrounding whitespace removed.
     * @returns {boolean}
     */
    static #isListItem(trimmedLine) {
        return trimmedLine.startsWith('- ') ||
            trimmedLine.startsWith('* ') ||
            DocumentContentExtractor.#NUMBERED_ITEM.test(trimmedLine);
    }

    /**
     * Extracts key content from a document.
     *
     * @param {string} content - Raw document text.
     * @param {string} [extractionType='summary'] - One of 'summary',
     *   'conclusions', 'recommendations', 'key_points' or 'full'. Any other
     *   value is treated as 'summary'.
     * @returns {string} The extracted text; '' when `content` is empty,
     *   whitespace-only or not a string. If the selected strategy throws, the
     *   error is logged and the result of `fallbackExtraction` is returned.
     */
    extractKeyContent(content, extractionType = 'summary') {
        // Non-string input used to throw on `.trim()` before reaching the
        // try block; treat it the same as empty content instead.
        if (typeof content !== 'string' || content.trim().length === 0) {
            this.logger.warn('Empty content provided for extraction');
            return '';
        }
        try {
            switch (extractionType) {
                case 'conclusions':
                    return this.extractConclusions(content);
                case 'recommendations':
                    return this.extractRecommendations(content);
                case 'key_points':
                    return this.extractKeyPoints(content);
                case 'full':
                    return this.cleanupFullContent(content);
                case 'summary':
                default:
                    return this.extractSummary(content);
            }
        }
        catch (error) {
            this.logger.error(`Failed to extract content with type ${extractionType}:`, undefined, undefined, error);
            return this.fallbackExtraction(content);
        }
    }

    /**
     * Extracts summary lines: non-metadata lines that contain a summary
     * marker or start with a level-2+ heading or a bold bullet.
     *
     * When fewer than five such lines are found, each one is returned
     * together with a few following lines of context.
     *
     * @param {string} content - Raw document text.
     * @returns {string} Selected lines joined by '\n', trimmed.
     */
    extractSummary(content) {
        const markers = DocumentContentExtractor.#SUMMARY_MARKERS;
        const prefixes = DocumentContentExtractor.#SUMMARY_PREFIXES;
        const keyLines = content.split('\n').filter((line) => {
            if (this.isMetadataLine(line)) {
                return false;
            }
            return markers.some((marker) => line.includes(marker)) ||
                prefixes.some((prefix) => line.startsWith(prefix));
        });
        // Too little material on its own: pad every key line with context.
        if (keyLines.length < 5) {
            return this.extractWithContext(content, keyLines);
        }
        return keyLines.join('\n').trim();
    }

    /**
     * Extracts the sections that contain a conclusion marker.
     *
     * @param {string} content - Raw document text.
     * @returns {string} Matching sections joined by a blank line, trimmed.
     */
    extractConclusions(content) {
        const markers = DocumentContentExtractor.#CONCLUSION_MARKERS;
        return this.splitIntoSections(content)
            .filter((section) => markers.some((marker) => section.includes(marker)))
            .join('\n\n')
            .trim();
    }

    /**
     * Extracts the sections that contain a recommendation marker.
     *
     * @param {string} content - Raw document text.
     * @returns {string} Matching sections joined by a blank line, trimmed.
     */
    extractRecommendations(content) {
        const markers = DocumentContentExtractor.#RECOMMENDATION_MARKERS;
        return this.splitIntoSections(content)
            .filter((section) => markers.some((marker) => section.includes(marker)))
            .join('\n\n')
            .trim();
    }

    /**
     * Extracts key-point lines: headings, list items and lines carrying an
     * emphasis marker. Metadata lines are always dropped.
     *
     * @param {string} content - Raw document text.
     * @returns {string} Selected lines joined by '\n', trimmed.
     */
    extractKeyPoints(content) {
        const emphasis = DocumentContentExtractor.#EMPHASIS_MARKERS;
        const keyPointLines = content.split('\n').filter((line) => {
            if (this.isMetadataLine(line)) {
                return false;
            }
            const trimmedLine = line.trim();
            return trimmedLine.startsWith('#') ||
                DocumentContentExtractor.#isListItem(trimmedLine) ||
                emphasis.some((marker) => trimmedLine.includes(marker));
        });
        return keyPointLines.join('\n').trim();
    }

    /**
     * Returns the full content with metadata lines removed.
     *
     * @param {string} content - Raw document text.
     * @returns {string} Remaining lines joined by '\n', trimmed.
     */
    cleanupFullContent(content) {
        return content
            .split('\n')
            .filter((line) => !this.isMetadataLine(line))
            .join('\n')
            .trim();
    }

    /**
     * Tells whether a line is document metadata rather than content.
     *
     * A line counts as metadata when, after trimming, it contains one of the
     * generated-time / project / step labels, starts with '---', starts with
     * '*生成时间', or starts with '*' and contains a '|'.
     *
     * @param {string} line - A single line of the document.
     * @returns {boolean}
     */
    isMetadataLine(line) {
        const trimmedLine = line.trim();
        return trimmedLine.includes('生成时间:') ||
            trimmedLine.includes('项目:') ||
            trimmedLine.includes('步骤:') ||
            trimmedLine.startsWith('---') ||
            trimmedLine.startsWith('*生成时间') ||
            (trimmedLine.startsWith('*') && trimmedLine.includes('|'));
    }

    /**
     * Splits content into sections.
     *
     * Paragraphs (separated by a blank line) are used when there are at
     * least three of them; otherwise the content is split in front of every
     * line that starts with '## ', so each section keeps its own heading.
     * Text that is not a heading is never given an invented '## ' prefix.
     *
     * @param {string} content - Raw document text.
     * @returns {string[]} Non-blank sections in document order.
     */
    splitIntoSections(content) {
        let sections = content.split('\n\n');
        if (sections.length < 3) {
            // The lookahead consumes only the newline, so the heading marker
            // stays attached to the section it introduces.
            sections = content.split(/\n(?=## )/);
        }
        return sections.filter((section) => section.trim().length > 0);
    }

    /**
     * Returns every key line followed by up to three directly following
     * lines of context. Context collection for a key line stops at the first
     * blank or metadata line.
     *
     * Each source line is emitted at most once, even when it is both a key
     * line and part of the context of an earlier key line.
     *
     * @param {string} content - Raw document text.
     * @param {string[]} keyLines - Lines of `content` to anchor on (matched
     *   by exact text).
     * @returns {string} Selected lines joined by '\n', trimmed.
     */
    extractWithContext(content, keyLines) {
        const lines = content.split('\n');
        const keySet = new Set(keyLines);
        const result = [];
        // Index of the last line already emitted. Emission is strictly
        // left-to-right and contiguous per key line, so a single watermark is
        // enough to prevent duplicates.
        let lastEmitted = -1;
        for (let i = 0; i < lines.length; i++) {
            if (!keySet.has(lines[i])) {
                continue;
            }
            if (i > lastEmitted) {
                result.push(lines[i]);
                lastEmitted = i;
            }
            const end = Math.min(i + 1 + DocumentContentExtractor.#MAX_CONTEXT_LINES, lines.length);
            for (let j = i + 1; j < end; j++) {
                const nextLine = lines[j];
                if (nextLine.trim().length === 0 || this.isMetadataLine(nextLine)) {
                    break;
                }
                if (j > lastEmitted) {
                    result.push(nextLine);
                    lastEmitted = j;
                }
            }
        }
        return result.join('\n').trim();
    }

    /**
     * Fallback extraction: the cleaned content, truncated to at most 1000
     * UTF-16 code units with a truncation notice appended when it is longer.
     *
     * @param {string} content - Raw document text.
     * @returns {string}
     */
    fallbackExtraction(content) {
        const cleanContent = this.cleanupFullContent(content);
        const limit = DocumentContentExtractor.#FALLBACK_MAX_CHARS;
        if (cleanContent.length <= limit) {
            return cleanContent;
        }
        let end = limit;
        // Do not cut between the two halves of a surrogate pair.
        const lastUnit = cleanContent.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
            end -= 1;
        }
        return `${cleanContent.substring(0, end)}...\n\n[内容已截取,如需完整内容请查看原文档]`;
    }

    /**
     * Computes simple statistics about the content.
     *
     * @param {string} content - Raw document text; a non-string value is
     *   treated as an empty string.
     * @returns {{totalLines: number, totalChars: number, sections: number, keyPoints: number}}
     *   `keyPoints` is the number of "- ", "* " or numbered list lines.
     */
    getContentStats(content) {
        const text = typeof content === 'string' ? content : '';
        const lines = text.split('\n');
        const sections = this.splitIntoSections(text);
        const keyPoints = lines
            .filter((line) => DocumentContentExtractor.#isListItem(line.trim()))
            .length;
        return {
            totalLines: lines.length,
            totalChars: text.length,
            sections: sections.length,
            keyPoints,
        };
    }
}
exports.DocumentContentExtractor = DocumentContentExtractor;
//# sourceMappingURL=document-content-extractor.js.map