soul-mirror
Version:
🔮 融合千年易学智慧与现代AI技术,为程序员量身打造的命理情绪分析工具。94.1%情绪识别准确率,秒级响应,支持五行人格分析与实时运势指导。
363 lines (300 loc) • 11.1 kB
JavaScript
/**
* NTUSD情绪词典集成器
* 将NTUSD词典与现有alias词典智能融合
*/
const fs = require('fs');
const path = require('path');
class NTUSDIntegrator {
constructor() {
this.ntusdPath = path.join(__dirname, '../../data/ntusd/ntusd-processed.json');
this.aliasPath = path.join(__dirname, 'zh-alias-improved.json');
this.outputPath = path.join(__dirname, 'zh-alias-enhanced.json');
this.ntusdData = null;
this.aliasData = null;
this.integratedData = {};
this.config = {
// 优先级配置
aliasWeight: 1.0, // alias词典权重
ntusdWeight: 0.7, // NTUSD词典权重
// 强度阈值
minIntensity: 0.3, // 最小强度阈值
maxWordsPerEmotion: 1000, // 每个情绪类别最大词数
// 去重策略
enableDeduplication: true,
similarityThreshold: 0.8,
// 现代化过滤
filterTraditional: true,
modernBonus: 0.2 // 现代用语加成
};
}
/**
* 加载词典数据
*/
loadData() {
try {
// 加载NTUSD数据
if (fs.existsSync(this.ntusdPath)) {
this.ntusdData = JSON.parse(fs.readFileSync(this.ntusdPath, 'utf8'));
console.log(`✅ 已加载NTUSD词典: ${this.ntusdData.metadata.totalWords}个词`);
} else {
console.log('⚠️ NTUSD词典文件不存在,请先运行ntusd-processor.js');
return false;
}
// 加载alias数据
if (fs.existsSync(this.aliasPath)) {
this.aliasData = JSON.parse(fs.readFileSync(this.aliasPath, 'utf8'));
const totalAlias = Object.values(this.aliasData).reduce((sum, arr) => sum + arr.length, 0);
console.log(`✅ 已加载alias词典: ${totalAlias}个词`);
} else {
console.log('❌ alias词典文件不存在');
return false;
}
return true;
} catch (error) {
console.error('❌ 加载词典数据失败:', error.message);
return false;
}
}
/**
* 检测现代用语
*/
isModernExpression(word) {
const modernPatterns = [
/[a-zA-Z]/, // 包含英文字母
/\d/, // 包含数字
/[\u{1F600}-\u{1F64F}]/u, // 包含emoji表情
/[\u{1F300}-\u{1F5FF}]/u, // 包含emoji符号
/[\u{1F680}-\u{1F6FF}]/u, // 包含emoji交通
/了$/, // 以"了"结尾的口语
/死了$/, // 强化表达
/爆了$/, // 网络用语
/翻了$/, // 网络用语
/yyds/, // 网络流行语
/emo/, // 英文缩写
/wtf/, // 英文缩写
/high/, // 英文词汇
];
return modernPatterns.some(pattern => pattern.test(word));
}
/**
* 计算词汇相似度
*/
calculateSimilarity(word1, word2) {
if (word1 === word2) return 1.0;
if (word1.includes(word2) || word2.includes(word1)) return 0.8;
// 计算编辑距离相似度
const distance = this.levenshteinDistance(word1, word2);
const maxLength = Math.max(word1.length, word2.length);
return 1 - (distance / maxLength);
}
/**
* 编辑距离算法
*/
levenshteinDistance(str1, str2) {
const matrix = [];
for (let i = 0; i <= str2.length; i++) {
matrix[i] = [i];
}
for (let j = 0; j <= str1.length; j++) {
matrix[0][j] = j;
}
for (let i = 1; i <= str2.length; i++) {
for (let j = 1; j <= str1.length; j++) {
if (str2.charAt(i - 1) === str1.charAt(j - 1)) {
matrix[i][j] = matrix[i - 1][j - 1];
} else {
matrix[i][j] = Math.min(
matrix[i - 1][j - 1] + 1,
matrix[i][j - 1] + 1,
matrix[i - 1][j] + 1
);
}
}
}
return matrix[str2.length][str1.length];
}
/**
* 去重处理
*/
deduplicateWords(wordList) {
if (!this.config.enableDeduplication) return wordList;
const deduplicated = [];
const seen = new Set();
for (const wordObj of wordList) {
const word = typeof wordObj === 'string' ? wordObj : wordObj.word;
let isDuplicate = false;
if (seen.has(word)) {
isDuplicate = true;
} else {
// 检查相似词汇
for (const seenWord of seen) {
if (this.calculateSimilarity(word, seenWord) > this.config.similarityThreshold) {
isDuplicate = true;
break;
}
}
}
if (!isDuplicate) {
seen.add(word);
deduplicated.push(wordObj);
}
}
return deduplicated;
}
/**
* 合并单个情绪类别
*/
mergeEmotionCategory(emotion) {
const aliasWords = this.aliasData[emotion] || [];
const ntusdWords = this.ntusdData.emotions[emotion] || [];
// 转换alias词汇为标准格式
const aliasFormatted = aliasWords.map(word => ({
word: word,
intensity: this.isModernExpression(word) ?
Math.min(1.0, 0.6 + this.config.modernBonus) : 0.6,
source: 'alias',
priority: this.config.aliasWeight
}));
// 处理NTUSD词汇
const ntusdFormatted = ntusdWords.map(wordObj => ({
word: wordObj.word,
intensity: Math.max(this.config.minIntensity, wordObj.intensity * this.config.ntusdWeight),
source: 'NTUSD',
priority: this.config.ntusdWeight
}));
// 合并并去重
const combined = [...aliasFormatted, ...ntusdFormatted];
const deduplicated = this.deduplicateWords(combined);
// 按优先级和强度排序
deduplicated.sort((a, b) => {
if (a.priority !== b.priority) return b.priority - a.priority;
return b.intensity - a.intensity;
});
// 限制词汇数量
const limited = deduplicated.slice(0, this.config.maxWordsPerEmotion);
return limited;
}
/**
* 执行集成
*/
integrate() {
console.log('🔄 开始集成NTUSD与alias词典...\n');
if (!this.loadData()) {
return false;
}
// 获取所有情绪类别
const emotions = Object.keys(this.aliasData);
console.log(`📊 处理 ${emotions.length} 个情绪类别...`);
const statistics = {};
for (const emotion of emotions) {
console.log(`\n🎭 处理情绪类别: ${emotion}`);
const aliasCount = (this.aliasData[emotion] || []).length;
const ntusdCount = (this.ntusdData.emotions[emotion] || []).length;
console.log(` alias词汇: ${aliasCount}个`);
console.log(` NTUSD词汇: ${ntusdCount}个`);
const merged = this.mergeEmotionCategory(emotion);
const finalWords = merged.map(item => item.word); // 转换为字符串数组以兼容现有系统
this.integratedData[emotion] = finalWords;
statistics[emotion] = {
original: aliasCount,
ntusd: ntusdCount,
merged: merged.length,
final: finalWords.length,
modernWords: merged.filter(item => this.isModernExpression(item.word)).length,
traditionalWords: merged.filter(item => !this.isModernExpression(item.word)).length
};
console.log(` 合并后: ${merged.length}个`);
console.log(` 最终: ${finalWords.length}个`);
console.log(` 现代用语: ${statistics[emotion].modernWords}个`);
console.log(` 传统词汇: ${statistics[emotion].traditionalWords}个`);
}
// 保存集成结果
this.saveResults(statistics);
return true;
}
/**
* 保存集成结果
*/
saveResults(statistics) {
console.log('\n💾 保存集成结果...');
// 保存增强版词典
fs.writeFileSync(this.outputPath, JSON.stringify(this.integratedData, null, 2), 'utf8');
console.log(`✅ 增强版词典已保存: ${this.outputPath}`);
// 保存统计报告
const report = {
timestamp: new Date().toISOString(),
config: this.config,
statistics: statistics,
summary: {
totalEmotions: Object.keys(this.integratedData).length,
totalWords: Object.values(this.integratedData).reduce((sum, arr) => sum + arr.length, 0),
avgWordsPerEmotion: Math.round(Object.values(this.integratedData).reduce((sum, arr) => sum + arr.length, 0) / Object.keys(this.integratedData).length),
modernWordsTotal: Object.values(statistics).reduce((sum, stat) => sum + stat.modernWords, 0),
traditionalWordsTotal: Object.values(statistics).reduce((sum, stat) => sum + stat.traditionalWords, 0)
}
};
const reportPath = path.join(__dirname, '../../data/ntusd/integration-report.json');
fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), 'utf8');
console.log(`📊 集成报告已保存: ${reportPath}`);
// 打印总结
this.printSummary(report.summary);
}
/**
* 打印集成总结
*/
printSummary(summary) {
console.log('\n🎯 集成总结:');
console.log('='.repeat(50));
console.log(`总情绪类别: ${summary.totalEmotions}个`);
console.log(`总词汇数量: ${summary.totalWords}个`);
console.log(`平均每类: ${summary.avgWordsPerEmotion}个词`);
console.log(`现代用语: ${summary.modernWordsTotal}个 (${(summary.modernWordsTotal/summary.totalWords*100).toFixed(1)}%)`);
console.log(`传统词汇: ${summary.traditionalWordsTotal}个 (${(summary.traditionalWordsTotal/summary.totalWords*100).toFixed(1)}%)`);
console.log('\n💡 使用建议:');
console.log('1. 将 zh-alias-enhanced.json 替换原有的 zh-alias-improved.json');
console.log('2. 增强后的词典保持了现代用语优先级');
console.log('3. 传统词汇作为补充,提高覆盖率');
console.log('4. 建议定期更新以保持现代化');
}
/**
* 创建使用示例
*/
createUsageExample() {
const examplePath = path.join(__dirname, 'enhanced-usage-example.js');
const exampleCode = `
/**
* 增强版情绪词典使用示例
*/
const enhancedDict = require('./zh-alias-enhanced.json');
// 检查词典状态
console.log('📊 增强版词典统计:');
Object.entries(enhancedDict).forEach(([emotion, words]) => {
console.log(\`\${emotion}: \${words.length}个词\`);
});
// 测试现代用语识别
const testCases = [
'emo了', 'yyds', 'high爆了', '破防了',
'开心', '愤怒', '悲伤', '焦虑'
];
console.log('\\n🧪 测试用例:');
testCases.forEach(testWord => {
const matchedEmotions = [];
Object.entries(enhancedDict).forEach(([emotion, words]) => {
if (words.includes(testWord)) {
matchedEmotions.push(emotion);
}
});
console.log(\`"\${testWord}" -> \${matchedEmotions.join(', ') || '未匹配'}\`);
});
`;
fs.writeFileSync(examplePath, exampleCode.trim(), 'utf8');
console.log(`\n📝 使用示例已创建: ${examplePath}`);
}
}
// 如果直接运行脚本
if (require.main === module) {
const integrator = new NTUSDIntegrator();
integrator.integrate();
integrator.createUsageExample();
}
module.exports = NTUSDIntegrator;