UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

42 lines (41 loc) 1.41 kB
/** * CandidateAggregator — 候选条目去重聚合 * * 对候选列表按 title 进行模糊去重,保留最优条目。 * 被 knowledge handler 的 submitKnowledgeBatch 使用。 * * @module service/candidate/CandidateAggregator */ import { jaccardSimilarity, tokenizeForSimilarity } from '../../shared/similarity.js'; /** title 相似度阈值,超过此值视为重复 */ const TITLE_SIMILARITY_THRESHOLD = 0.85; /** * 对候选条目列表进行去重聚合 * * @returns > }} */ export function aggregateCandidates(items, opts = {}) { if (!Array.isArray(items) || items.length === 0) { return { items: [], duplicates: [] }; } const threshold = opts.threshold ?? TITLE_SIMILARITY_THRESHOLD; const kept = []; const duplicates = []; for (const item of items) { const titleTokens = tokenizeForSimilarity(item.title || ''); let isDuplicate = false; for (const existing of kept) { const existingTokens = tokenizeForSimilarity(existing.title || ''); const sim = jaccardSimilarity(titleTokens, existingTokens); if (sim >= threshold) { duplicates.push({ item, duplicateOf: existing.title }); isDuplicate = true; break; } } if (!isDuplicate) { kept.push(item); } } return { items: kept, duplicates }; }