// autosnippet
// Version:
// Extract code patterns into a knowledge base for AI coding assistants
// 507 lines (506 loc) • 23 kB
// JavaScript
/**
* ConsolidationAdvisor — 提交前融合顾问
*
* 解决问题:Agent 逐条提交 Recipe 导致碎片化、低价值条目激增。
*
* 设计思路:在新知识提交前分析已有知识库,给出 4 种建议之一:
* create — 独立有价值,正常新建(走正常可信度判断)
* merge — 与 1 条 Recipe 相似,将候选内容合并到已有 Recipe,合并后 Recipe → staging
* reorganize — 与多条 Recipe 交叉重叠,将候选功能拆分到已有 Recipe 上,被修改的 Recipe → staging
* insufficient — 独立价值不足且已有足够 Recipe 覆盖,交给 Agent 与开发者决定
*
* 分析维度:
* 1. 结构相似度 — 复用 RedundancyAnalyzer 的 4 维算法
* 2. 语义域覆盖 — category + trigger 是否落在已有 Recipe 管辖范围
* 3. 独立价值 — 内容长度、具体性、是否有独立 coreCode
*/
// Module-scoped alias slot for the class; it is assigned at the bottom of this file.
var _a;
import { COUNTABLE_LIFECYCLES } from '../../domain/knowledge/Lifecycle.js';
import Logger from '../../infrastructure/logging/Logger.js';
import { ContradictionDetector } from './ContradictionDetector.js';
/* ────────────────────── Constants ────────────────────── */
/** Candidates whose substance score is below this value are treated as insufficient / fragmentary. */
const MIN_SUBSTANCE_SCORE = 0.3;
/**
 * Lower bound of "moderate overlap": a similarity at or above this value (and below
 * HIGH_OVERLAP_THRESHOLD) triggers the merge-or-create decision, and it is also the
 * cut-off for reporting overlap between two candidates of the same batch.
 */
const ENHANCE_THRESHOLD = 0.4;
/** A similarity at or above this value is classified as high overlap. */
const HIGH_OVERLAP_THRESHOLD = 0.65;
/** Upper bound on the number of related recipes loaded for one analysis (bounds the cost). */
const MAX_CANDIDATES_PER_ANALYSIS = 30;
/**
 * Weights of the four similarity dimensions (title, clauses, code, guard pattern).
 * Frozen so that the shared constant cannot be mutated by accident.
 */
const WEIGHTS = Object.freeze({ title: 0.2, clause: 0.3, code: 0.3, guard: 0.2 });
/* ────────────────────── Class ────────────────────── */
/**
 * Pre-submission consolidation advisor.
 *
 * Compares a candidate knowledge entry with the recipes returned by the knowledge
 * repository and recommends exactly one action:
 *   - `create`       — nothing related was found, overlap is low, or the candidate
 *                      adds new dimensions to a moderately overlapping recipe
 *   - `merge`        — the candidate overlaps with a single existing recipe
 *   - `reorganize`   — the candidate highly overlaps with two or more recipes
 *   - `insufficient` — the candidate's own substance score is below the threshold
 */
export class ConsolidationAdvisor {
  /** Generic terms ignored when looking for keywords a candidate adds to a recipe. */
  static #STOP_TERMS = new Set([
    'use',
    'using',
    'used',
    'make',
    'code',
    'file',
    'class',
    'method',
    'function',
    'should',
    'must',
    'will',
    'can',
    'need',
    'when',
    'for',
    'with',
    'from',
    '使用',
    '需要',
    '可以',
    '应该',
    '不要',
    '必须',
    '进行',
    '方法',
    '函数',
  ]);

  /** Repository queried for existing recipes. */
  #knowledgeRepo;
  #logger = Logger.getInstance();

  /**
   * @param knowledgeRepo - repository exposing `findAllByLifecyclesAndCategory`,
   *   `findByLifecyclesAndTriggerPrefix` and `findAllByLifecycles`
   */
  constructor(knowledgeRepo) {
    this.#knowledgeRepo = knowledgeRepo;
  }

  /**
   * Analyze how a candidate relates to the existing recipes and return advice.
   *
   * @param candidate - candidate data about to be submitted
   * @returns advice object: `action`, `confidence`, `reason`, plus context fields
   *   (`coveredBy`, `reorganizeTargets`, `targetRecipe`, `mergeDirection`,
   *   `relatedRecipes`) depending on the action
   */
  async analyze(candidate) {
    const substanceScore = this.#assessSubstance(candidate);
    const related = await this.#loadRelatedRecipes(candidate);
    const scored = this.#rankBySimilarity(candidate, related);

    // Too little substance: leave the decision to the agent / developer.
    if (substanceScore < MIN_SUBSTANCE_SCORE) {
      const advice = {
        action: 'insufficient',
        confidence: scored.length > 0 ? 0.85 : 0.8,
        reason: this.#buildInsufficientReason(candidate, substanceScore, scored.slice(0, 3)),
      };
      if (scored.length > 0) {
        advice.coveredBy = ConsolidationAdvisor.#toRecipeRefs(scored, 5);
      }
      return advice;
    }

    // Nothing related was loaded: plain creation.
    if (scored.length === 0) {
      return {
        action: 'create',
        confidence: 0.95,
        reason: `在 ${candidate.category || '全库'} 中未找到相关 Recipe,可安全新建。`,
      };
    }

    const top = scored[0];
    const topPercent = ConsolidationAdvisor.#percent(top.similarity);
    const highOverlaps = scored.filter((s) => s.similarity >= HIGH_OVERLAP_THRESHOLD);

    // Two or more high overlaps: distribute the candidate over the existing recipes.
    if (highOverlaps.length >= 2) {
      return {
        action: 'reorganize',
        confidence: Math.min(0.9, top.similarity),
        reason:
          `候选与 ${highOverlaps.length} 条现有 Recipe 高度重叠(最高相似度 ${topPercent}%),` +
          `建议将候选功能拆分到这些已有 Recipe 上(保留已有 Recipe 的质量数据),被修改的 Recipe 状态转为 staging。` +
          `修改后的 Recipe 走正常可信度判断。`,
        reorganizeTargets: ConsolidationAdvisor.#toRecipeRefs(highOverlaps),
        relatedRecipes: ConsolidationAdvisor.#toRecipeRefs(scored, 5),
      };
    }

    // Exactly one high overlap: merge into that recipe.
    if (highOverlaps.length === 1) {
      const direction = this.#computeMergeDirection(candidate, top.recipe);
      return ConsolidationAdvisor.#mergeAdvice(
        top,
        scored,
        direction,
        `候选与「${top.recipe.title}」高度重叠(${topPercent}%),` +
          `建议将候选内容合并到该 Recipe(保留已有 Recipe 的质量数据),合并后 Recipe 状态转为 staging。` +
          `${direction.summary}修改后走正常可信度判断。`,
      );
    }

    // Moderate overlap: merge unless the candidate contributes something new.
    const hasModerateOverlap = scored.some(
      (s) => s.similarity >= ENHANCE_THRESHOLD && s.similarity < HIGH_OVERLAP_THRESHOLD,
    );
    if (hasModerateOverlap) {
      const direction = this.#computeMergeDirection(candidate, top.recipe);
      if (direction.addedDimensions.length === 0) {
        return ConsolidationAdvisor.#mergeAdvice(
          top,
          scored,
          direction,
          `候选与「${top.recipe.title}」有中度重叠(${topPercent}%),` +
            `且未提供新维度,建议将候选内容合并到该 Recipe(保留已有 Recipe 的质量数据),合并后 Recipe 状态转为 staging。` +
            `修改后走正常可信度判断。`,
        );
      }
      return {
        action: 'create',
        confidence: 0.7,
        reason:
          `候选与「${top.recipe.title}」有中度重叠(${topPercent}%),` +
          `但提供了新维度(${direction.addedDimensions.join('、')}),允许新建。` +
          `请确保新 Recipe 职责边界清晰。`,
        relatedRecipes: ConsolidationAdvisor.#toRecipeRefs(scored, 5),
      };
    }

    // No significant overlap: plain creation, with the closest recipes as context.
    return {
      action: 'create',
      confidence: 0.9,
      reason: `候选与最相似 Recipe「${top.recipe.title}」相似度仅 ${topPercent}%,可安全新建。`,
      relatedRecipes: ConsolidationAdvisor.#toRecipeRefs(scored, 3),
    };
  }

  /**
   * Analyze a batch of candidates.
   *
   * Every candidate is analyzed on its own via `analyze()`, and in addition each
   * pair of candidates is compared so that overlap inside the batch is reported.
   *
   * @param candidates - array of candidates about to be submitted
   * @returns `{ items, internalOverlaps }` — per-candidate advice (with its index)
   *   and the candidate pairs whose similarity reaches ENHANCE_THRESHOLD
   */
  async analyzeBatch(candidates) {
    // Analyses are awaited one after another, as before, so repository calls do not run concurrently.
    const items = [];
    for (let index = 0; index < candidates.length; index++) {
      items.push({ index, advice: await this.analyze(candidates[index]) });
    }

    const internalOverlaps = [];
    for (let i = 0; i < candidates.length; i++) {
      for (let j = i + 1; j < candidates.length; j++) {
        const sim = this.#computeCandidateSimilarity(candidates[i], candidates[j]);
        if (sim >= ENHANCE_THRESHOLD) {
          internalOverlaps.push({
            indexA: i,
            indexB: j,
            similarity: ConsolidationAdvisor.#round2(sim),
          });
        }
      }
    }
    return { items, internalOverlaps };
  }

  /* ════════════════════ Substance assessment ════════════════════ */

  /**
   * Score (0-1, two decimals) how much standalone substance a candidate has.
   *
   * Weighted sum of three parts:
   *   1. content (0.4)     — combined doClause + dontClause length, bonus for coreCode >= 50 chars
   *   2. specificity (0.3) — a specific trigger and a whenClause of at least 20 chars
   *   3. code (0.3)        — trimmed coreCode length
   */
  #assessSubstance(c) {
    const clauseLen = (c.doClause || '').length + (c.dontClause || '').length;
    const codeLen = (c.coreCode || '').trim().length;

    let contentScore = 0;
    if (clauseLen >= 100) {
      contentScore = 1.0;
    } else if (clauseLen >= 40) {
      contentScore = 0.6;
    } else if (clauseLen > 0) {
      contentScore = 0.3;
    }
    if (codeLen >= 50) {
      contentScore = Math.min(1.0, contentScore + 0.2);
    }

    let specificityScore = 0;
    if (ConsolidationAdvisor.#hasSpecificTrigger(c.trigger)) {
      specificityScore += 0.5;
    }
    if (c.whenClause && c.whenClause.length >= 20) {
      specificityScore += 0.5;
    }

    let codeScore = 0;
    if (codeLen >= 100) {
      codeScore = 1.0;
    } else if (codeLen >= 30) {
      codeScore = 0.5;
    } else if (codeLen > 0) {
      codeScore = 0.2;
    }

    const total = contentScore * 0.4 + specificityScore * 0.3 + codeScore * 0.3;
    return ConsolidationAdvisor.#round2(total);
  }

  /**
   * Build the human-readable reason attached to `insufficient` advice.
   *
   * @param c - the candidate
   * @param score - its substance score
   * @param topRelated - scored related recipes to name as existing coverage (may be empty)
   */
  #buildInsufficientReason(c, score, topRelated) {
    const issues = [];
    if ((c.doClause || '').length < 40) {
      issues.push('doClause 过短');
    }
    if ((c.dontClause || '').length < 20) {
      issues.push('dontClause 过短');
    }
    if ((c.coreCode || '').trim().length < 30) {
      issues.push('coreCode 不足');
    }
    // Same predicate as #assessSubstance, so every trigger that scored zero is reported.
    if (!ConsolidationAdvisor.#hasSpecificTrigger(c.trigger)) {
      issues.push('缺少有效 trigger');
    }
    if (!c.whenClause || c.whenClause.length < 20) {
      issues.push('whenClause 过于笼统');
    }

    let msg =
      `候选实质性评分 ${ConsolidationAdvisor.#percent(score)}% 不足(阈值 ${ConsolidationAdvisor.#percent(MIN_SUBSTANCE_SCORE)}%)。` +
      `问题: ${issues.join('、')}。`;
    if (topRelated.length > 0) {
      const coverList = topRelated.map((r) => `「${r.recipe.title}」`).join('、');
      msg +=
        `该领域已有 Recipe 覆盖(${coverList}),` +
        `建议与开发者讨论: 是补齐已有 Recipe 还是放弃此候选。`;
    } else {
      msg += `建议补充更多具体细节后再提交,或将此内容合并到更广泛的 Recipe 中。`;
    }
    return msg;
  }

  /* ════════════════════ Related recipe loading ════════════════════ */

  /**
   * Load the recipes the candidate is compared against, as plain summaries.
   *
   * With a category, recipes of that category are loaded; if fewer than five come
   * back and the trigger yields a prefix, a trigger-prefix query tops the list up.
   * Without a category, the first MAX_CANDIDATES_PER_ANALYSIS recipes are used.
   * Repository failures are logged and reported as "no related recipes".
   */
  async #loadRelatedRecipes(candidate) {
    try {
      const category = candidate.category || '';
      const trigger = typeof candidate.trigger === 'string' ? candidate.trigger : '';
      const dashIdx = trigger.indexOf('-', 1);
      // Prefix = text up to the first '-' after position 0, but never shorter than 3 chars.
      const triggerPrefix = trigger.startsWith('@')
        ? trigger.slice(0, Math.max(3, dashIdx > 0 ? dashIdx : trigger.length))
        : '';

      const toSummary = (e) => ({
        id: e.id,
        title: e.title,
        doClause: e.doClause || null,
        dontClause: e.dontClause || null,
        coreCode: e.coreCode || null,
        category: e.category || null,
        trigger: e.trigger || null,
        whenClause: e.whenClause || null,
        guardPattern: e.content?.pattern || null,
      });

      if (!category) {
        const all = await this.#knowledgeRepo.findAllByLifecycles(COUNTABLE_LIFECYCLES);
        return all.slice(0, MAX_CANDIDATES_PER_ANALYSIS).map(toSummary);
      }

      const entries = await this.#knowledgeRepo.findAllByLifecyclesAndCategory(
        COUNTABLE_LIFECYCLES,
        category,
        MAX_CANDIDATES_PER_ANALYSIS,
      );
      const results = entries.map(toSummary);
      if (results.length < 5 && triggerPrefix.length >= 3) {
        const extra = await this.#knowledgeRepo.findByLifecyclesAndTriggerPrefix(
          COUNTABLE_LIFECYCLES,
          category,
          triggerPrefix,
          MAX_CANDIDATES_PER_ANALYSIS - results.length,
        );
        // Ids present before the top-up; only entries not among them are appended.
        const existingIds = new Set(results.map((r) => r.id));
        for (const e of extra) {
          const summary = toSummary(e);
          if (!existingIds.has(summary.id)) {
            results.push(summary);
          }
        }
      }
      return results;
    } catch (err) {
      this.#logger.warn(
        `ConsolidationAdvisor: failed to load recipes: ${err instanceof Error ? err.message : String(err)}`,
      );
      return [];
    }
  }

  /* ════════════════════ Structural similarity ════════════════════ */

  /** Score every recipe against the candidate and sort by similarity, highest first. */
  #rankBySimilarity(candidate, recipes) {
    return recipes
      .map((recipe) => ({ recipe, similarity: this.#computeSimilarity(candidate, recipe) }))
      .sort((a, b) => b.similarity - a.similarity);
  }

  /**
   * Four-dimension similarity between a candidate and a recipe summary:
   * title, clauses, code and guard pattern, combined using WEIGHTS.
   */
  #computeSimilarity(candidate, recipe) {
    const titleSim = ConsolidationAdvisor.#titleJaccard(candidate.title, recipe.title);
    const clauseSim = ConsolidationAdvisor.#clauseJaccard(
      [candidate.doClause, candidate.dontClause],
      [recipe.doClause, recipe.dontClause],
    );
    const codeSim = ConsolidationAdvisor.#codeSimilarity(
      candidate.coreCode ?? null,
      recipe.coreCode ?? null,
    );
    const candidatePattern = candidate.content?.pattern ?? null;
    // The guard dimension is all-or-nothing: both patterns present and identical.
    const guardSim =
      candidatePattern && recipe.guardPattern && candidatePattern === recipe.guardPattern ? 1.0 : 0;
    return (
      WEIGHTS.title * titleSim +
      WEIGHTS.clause * clauseSim +
      WEIGHTS.code * codeSim +
      WEIGHTS.guard * guardSim
    );
  }

  /**
   * Similarity between two candidates of the same batch. The guard dimension is
   * skipped and its weight redistributed: title 0.25 / clause 0.4 / code 0.35.
   */
  #computeCandidateSimilarity(a, b) {
    const titleSim = ConsolidationAdvisor.#titleJaccard(a.title, b.title);
    const clauseSim = ConsolidationAdvisor.#clauseJaccard(
      [a.doClause, a.dontClause],
      [b.doClause, b.dontClause],
    );
    const codeSim = ConsolidationAdvisor.#codeSimilarity(a.coreCode ?? null, b.coreCode ?? null);
    return 0.25 * titleSim + 0.4 * clauseSim + 0.35 * codeSim;
  }

  /* ════════════════════ Merge direction ════════════════════ */

  /**
   * Determine which "dimensions" the candidate would add to an existing recipe.
   * An empty `addedDimensions` means the candidate contributes nothing the target lacks.
   *
   * @returns `{ addedDimensions, summary }`
   */
  #computeMergeDirection(candidate, target) {
    const added = [];

    // 1. Candidate has core code while the target has none (or very little).
    const candidateCodeLen = (candidate.coreCode || '').trim().length;
    const targetCodeLen = (target.coreCode || '').trim().length;
    if (candidateCodeLen > 30 && targetCodeLen < 30) {
      added.push('coreCode');
    }
    // 2. Candidate has a dontClause while the target has none.
    if ((candidate.dontClause || '').length > 20 && !(target.dontClause || '').trim()) {
      added.push('dontClause');
    }
    // 3. Candidate has a longer whenClause where the target's is very short.
    if ((candidate.whenClause || '').length > 30 && (target.whenClause || '').length < 15) {
      added.push('whenClause');
    }
    // 4. Candidate's doClause contains at least three key terms absent from the target's clauses.
    const candidateTerms = ConsolidationAdvisor.#extractKeyTerms(candidate.doClause || '');
    const targetTerms = ConsolidationAdvisor.#extractKeyTerms(
      [target.doClause, target.dontClause].filter(Boolean).join(' '),
    );
    const newTerms = [...candidateTerms].filter((t) => !targetTerms.has(t));
    if (newTerms.length >= 3) {
      added.push(`新关键词(${newTerms.slice(0, 3).join(',')})`);
    }

    const summary =
      added.length > 0
        ? `候选可为已有 Recipe 补充: ${added.join('、')}。`
        : `候选未提供已有 Recipe 缺失的维度,合并后内容以已有 Recipe 为主。`;
    return { addedDimensions: added, summary };
  }

  /* ════════════════════ Static helpers ════════════════════ */

  /** Assemble `merge` advice for the top-ranked recipe. */
  static #mergeAdvice(top, scored, direction, reason) {
    return {
      action: 'merge',
      confidence: top.similarity,
      reason,
      targetRecipe: ConsolidationAdvisor.#toRecipeRef(top),
      mergeDirection: direction,
      relatedRecipes: ConsolidationAdvisor.#toRecipeRefs(scored, 5),
    };
  }

  /** Reduce a scored entry to `{ id, title, similarity }` with similarity rounded to 2 decimals. */
  static #toRecipeRef({ recipe, similarity }) {
    return {
      id: recipe.id,
      title: recipe.title,
      similarity: ConsolidationAdvisor.#round2(similarity),
    };
  }

  /** Map the first `limit` scored entries (all of them by default) to recipe references. */
  static #toRecipeRefs(scored, limit = scored.length) {
    return scored.slice(0, limit).map((entry) => ConsolidationAdvisor.#toRecipeRef(entry));
  }

  /** Round to two decimals. */
  static #round2(value) {
    return Math.round(value * 100) / 100;
  }

  /** Format a 0-1 ratio as a whole-number percentage string (without the % sign). */
  static #percent(ratio) {
    return (ratio * 100).toFixed(0);
  }

  /** A trigger counts as specific when it is a string starting with '@' and longer than 3 chars. */
  static #hasSpecificTrigger(trigger) {
    return typeof trigger === 'string' && trigger.startsWith('@') && trigger.length > 3;
  }

  /** Jaccard index |A ∩ B| / |A ∪ B| of two sets; 0 when both are empty. */
  static #jaccard(setA, setB) {
    if (setA.size === 0 && setB.size === 0) {
      return 0;
    }
    let intersection = 0;
    for (const item of setA) {
      if (setB.has(item)) {
        intersection++;
      }
    }
    const union = setA.size + setB.size - intersection;
    return union === 0 ? 0 : intersection / union;
  }

  /** Jaccard index over the topic words of two titles. */
  static #titleJaccard(titleA, titleB) {
    return ConsolidationAdvisor.#jaccard(
      ContradictionDetector.extractTopicWords(titleA),
      ContradictionDetector.extractTopicWords(titleB),
    );
  }

  /** Jaccard index over the topic words of two clause groups; 0 if either group is empty. */
  static #clauseJaccard(clausesA, clausesB) {
    const textA = clausesA.filter(Boolean).join(' ');
    const textB = clausesB.filter(Boolean).join(' ');
    if (!textA || !textB) {
      return 0;
    }
    return ConsolidationAdvisor.#jaccard(
      ContradictionDetector.extractTopicWords(textA),
      ContradictionDetector.extractTopicWords(textB),
    );
  }

  /** Whitespace-insensitive code similarity (3-gram Jaccard); 0 if either side is empty. */
  static #codeSimilarity(codeA, codeB) {
    if (!codeA || !codeB) {
      return 0;
    }
    const a = codeA.replace(/\s+/g, '');
    const b = codeB.replace(/\s+/g, '');
    if (a.length === 0 || b.length === 0) {
      return 0;
    }
    return ConsolidationAdvisor.#ngramJaccard(a, b, 3);
  }

  /** Jaccard index over the character n-grams of two strings. */
  static #ngramJaccard(a, b, n) {
    return ConsolidationAdvisor.#jaccard(
      ConsolidationAdvisor.#ngrams(a, n),
      ConsolidationAdvisor.#ngrams(b, n),
    );
  }

  /**
   * Set of character n-grams of `text`. Text shorter than `n` is kept as a single
   * gram, so that two identical short strings still compare as equal instead of
   * both collapsing to an empty set.
   */
  static #ngrams(text, n) {
    const grams = new Set();
    if (text.length < n) {
      if (text.length > 0) {
        grams.add(text);
      }
      return grams;
    }
    for (let i = 0; i <= text.length - n; i++) {
      grams.add(text.slice(i, i + n));
    }
    return grams;
  }

  /** Topic words of `text` that are at least 3 chars long and not in the stop list. */
  static #extractKeyTerms(text) {
    const terms = new Set();
    for (const word of ContradictionDetector.extractTopicWords(text)) {
      if (!ConsolidationAdvisor.#STOP_TERMS.has(word) && word.length >= 3) {
        terms.add(word);
      }
    }
    return terms;
  }
}
// Bind the module-level alias `_a` (declared near the top of the file) to the class.
_a = ConsolidationAdvisor;