autosnippet
Version:
Extract code patterns into a knowledge base for AI coding assistants
115 lines (114 loc) • 4.95 kB
JavaScript
/**
* CoarseRanker — 粗排器
* 多维加权排序(Recall + Semantic + Freshness + Popularity)
* Quality 维度保留但默认权重 0 — 待场景化区分后按需启用
*/
export class CoarseRanker {
#weights;
constructor(options = {}) {
this.#weights = {
recall: options.recallWeight ?? 0.45,
semantic: options.semanticWeight ?? 0.3,
quality: options.qualityWeight ?? 0,
freshness: options.freshnessWeight ?? 0.15,
popularity: options.popularityWeight ?? 0.1,
};
}
/**
* 粗排
* @param candidates 需有 recallScore、semanticScore 等字段
* @returns sorted with coarseScore
*/
rank(candidates) {
if (!candidates || candidates.length === 0) {
return [];
}
// 动态权重调整:semantic 不可用时将其权重按比例重分配给其他维度
const hasSemanticScores = candidates.some((c) => (c.semanticScore || 0) > 0);
const effectiveWeights = { ...this.#weights };
if (!hasSemanticScores && effectiveWeights.semantic > 0) {
const semWeight = effectiveWeights.semantic;
const otherTotal = 1 - semWeight;
if (otherTotal > 0) {
for (const key of Object.keys(effectiveWeights)) {
if (key !== 'semantic') {
effectiveWeights[key] +=
(effectiveWeights[key] / otherTotal) * semWeight;
}
}
}
effectiveWeights.semantic = 0;
}
// 召回分数 max-based 归一化(保留相对排序,避免 clamp 截断高分差异)
const maxRecall = candidates.reduce((m, c) => Math.max(m, c.recallScore || c.score || 0), 0) || 1;
return candidates
.map((c) => {
const recall = Math.min((c.recallScore || c.score || 0) / maxRecall, 1.0);
const semantic = this.#normalize(c.semanticScore || 0);
const quality = this.#computeQuality(c);
const freshness = this.#computeFreshness(c);
const popularity = this.#computePopularity(c);
const coarseScore = recall * effectiveWeights.recall +
semantic * effectiveWeights.semantic +
quality * effectiveWeights.quality +
freshness * effectiveWeights.freshness +
popularity * effectiveWeights.popularity;
return {
...c,
coarseScore,
coarseSignals: { recall, semantic, quality, freshness, popularity },
};
})
.sort((a, b) => b.coarseScore - a.coarseScore);
}
/**
* E-E-A-T 质量评分
* - 内容完整性 40%: 有 title + code + description
* - 结构质量 30%: 有 category + language + tags
* - 代码可读性 30%: 合理长度、有注释
*/
#computeQuality(candidate) {
let score = 0;
// 内容完整性 (40%)
const hasTitle = !!candidate.title;
const hasCode = !!(candidate.code || candidate.content);
const hasDesc = !!(candidate.description || candidate.summary);
score += (hasTitle ? 0.15 : 0) + (hasCode ? 0.15 : 0) + (hasDesc ? 0.1 : 0);
// 结构质量 (30%)
const hasCat = !!candidate.category;
const hasLang = !!candidate.language;
const hasTags = Array.isArray(candidate.tags) && candidate.tags.length > 0;
score += (hasCat ? 0.1 : 0) + (hasLang ? 0.1 : 0) + (hasTags ? 0.1 : 0);
// 代码可读性 (30%)
const code = candidate.code || candidate.content || '';
const lines = code.split('\n').length;
const hasComments = /\/\/|\/\*|#/.test(code);
const reasonableLength = lines >= 3 && lines <= 500;
score += (hasComments ? 0.15 : 0) + (reasonableLength ? 0.15 : 0);
return Math.min(score, 1.0);
}
#computeFreshness(candidate) {
const updated = candidate.updatedAt || candidate.lastModified || candidate.createdAt;
if (!updated) {
return 0.5;
}
// 自动识别秒级/毫秒级 Unix 时间戳 (秒级 ≤ 9999999999 即 2286 年)
const ts = typeof updated === 'number' && updated > 0 && updated <= 9999999999
? updated * 1000
: typeof updated === 'number'
? updated
: new Date(updated).getTime();
const ageDays = (Date.now() - ts) / 86400000;
if (ageDays < 0) {
return 1.0; // 未来时间戳视为最新
}
return Math.exp((-Math.LN2 * ageDays) / 180); // 半衰期 180 天
}
#computePopularity(candidate) {
const usage = candidate.usageCount || 0;
return usage > 0 ? Math.min(Math.log10(usage + 1) / 3, 1.0) : 0;
}
#normalize(value) {
return Math.min(Math.max(value, 0), 1.0);
}
}