// autosnippet
// Version:
// Extract code patterns into a knowledge base for AI coding assistants
// 978 lines (977 loc) • 40.5 kB
// JavaScript
/**
* SearchEngine - 统一搜索引擎
*
* 三级搜索策略: keyword → FieldWeighted ranking → semantic(可选)
* 从 V1 SearchServiceV2 迁移,适配 V2 架构
*/
import Logger from '../../infrastructure/logging/Logger.js';
import { RawDbKnowledgeAdapter, RawDbSourceRefAdapter, unwrapSearchDb, } from '../../repository/search/SearchRepoAdapter.js';
import { CoarseRanker } from './CoarseRanker.js';
import { contextBoost } from './contextBoost.js';
import { FieldWeightedScorer } from './FieldWeightedScorer.js';
import { MultiSignalRanker } from './MultiSignalRanker.js';
// ── Re-exports for backward compatibility ──
export { BM25Scorer } from './BM25Scorer.js';
export { FieldWeightedScorer } from './FieldWeightedScorer.js';
export { groupByKind, slimSearchResult } from './SearchTypes.js';
export { tokenize } from './tokenizer.js';
/**
 * SearchEngine - unified search service.
 *
 * Combines SQL keyword recall, the in-memory FieldWeightedScorer index and
 * optional vector/semantic recall, followed by an optional ranking pipeline
 * (CrossEncoder → CoarseRanker → MultiSignalRanker → contextBoost).
 */
export class SearchEngine {
  _cache;
  _cacheMaxAge;
  _coarseRanker;
  _crossEncoder;
  _fusionRecallWeight;
  _fusionSemanticWeight;
  _indexed;
  _lastIndexTime = null;
  _multiSignalRanker;
  _signalBus;
  aiProvider;
  db;
  hybridRetriever;
  #knowledgeRepo;
  #sourceRefRepo;
  logger;
  scorer;
  vectorService;
  vectorStore;

  /**
   * @param db Database handle; passed through `unwrapSearchDb` before use.
   * @param [options] Optional collaborators and tuning knobs:
   *   knowledgeRepo, sourceRefRepo, aiProvider, vectorStore, vectorService,
   *   hybridRetriever, crossEncoderReranker, signalBus, cacheMaxAge (ms,
   *   default 300000), fusionRecallWeight (default 0.6),
   *   fusionSemanticWeight (default 0.4). The whole options object is also
   *   forwarded to CoarseRanker and MultiSignalRanker.
   */
  constructor(db, options = {}) {
    this.db = unwrapSearchDb(db);
    this.#knowledgeRepo = options.knowledgeRepo ?? new RawDbKnowledgeAdapter(this.db);
    this.#sourceRefRepo = options.sourceRefRepo ?? new RawDbSourceRefAdapter(this.db);
    this.logger = Logger.getInstance();
    this.aiProvider = options.aiProvider || null;
    this.vectorStore = options.vectorStore || null;
    this.vectorService = options.vectorService || null;
    this.hybridRetriever = options.hybridRetriever || null;
    this.scorer = new FieldWeightedScorer();
    this._coarseRanker = new CoarseRanker(options);
    this._multiSignalRanker = new MultiSignalRanker(options);
    this._crossEncoder = options.crossEncoderReranker || null;
    this._indexed = false;
    this._cache = new Map();
    this._cacheMaxAge = options.cacheMaxAge || 300_000; // 5 min
    // Recall / semantic fusion weights used by the 'auto' mode (configurable).
    this._fusionRecallWeight = options.fusionRecallWeight ?? 0.6;
    this._fusionSemanticWeight = options.fusionSemanticWeight ?? 0.4;
    this._signalBus = options.signalBus || null;
  }

  // ── JSON column helpers ─────────────────────────────────────────

  /**
   * Decodes a JSON column that may arrive as a string or already decoded.
   * Returns `fallback` for null/undefined/empty values and for invalid JSON.
   */
  static #parseJson(value, fallback) {
    if (value === null || value === undefined || value === '') {
      return fallback;
    }
    if (typeof value !== 'string') {
      return value;
    }
    try {
      return JSON.parse(value) ?? fallback;
    } catch {
      return fallback;
    }
  }

  /** Decodes a JSON column expected to hold an object; anything else becomes `{}`. */
  static #parseObject(value) {
    const parsed = SearchEngine.#parseJson(value, {});
    return parsed !== null && typeof parsed === 'object' ? parsed : {};
  }

  /** Decodes a JSON column expected to hold an array; anything else becomes `[]`. */
  static #parseArray(value) {
    const parsed = SearchEngine.#parseJson(value, []);
    return Array.isArray(parsed) ? parsed : [];
  }

  /** Joins the searchable parts (pattern, rationale, markdown) of a content column. */
  static #contentText(rawContent) {
    const content = SearchEngine.#parseObject(rawContent);
    return [content.pattern, content.rationale, content.markdown].filter(Boolean).join(' ');
  }

  /** Derives usageCount (adoptions + applications + searchHits) and authorityScore from a stats column. */
  static #usageSignals(rawStats) {
    const stats = SearchEngine.#parseObject(rawStats);
    return {
      usageCount: (stats.adoptions || 0) + (stats.applications || 0) + (stats.searchHits || 0),
      authorityScore: stats.authority || 0,
    };
  }

  // ── Index lifecycle ─────────────────────────────────────────────

  /**
   * Builds the search index from scratch: clears the scorer and the result
   * cache, then indexes every entry returned by
   * `knowledgeRepo.findNonDeprecatedSync()`. Failures are logged, not thrown.
   */
  buildIndex() {
    this.scorer.clear();
    this._cache.clear();
    try {
      // Stamp BEFORE reading so that rows modified while we are indexing are
      // still picked up by the next incremental refresh.
      const indexStartedAt = new Date().toISOString();
      let entries = [];
      try {
        entries = this.#knowledgeRepo
          .findNonDeprecatedSync()
          .map((e) => ({ ...e, status: e.lifecycle }));
      } catch {
        /* table may not exist */
      }
      for (const entry of entries) {
        const text = this._buildDocText(entry);
        const meta = this._buildDocMeta(entry);
        meta.status = entry.status; // mapped from lifecycle above
        this.scorer.addDocument(entry.id, text, meta);
      }
      this._indexed = true;
      this._lastIndexTime = indexStartedAt;
      this.logger.info('Search index built', {
        entries: entries.length,
        total: this.scorer.totalDocs,
      });
    } catch (err) {
      this.logger.error('Failed to build search index', { error: err.message });
    }
  }

  /** Builds the index if it has not been built yet (idempotent). Call this before getStats() when accurate stats are needed. */
  ensureIndex() {
    if (!this._indexed) {
      this.buildIndex();
    }
  }

  /**
   * Unified search entry point.
   *
   * @param query Search text; empty or whitespace-only queries return no items.
   * @param [options]
   *   type        - entry type filter (default 'all')
   *   limit       - maximum number of items returned (default 20)
   *   mode        - 'keyword' (default) | 'weighted' | 'bm25' | 'semantic' | 'auto'
   *   rank        - run the ranking pipeline (default: true for every mode except 'keyword')
   *   context     - ranking context; a non-empty `sessionHistory` disables caching
   *   groupByKind - additionally return items bucketed as rule / pattern / fact
   * @returns {Promise<object>} `{ items, total, query, mode, type, ranked, byKind? }`
   */
  async search(query, options = {}) {
    const { type = 'all', limit = 20, mode = 'keyword', context } = options;
    const shouldRank = options.rank ?? mode !== 'keyword';
    const tSearchStart = performance.now();
    if (!query || !query.trim()) {
      return { items: [], total: 0, query };
    }
    // Searches personalised by sessionHistory are never cached.
    // NOTE(review): other context fields (forwarded to MultiSignalRanker) are
    // not part of the cache key — confirm they cannot change the ordering.
    const hasSessionContext = (context?.sessionHistory?.length ?? 0) > 0;
    const cacheKey = hasSessionContext
      ? null
      : `${query}:${type}:${limit}:${mode}:${shouldRank ? 'r' : ''}:${options.groupByKind ? 'g' : ''}`;
    if (cacheKey) {
      const cached = this._getCache(cacheKey);
      if (cached) {
        return cached;
      }
    }
    this.ensureIndex();
    // The ranking stage needs more candidates: oversample 3x.
    const recallLimit = shouldRank ? limit * 3 : limit;
    let results;
    let actualMode = mode;
    switch (mode) {
      case 'auto':
        ({ results, actualMode } = await this.#autoSearch(query, type, recallLimit, limit));
        break;
      case 'weighted':
      case 'bm25':
        results = this._scorerSearch(query, type, recallLimit);
        break;
      case 'semantic': {
        const semResult = await this._semanticSearch(query, type, recallLimit);
        results = semResult.items;
        actualMode = semResult.actualMode || 'semantic';
        break;
      }
      default:
        results = this._keywordSearch(query, type, limit);
        break;
    }
    // ── Ranking pipeline ([CrossEncoder] → CoarseRanker → MultiSignalRanker → ContextBoost) ──
    if (shouldRank && results.length > 0) {
      results = await this._applyRanking(results, query, context);
    }
    results = results.slice(0, limit);
    const response = {
      items: results,
      total: results.length,
      query,
      mode: actualMode,
      type,
      ranked: shouldRank && results.length > 0,
    };
    // ── Timing log ──
    const tSearchEnd = performance.now();
    this.logger.info(`Search completed: mode=${actualMode} total=${results.length} time=${Math.round(tSearchEnd - tSearchStart)}ms ranked=${response.ranked} query="${query}"`);
    if (options.groupByKind) {
      const byKind = { rule: [], pattern: [], fact: [] };
      for (const item of results) {
        const kind = item.kind || 'pattern';
        // Own-property lookup: a kind such as 'constructor' must not resolve
        // to an inherited Object.prototype member.
        const bucket = Object.prototype.hasOwnProperty.call(byKind, kind)
          ? byKind[kind]
          : byKind.pattern;
        bucket.push(item);
      }
      response.byKind = byKind;
    }
    if (cacheKey) {
      this._setCache(cacheKey, response);
    }
    // ── Signal emission ──
    if (this._signalBus && response.total > 0) {
      this._signalBus.send('search', 'SearchEngine', Math.min(response.total / limit, 1), {
        metadata: { query, mode: actualMode, total: response.total },
      });
    }
    return response;
  }

  /**
   * 'auto' mode: weighted-first with a confidence gate.
   * Runs the weighted search first; only when its confidence is below 60 and
   * a vectorService is configured does it call `vectorService.hybridSearch`
   * to fuse in semantic results. Any hybrid failure or empty hybrid result
   * falls back to the weighted items.
   *
   * @returns {Promise<{results: object[], actualMode: string}>}
   */
  async #autoSearch(query, type, recallLimit, limit) {
    const weightedItems = this._scorerSearch(query, type, recallLimit);
    const confidence = this.#computeWeightedConfidence(query, weightedItems, limit);
    if (confidence >= 60 || !this.vectorService) {
      this.logger.info(`[QueryRouter] skip-semantic: conf=${confidence} topScore=${weightedItems[0]?.score ?? 0} query="${query}"`);
      return { results: weightedItems, actualMode: `auto(weighted-only,conf=${confidence})` };
    }
    // Adaptive alpha: the lower the confidence, the higher the semantic weight.
    // With the default 0.4 weight: conf=0 → 0.75, conf=30 → 0.575, conf=55 → ~0.43.
    const adaptiveAlpha =
      this._fusionSemanticWeight + (0.75 - this._fusionSemanticWeight) * (1 - confidence / 60);
    this.logger.info(`[QueryRouter] invoke-semantic: conf=${confidence} alpha=${adaptiveAlpha.toFixed(2)} topScore=${weightedItems[0]?.score ?? 0} query="${query}"`);
    try {
      const rrfResults = await this.vectorService.hybridSearch(query, {
        topK: recallLimit,
        alpha: adaptiveAlpha,
        sparseSearchFn: () => weightedItems,
      });
      if (rrfResults.length > 0) {
        // NOTE(review): fused results are not re-filtered by `type`; only the
        // weighted half was filtered — confirm this is intended.
        const results = rrfResults.map((r) => {
          const base = r.data?.item || r.data || {};
          const baseMeta = base.metadata || {};
          return {
            id: r.id,
            title: base.title || baseMeta.title || r.id,
            type: base.type || 'recipe',
            kind: base.kind || baseMeta.kind || 'pattern',
            status: base.status || baseMeta.status || 'active',
            score: Math.round(r.score * 1000) / 1000,
            content: base.content,
            description: base.description,
          };
        });
        this._supplementDetails(results);
        return {
          results,
          actualMode: `auto(rrf,conf=${confidence},α=${adaptiveAlpha.toFixed(2)})`,
        };
      }
    } catch (err) {
      // Degrade to the weighted results, but leave a trace of why.
      this.logger.warn('VectorService hybrid search failed, falling back to weighted results', {
        error: err?.message,
      });
    }
    return { results: weightedItems, actualMode: `auto(weighted-fallback,conf=${confidence})` };
  }

  // ── Ranking Pipeline ────────────────────────────────────────────

  /**
   * Unified ranking pipeline:
   *   normalize → [CrossEncoder rerank] → CoarseRanker → MultiSignalRanker → contextBoost
   *
   * The CrossEncoder step only runs when a `crossEncoderReranker` was passed
   * to the constructor; contextBoost only runs when `context.sessionHistory`
   * is non-empty.
   *
   * @returns {Promise<object[]>} ranked items; `score` is the first truthy of
   *   contextScore, rankerScore, coarseScore, recallScore (else 0).
   */
  async _applyRanking(items, query, context = {}) {
    let normalized = this._normalizeForRanking(items);
    if (this._crossEncoder) {
      normalized = await this._crossEncoder.rerank(query, normalized);
    }
    let ranked = this._coarseRanker.rank(normalized);
    ranked = this._multiSignalRanker.rank(ranked, {
      ...context,
      query,
      scenario: context?.intent || 'search',
    });
    if ((context?.sessionHistory?.length ?? 0) > 0) {
      ranked = contextBoost(ranked, context);
    }
    return ranked.map((r) => ({
      ...r,
      recallScore: r.recallScore || 0,
      score: r.contextScore || r.rankerScore || r.coarseScore || r.recallScore || 0,
    }));
  }

  /**
   * Converts recalled items into the shape the rankers read: extracts `code`
   * from the content JSON (pattern, then code), decodes `tags`, and fills
   * defaults for the signal fields. The original `content` is kept untouched
   * for downstream consumers.
   */
  _normalizeForRanking(items) {
    return items.map((item) => {
      const parsedContent = item.content ? SearchEngine.#parseObject(item.content) : {};
      const codeText = parsedContent.pattern || parsedContent.code || '';
      const tags =
        typeof item.tags === 'string' ? SearchEngine.#parseArray(item.tags) : item.tags || [];
      return {
        ...item,
        code: codeText || item.code || '',
        recallScore: item.score || 0,
        qualityScore: item.qualityScore || (item.status === 'active' ? 70 : 40),
        usageCount: item.usageCount || 0,
        authorityScore: item.authorityScore || 0,
        tags,
        difficulty: item.difficulty || 'intermediate',
      };
    });
  }

  /**
   * Keyword search via `knowledgeRepo.keywordSearchSync` with a LIKE pattern.
   * LIKE wildcards in the query (`%`, `_`, `\`) are backslash-escaped.
   * When the LIKE query yields nothing (typical for natural-language
   * queries), falls back to the FieldWeighted search so keyword mode still
   * returns results.
   */
  _keywordSearch(query, type, limit) {
    const results = [];
    const escaped = query.replace(/[%_\\]/g, (ch) => `\\${ch}`);
    const pattern = `%${escaped}%`;
    const searchableTypes = ['all', 'recipe', 'knowledge', 'rule', 'solution'];
    if (searchableTypes.includes(type)) {
      try {
        let rows = [];
        try {
          rows = this.#knowledgeRepo.keywordSearchSync(pattern, limit).map((r) => ({
            ...r,
            status: r.lifecycle ?? r.status,
            type: 'knowledge',
          }));
        } catch {
          /* table may not exist */
        }
        // Basic relevance: trigger match > title match > description match > anything else.
        const lowerQ = query.toLowerCase();
        for (const r of rows) {
          let score = 0.5;
          if (r.trigger?.toLowerCase().includes(lowerQ)) {
            score = 1.2;
          } else if (r.title?.toLowerCase().includes(lowerQ)) {
            score = 1.0;
          } else if (r.description?.toLowerCase().includes(lowerQ)) {
            score = 0.8;
          }
          results.push({
            ...r,
            trigger: r.trigger || '',
            kind: r.kind || 'pattern',
            score: Math.round(score * 1000) / 1000,
          });
        }
        results.sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
      } catch {
        /* malformed row — treated like "no keyword hits" */
      }
    }
    // Fill ranking-signal fields (whenClause/doClause/tags …) like the other recall paths do.
    this._supplementDetails(results);
    if (results.length === 0) {
      this.ensureIndex();
      return this._scorerSearch(query, type, limit);
    }
    return results.slice(0, limit);
  }

  /**
   * Weighted-field search backed by `this.scorer`, with a title/trigger
   * exact-match bonus so entries whose title or trigger contains the query
   * rank first (bonus is a fraction of the top raw score: exact +50%,
   * substring +30%, query-contains-title +15%).
   */
  _scorerSearch(query, type, limit) {
    let hits = this.scorer.search(query, limit * 2);
    if (type !== 'all') {
      hits = hits.filter(({ meta }) => {
        if (type === 'rule') {
          return meta.knowledgeType === 'boundary-constraint';
        }
        // Entries are indexed with type 'knowledge' (see _buildDocMeta);
        // 'recipe' is still accepted for documents indexed under that name.
        return meta.type === 'recipe' || meta.type === 'knowledge';
      });
    }
    const lowerQuery = query.toLowerCase();
    const maxScore = hits.length > 0 ? hits[0].score : 1;
    // Work on copies so the bonus never leaks into objects owned by the scorer.
    const boosted = hits.map((hit) => {
      const title = (hit.meta.title || '').toLowerCase();
      const trigger = (hit.meta.trigger || '').toLowerCase();
      let bonus = 0;
      if (title === lowerQuery || trigger === lowerQuery) {
        bonus = maxScore * 0.5;
      } else if (title.includes(lowerQuery) || trigger.includes(lowerQuery)) {
        bonus = maxScore * 0.3;
      } else if (lowerQuery.includes(title) && title.length > 3) {
        bonus = maxScore * 0.15;
      }
      return { ...hit, score: hit.score + bonus };
    });
    boosted.sort((a, b) => b.score - a.score);
    const items = boosted.slice(0, limit).map(({ id, score, meta }) => ({
      id,
      title: meta.title,
      trigger: meta.trigger || '',
      type: meta.type,
      kind: meta.kind || 'pattern',
      status: meta.status,
      language: meta.language || '',
      category: meta.category || '',
      score: Math.round(score * 1000) / 1000,
      // Ranking signals consumed by CoarseRanker / MultiSignalRanker.
      updatedAt: meta.updatedAt || null,
      createdAt: meta.createdAt || null,
      difficulty: meta.difficulty || 'intermediate',
      tags: meta.tags || [],
      usageCount: meta.usageCount || 0,
      authorityScore: meta.authorityScore || 0,
      qualityScore: meta.qualityScore || 0,
    }));
    // Content etc. is fetched with one batched lookup rather than per item.
    this._supplementDetails(items);
    return items;
  }

  /**
   * Semantic search. Tries `vectorService.search` first, then the legacy
   * `aiProvider.embed` + `vectorStore` path, and finally degrades to the
   * FieldWeighted search when neither yields results.
   *
   * @returns {Promise<{items: object[], actualMode: string}>} `actualMode` is
   *   'semantic' for vector results and 'weighted' for the fallback.
   */
  async _semanticSearch(query, type, limit) {
    const weightedFallback = () => ({
      items: this._scorerSearch(query, type, limit),
      actualMode: 'weighted',
    });
    // Preferred path: the unified VectorService.
    if (this.vectorService) {
      try {
        const vectorResults = await this.vectorService.search(query, { topK: limit * 2 });
        if (vectorResults.length > 0) {
          const hits = vectorResults.map((vr) => ({
            rawId: vr.item.id || '',
            metadata: vr.item.metadata || {},
            score: vr.score || 0,
          }));
          return { items: this.#finalizeSemanticHits(hits, type, limit), actualMode: 'semantic' };
        }
      } catch (err) {
        this.logger.warn('VectorService search failed, falling back to legacy path', {
          error: err.message,
        });
      }
    }
    // Legacy path: aiProvider.embed + vectorStore.
    if (!this.aiProvider) {
      this.logger.debug('AI provider not available, falling back to FieldWeighted search');
      return weightedFallback();
    }
    try {
      const queryEmbedding = await this.aiProvider.embed(query);
      if (!queryEmbedding || queryEmbedding.length === 0) {
        return weightedFallback();
      }
      if (this.vectorStore) {
        try {
          let vectorResults;
          if (typeof this.vectorStore.hybridSearch === 'function') {
            const hybrid = await this.vectorStore.hybridSearch(queryEmbedding, query, {
              topK: limit * 2,
            });
            vectorResults = hybrid.map((r) => ({
              id: r.item?.id ?? r.id,
              similarity: r.score,
              score: r.score,
              content: r.item?.content,
              metadata: r.item?.metadata || {},
            }));
          } else {
            vectorResults = await this.vectorStore.query(queryEmbedding, limit * 2);
          }
          if (vectorResults && vectorResults.length > 0) {
            const hits = vectorResults.map((vr) => ({
              rawId: vr.id || '',
              metadata: vr.metadata || {},
              score: vr.similarity || vr.score || 0,
            }));
            return { items: this.#finalizeSemanticHits(hits, type, limit), actualMode: 'semantic' };
          }
        } catch (vecErr) {
          this.logger.warn('Vector store query failed, falling back to FieldWeighted', {
            error: vecErr.message,
          });
        }
      }
      this.logger.debug('Vector search fallback to FieldWeighted');
      return weightedFallback();
    } catch (err) {
      this.logger.warn('Semantic search failed, falling back to FieldWeighted', {
        error: err.message,
      });
      return weightedFallback();
    }
  }

  /**
   * Shared tail of both semantic paths: converts `{rawId, metadata, score}`
   * hits into result items, de-duplicates chunks of the same entry, applies
   * the type filter, truncates to `limit` and supplements detail fields.
   */
  #finalizeSemanticHits(hits, type, limit) {
    let results = hits.map(({ rawId, metadata, score }) => {
      // Vector ids look like "entry_<uuid>"; the DB id is the bare "<uuid>".
      const entryId = metadata.entryId || rawId.replace(/^entry_/, '');
      return {
        id: entryId,
        title: metadata.title || entryId,
        type: 'recipe',
        kind: metadata.kind || 'pattern',
        status: metadata.status || 'active',
        score: Math.round(score * 1000) / 1000,
      };
    });
    results = this.#deduplicateByEntryId(results);
    // Every item above is typed 'recipe', so only the 'rule' filter can drop anything.
    if (type === 'rule') {
      results = results.filter((r) => r.kind === 'rule');
    }
    results = results.slice(0, limit);
    this._supplementDetails(results);
    return results;
  }

  /**
   * De-duplicates by `id`, keeping the highest-scoring item per id, so that
   * several chunks of one entry do not occupy several result slots.
   * Returns the survivors sorted by score, descending.
   */
  #deduplicateByEntryId(items) {
    const seen = new Map();
    for (const item of items) {
      const existing = seen.get(item.id);
      if (!existing || (item.score ?? 0) > (existing.score ?? 0)) {
        seen.set(item.id, item);
      }
    }
    return [...seen.values()].sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
  }

  /**
   * Scores how trustworthy the weighted results are, to decide whether the
   * semantic search is worth its cost. Returns 0-100; the 'auto' mode skips
   * the semantic search at >= 60.
   */
  #computeWeightedConfidence(query, items, requestedLimit) {
    let score = 0;
    // ── Result-quality signals ──
    const topScore = items[0]?.score ?? 0;
    const secondScore = items[1]?.score ?? 0;
    // A large gap between top-1 and top-2 indicates a clear hit.
    if (items.length >= 2 && topScore > 0) {
      const relativeGap = (topScore - secondScore) / topScore;
      if (relativeGap > 0.3) {
        score += 25;
      } else if (relativeGap > 0.15) {
        score += 15;
      }
    }
    // Title/trigger match within the top 3 (3 = exact, 2 = substring, 1 = query contains title).
    const lq = query.toLowerCase();
    const matchLevel = items.slice(0, 3).reduce((best, it) => {
      const t = (it.title || '').toLowerCase();
      const tr = (it.trigger || '').toLowerCase();
      if (t === lq || tr === lq || tr === `@${lq}`) {
        return Math.max(best, 3);
      }
      if (t.includes(lq) || tr.includes(lq)) {
        return Math.max(best, 2);
      }
      if (lq.includes(t) && t.length > 3) {
        return Math.max(best, 1);
      }
      return best;
    }, 0);
    if (matchLevel === 3) {
      score += 50;
    } else if (matchLevel === 2) {
      score += 35;
    } else if (matchLevel === 1) {
      score += 15;
    }
    // Code-term detection (CamelCase, snake_case, @trigger).
    if (/^[A-Z][a-zA-Z0-9]+$/.test(query) ||
      /^[a-z]+(_[a-z]+)+$/.test(query) ||
      query.startsWith('@')) {
      score += 25;
    }
    // Enough candidates were recalled.
    if (items.length >= requestedLimit) {
      score += 10;
    }
    // ── Query-shape signals (lower confidence → lean towards semantic) ──
    // Chinese natural-language question markers.
    // NOTE(review): `[如怎什为何哪]么?` matches any ONE of those characters on
    // its own, so non-question words containing e.g. 为 or 如 also trigger
    // the penalty — confirm this breadth is intended.
    if (/[如怎什为何哪]么?|是否|有没有|都有哪些|应该|需要/.test(query)) {
      score -= 40;
    }
    // English natural-language question openers.
    if (/^(how|what|why|when|where|which|can|does|is|should)\b/i.test(query)) {
      score -= 40;
    }
    // Longer queries are more likely descriptive sentences.
    if (query.length > 20) {
      score -= 20;
    } else if (query.length > 10) {
      score -= 10;
    }
    return Math.max(0, Math.min(100, score));
  }

  /**
   * Fills detail fields on `items` IN PLACE using two batched lookups:
   *   1. `knowledgeRepo.findByIdsDetailSync(ids)` → content, description,
   *      trigger, headers, moduleName, whenClause/doClause and ranking
   *      signals (language, category, timestamps, difficulty, tags,
   *      qualityScore, usageCount, authorityScore). Existing truthy values on
   *      an item are kept.
   *   2. `sourceRefRepo.findActiveByRecipeIds(ids)` → `sourceRefs` (for rows
   *      with status 'renamed' and a new path, the new path is used).
   * Both lookups are best-effort: failures leave the items as they are.
   */
  _supplementDetails(items) {
    if (!items || items.length === 0) {
      return;
    }
    try {
      const ids = items.map((it) => it.id);
      let rows = [];
      try {
        rows = this.#knowledgeRepo.findByIdsDetailSync(ids);
      } catch {
        /* table may not exist */
      }
      const rowMap = new Map(rows.map((r) => [r.id, r]));
      for (const item of items) {
        const row = rowMap.get(item.id);
        if (!row) {
          continue;
        }
        item.content = item.content || row.content || undefined;
        item.description = item.description || row.description || '';
        item.trigger = item.trigger || row.trigger || '';
        if (row.headers) {
          item.headers = row.headers;
        }
        if (row.moduleName) {
          item.moduleName = row.moduleName;
        }
        // Delivery and ranking fields: only fill what the item does not already carry.
        const fillIfMissing = [
          'whenClause',
          'doClause',
          'language',
          'category',
          'updatedAt',
          'createdAt',
          'difficulty',
        ];
        for (const field of fillIfMissing) {
          if (!item[field] && row[field]) {
            item[field] = row[field];
          }
        }
        if (!item.tags || (Array.isArray(item.tags) && item.tags.length === 0)) {
          item.tags = SearchEngine.#parseArray(row.tags);
        }
        if (!item.qualityScore) {
          item.qualityScore = SearchEngine.#parseObject(row.quality).overall || 0;
        }
        // usageCount and authorityScore are filled independently of each other.
        if (!item.usageCount || !item.authorityScore) {
          const signals = SearchEngine.#usageSignals(row.stats);
          if (!item.usageCount) {
            item.usageCount = signals.usageCount;
          }
          if (!item.authorityScore) {
            item.authorityScore = signals.authorityScore;
          }
        }
      }
    } catch {
      /* DB may not be available */
    }
    // ── Verified source references from the source-ref repository ──
    try {
      const ids = items.map((it) => it.id);
      const refsRows = this.#sourceRefRepo.findActiveByRecipeIds(ids);
      this.logger.debug('recipe_source_refs query', {
        idCount: ids.length,
        rowCount: refsRows.length,
      });
      const refsMap = new Map();
      for (const row of refsRows) {
        // Rows may use camelCase or snake_case column names.
        const recipeId = row.recipeId ?? row.recipe_id;
        const sourcePath = row.sourcePath ?? row.source_path;
        const newPath = row.newPath ?? row.new_path;
        const refPath = row.status === 'renamed' && newPath ? newPath : sourcePath;
        if (!refsMap.has(recipeId)) {
          refsMap.set(recipeId, []);
        }
        refsMap.get(recipeId).push(refPath);
      }
      for (const item of items) {
        const refs = refsMap.get(item.id);
        if (refs && refs.length > 0) {
          item.sourceRefs = refs;
        }
      }
    } catch {
      /* recipe_source_refs table may not exist */
    }
  }

  /**
   * Refreshes the index, incrementally when possible.
   *
   * 1. Not indexed yet, no previous index time, or `opts.force` → full buildIndex().
   * 2. Otherwise loads entries from `knowledgeRepo.findUpdatedSinceSync(lastIndexTime)`:
   *    - lifecycle 'deprecated' → scorer.removeDocument()
   *    - anything else          → scorer.updateDocument()
   * 3. The result cache is always cleared.
   * If the incremental path throws, falls back to a full rebuild.
   *
   * @param [opts] - `force: true` forces a full rebuild
   */
  refreshIndex(opts = {}) {
    if (opts.force || !this._indexed || !this._lastIndexTime) {
      this._indexed = false;
      this.buildIndex();
      return;
    }
    this._cache.clear();
    try {
      // Stamp BEFORE querying so rows written while we refresh are not
      // skipped by the next refresh (re-indexing a row twice is harmless).
      const refreshStartedAt = new Date().toISOString();
      const changed = this.#knowledgeRepo.findUpdatedSinceSync(this._lastIndexTime);
      let added = 0;
      let removed = 0;
      for (const r of changed) {
        if (r.lifecycle === 'deprecated') {
          if (this.scorer.removeDocument(r.id)) {
            removed++;
          }
          continue;
        }
        this.scorer.updateDocument(r.id, this._buildDocText(r), this._buildDocMeta(r));
        added++;
      }
      this._lastIndexTime = refreshStartedAt;
      if (added > 0 || removed > 0) {
        this.logger.info('Search index refreshed (incremental)', { added, removed });
      }
    } catch (err) {
      this.logger.warn('Incremental refresh failed, falling back to full rebuild', {
        error: err.message,
      });
      this._indexed = false;
      this.buildIndex();
    }
  }

  /**
   * Builds the flat index text for a DB row.
   *
   * Field boosting is done by token repetition: title ×2, trigger ×2,
   * everything else ×1 (description, language, category, knowledgeType,
   * tags, content text). The boost is deliberately mild so that long
   * documents do not inflate the average length and dilute content matches.
   * `content` and `tags` may be JSON strings or already-decoded values.
   */
  _buildDocText(r) {
    const contentText = SearchEngine.#contentText(r.content);
    const tagText = SearchEngine.#parseArray(r.tags).join(' ');
    const title = r.title || '';
    const trigger = r.trigger || '';
    const fields = [
      title,
      title, // ×2 boost
      trigger,
      trigger, // ×2 boost
      r.description || '', // ×1 (no boost)
      r.language,
      r.category,
      r.knowledgeType,
      tagText,
      contentText,
    ];
    return fields.filter(Boolean).join(' ');
  }

  /**
   * Builds the document meta stored alongside the index text: display
   * fields, field-level scoring inputs (description, contentText) and the
   * ranking signals decoded from the `tags`, `stats` and `quality` columns.
   */
  _buildDocMeta(r) {
    const { usageCount, authorityScore } = SearchEngine.#usageSignals(r.stats);
    return {
      type: 'knowledge',
      title: r.title,
      trigger: r.trigger || '',
      description: r.description || '',
      contentText: SearchEngine.#contentText(r.content),
      status: r.lifecycle,
      knowledgeType: r.knowledgeType,
      kind: r.kind || 'pattern',
      language: r.language || '',
      category: r.category || '',
      updatedAt: r.updatedAt || null,
      createdAt: r.createdAt || null,
      difficulty: r.difficulty || 'intermediate',
      tags: SearchEngine.#parseArray(r.tags),
      usageCount,
      authorityScore,
      qualityScore: SearchEngine.#parseObject(r.quality).overall || 0,
    };
  }

  /**
   * Returns index and cache statistics for the CURRENT state.
   * This does not build the index; call ensureIndex() first when the
   * numbers must reflect a built index.
   */
  getStats() {
    return {
      indexed: this._indexed,
      totalDocuments: this.scorer.totalDocs,
      avgDocLength: Math.round(this.scorer.avgLength * 10) / 10,
      cacheSize: this._cache.size,
      uniqueTokens: Object.keys(this.scorer.docFreq).length,
      hasVectorStore: !!this.vectorStore,
      hasVectorService: !!this.vectorService,
      hasAiProvider: !!this.aiProvider,
    };
  }

  /** Returns the cached value for `key`, or null when absent or older than `_cacheMaxAge`. */
  _getCache(key) {
    const entry = this._cache.get(key);
    if (!entry) {
      return null;
    }
    if (Date.now() - entry.time > this._cacheMaxAge) {
      this._cache.delete(key);
      return null;
    }
    // LRU: re-insert to move the key to the end of the Map's iteration order.
    this._cache.delete(key);
    this._cache.set(key, entry);
    return entry.data;
  }

  /** Stores `data` under `key`; once the cache exceeds 500 entries the oldest 20% are evicted first. */
  _setCache(key, data) {
    if (this._cache.size > 500) {
      const toDelete = Math.floor(this._cache.size * 0.2);
      const keys = this._cache.keys();
      for (let i = 0; i < toDelete; i++) {
        const k = keys.next().value;
        if (k !== undefined) {
          this._cache.delete(k);
        }
      }
    }
    // Map.set keeps an existing key's original position; delete first so a
    // rewritten key counts as most recently used.
    this._cache.delete(key);
    this._cache.set(key, { data, time: Date.now() });
  }
}
export default SearchEngine;