UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

184 lines (183 loc) 8.12 kB
/** * PrimeSearchPipeline — Enrichment Layer * * Multi-query parallel search + scenario routing + session history accumulation. * Replaces TaskKnowledgeBridge with full search pipeline integration. * * @module service/task/PrimeSearchPipeline */ import { slimSearchResult } from '#service/search/SearchTypes.js'; // ── Constants ─────────────────────────────────────── /** Absolute minimum score — items below this are definitely noise */ const MIN_SCORE_THRESHOLD = 0.3; /** Relative threshold — items scoring below this fraction of the best result are dropped */ const RELATIVE_SCORE_RATIO = 0.15; /** Gap ratio — if score drops by more than this factor from the previous item, truncate */ const GAP_DROP_RATIO = 0.25; // ── PrimeSearchPipeline ───────────────────────────── export class PrimeSearchPipeline { #search; #sessionQueries = []; constructor(searchEngine) { this.#search = searchEngine; } /** * Core method: multi-query search + scenario routing + result merging. */ async search(intent) { if (!intent.queries.length || !intent.queries[0]?.trim()) { return null; } // Build ranking context const context = { language: intent.language ?? undefined, intent: intent.scenario, sessionHistory: this.#buildSessionHistory(), }; // Multi-query parallel search (auto mode + keyword mode for cross-language) const allResults = await this.#multiQuerySearch(intent.queries, intent.keywordQueries ?? [], context); // Quality filter: absolute threshold + relative-to-best + score gap detection const filtered = this.#qualityFilter(allResults); if (filtered.length === 0) { return null; } // Classify: knowledge vs rules const knowledge = filtered.filter((r) => r.kind !== 'rule').slice(0, 5); const rules = filtered.filter((r) => r.kind === 'rule').slice(0, 3); // Record search to session history this.#sessionQueries.push(intent.raw.userQuery); return { relatedKnowledge: knowledge, guardRules: rules, searchMeta: { queries: intent.queries, scenario: intent.scenario, language: intent.language, module: intent.module, resultCount: allResults.length, filteredCount: filtered.length, }, }; } /** * Reset session history (called on new session start). */ resetSession() { this.#sessionQueries = []; } // ── Private ─────────────────────────────────────── /** * Quality filter: absolute threshold + relative-to-best + score gap detection. * Expects items sorted by score descending. */ #qualityFilter(items) { if (items.length === 0) { return []; } const maxScore = items[0]?.score ?? 0; const effectiveThreshold = Math.max(MIN_SCORE_THRESHOLD, maxScore * RELATIVE_SCORE_RATIO); const result = []; let prevScore = maxScore; for (const item of items) { const score = item.score; if (score < effectiveThreshold) { break; } // Gap detection: if score drops sharply from previous item, stop if (result.length > 0 && score < prevScore * GAP_DROP_RATIO) { break; } result.push(item); prevScore = score; } return result; } /** * Multi-query parallel search with optional Reciprocal Rank Fusion (RRF). * * Single-query: preserves original search engine scores (BM25/CoarseRanker). * Multi-query: uses RRF to fuse results, but weights by original score to * retain magnitude information. */ async #multiQuerySearch(autoQueries, keywordQueries, context) { // Auto-mode searches (BM25 without CoarseRanker ranking) // Using rank: false preserves raw BM25/FWS score magnitude, // which the quality filter needs for effective discrimination. // CoarseRanker's max-normalization + freshness/popularity signals // would cluster scores around 0.35–0.41, defeating the filter. const autoPromises = autoQueries.map((q) => this.#search .search(q, { mode: 'auto', limit: 8, rank: false, context }) .catch(() => ({ items: [] }))); // Semantic-mode search for primary query — ensures semantic is always // part of RRF fusion even when auto mode skips it (confidence ≥ 60) const semanticPromise = autoQueries[0] ? this.#search .search(autoQueries[0], { mode: 'semantic', limit: 6, rank: false }) .catch(() => ({ items: [] })) : Promise.resolve({ items: [] }); // Keyword-mode searches (raw FWS scores — for cross-language synonym matching) const kwPromises = keywordQueries.map((q) => this.#search .search(q, { mode: 'keyword', limit: 8, rank: false }) .catch(() => ({ items: [] }))); const [autoResponses, kwResponses, semanticResponse] = await Promise.all([ Promise.all(autoPromises), Promise.all(kwPromises), semanticPromise, ]); // Merge: auto + semantic + keyword const semanticItems = (semanticResponse.items || []); const allResponses = [ ...autoResponses, ...(semanticItems.length > 0 ? [semanticResponse] : []), ...kwResponses, ]; // Single-query shortcut: preserve original scores from search engine. // RRF is pointless with one response — it just converts rank to score, // discarding the magnitude information from BM25/CoarseRanker. if (allResponses.length === 1) { const items = (allResponses[0]?.items || []); return items.map(slimSearchResult).sort((a, b) => b.score - a.score); } // Multi-query: Weighted RRF — RRF(d) = Σ origScore / (k + rank) // Retains original score magnitude while still boosting cross-query overlap. const RRF_K = 60; const rrfScores = new Map(); const itemById = new Map(); for (const resp of allResponses) { const items = (resp.items || []); for (let rank = 0; rank < items.length; rank++) { const raw = items[rank]; const origScore = Math.max(raw.score || 0, 0.01); const item = slimSearchResult(raw); rrfScores.set(item.id, (rrfScores.get(item.id) ?? 0) + origScore / (RRF_K + rank)); // Keep the richest metadata version if (!itemById.has(item.id)) { itemById.set(item.id, item); } } } // Assign fused scores and sort // Rescale: RRF_K division crushes scores to ~0.003–0.02 range, // which falls below qualityFilter's MIN_SCORE_THRESHOLD (0.1). // Multiply by RRF_K to restore original score magnitude. // Effective formula: Σ origScore / (1 + rank/K), preserving magnitude // while still giving a gentle rank-based discount. const results = []; for (const [id, rrfScore] of rrfScores) { const item = itemById.get(id); if (!item) { continue; } item.score = Math.round(rrfScore * RRF_K * 1000) / 1000; results.push(item); } return results.sort((a, b) => b.score - a.score); } /** * Build sessionHistory for contextBoost (last 5 queries). */ #buildSessionHistory() { return this.#sessionQueries.slice(-5).map((q) => ({ content: q })); } }