// @autobe/agent
// Version:
// AI backend server code generator
// 284 lines • 12.1 kB
// JavaScript
;
/**
 * Drives a generator function as an asynchronous routine and returns a promise
 * for its final value. An existing `this.__awaiter` is reused when present.
 *
 * @param thisArg - `this` value the generator function is applied with
 * @param _arguments - argument list forwarded to the generator (`[]` when falsy)
 * @param P - promise constructor to use; falls back to the global `Promise`
 * @param generator - generator function whose yielded values are awaited
 * @returns a promise resolved with the generator's return value, or rejected
 *   with any error thrown while stepping the generator
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a `P` promise unless it already is an instance of `P`.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a settled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolve once the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and run it synchronously up to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Flag the exports object with `__esModule: true` (the usual transpiler marker
// for a module that was written in ES-module syntax).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. These assignments appear before the function
// bodies; that works because function declarations are hoisted.
exports.buildVectorIndexHybrid = buildVectorIndexHybrid;
exports.retrieveTopKAdaptiveHybrid = retrieveTopKAdaptiveHybrid;
exports.clearIndexCache = clearIndexCache;
exports.buildAnalysisContextSections = buildAnalysisContextSections;
/** @author juntak */
const node_crypto_1 = require("node:crypto");
/**
 * Cosine of the angle between two numeric vectors.
 *
 * Only the first `min(a.length, b.length)` components take part, so vectors of
 * unequal length are compared over their shared prefix.
 *
 * @param a - first vector
 * @param b - second vector
 * @returns the similarity, or 0 when either compared prefix has zero magnitude
 */
function cosineSimilarity(a, b) {
  const size = Math.min(a.length, b.length);
  let dotProduct = 0;
  let squaredA = 0;
  let squaredB = 0;
  let pos = 0;
  while (pos < size) {
    const x = a[pos];
    const y = b[pos];
    dotProduct += x * y;
    squaredA += x * x;
    squaredB += y * y;
    pos += 1;
  }
  if (squaredA !== 0 && squaredB !== 0) {
    return dotProduct / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
  }
  return 0;
}
// BM25
/**
 * Split text into lowercase tokens for BM25.
 *
 * Listed punctuation characters are turned into spaces, the text is split on
 * whitespace runs, and pieces shorter than two characters are discarded.
 *
 * @param text - raw text to tokenize
 * @returns tokens in their order of appearance
 */
function tokenize(text) {
  const lowered = text.toLowerCase();
  const stripped = lowered.replace(/[`"'.,:;!?()[\]{}<>]/g, " ");
  const kept = [];
  for (const piece of stripped.split(/\s+/)) {
    if (piece.length >= 2) {
      kept.push(piece);
    }
  }
  return kept;
}
/**
 * Count how many times each token occurs.
 *
 * @param tokens - iterable of tokens belonging to one document
 * @returns Map from token to its occurrence count, in first-seen order
 */
function buildTf(tokens) {
  const counts = new Map();
  for (const token of tokens) {
    const seen = counts.get(token);
    counts.set(token, (seen == null ? 0 : seen) + 1);
  }
  return counts;
}
/**
 * Compute document frequencies: for each term, the number of documents whose
 * `tokens` contain it at least once.
 *
 * @param indexDocs - iterable of documents exposing a `tokens` collection
 * @returns Map from term to the number of documents containing it
 */
function buildDf(indexDocs) {
  const frequencies = new Map();
  for (const doc of indexDocs) {
    // A Set collapses repeats so each document counts a term only once.
    new Set(doc.tokens).forEach((term) => {
      const soFar = frequencies.get(term);
      frequencies.set(term, (soFar == null ? 0 : soFar) + 1);
    });
  }
  return frequencies;
}
/**
 * BM25 relevance of one document for a query.
 *
 * Each distinct query term contributes `idf * tf * (k1 + 1) / (tf + k1 * norm)`
 * where `norm = 1 - b + b * docLen / avgdl`. Terms that are absent from the
 * corpus statistics or from the document contribute nothing.
 *
 * @param queryTokens - query tokens (duplicates are ignored)
 * @param docTf - Map of term to frequency for the document
 * @param docLen - number of tokens in the document
 * @param stats - corpus statistics `{ N, avgdl, df }`
 * @param k1 - term-frequency saturation parameter (default 1.5)
 * @param b - length-normalization parameter (default 0.75)
 * @returns the summed score, 0 when no query term matches
 */
function bm25Score(queryTokens, docTf, docLen, stats, k1 = 1.5, b = 0.75) {
  let total = 0;
  new Set(queryTokens).forEach((term) => {
    const corpusHits = stats.df.get(term);
    const df = corpusHits == null ? 0 : corpusHits;
    if (df === 0) {
      return;
    }
    const idf = Math.log(1 + (stats.N - df + 0.5) / (df + 0.5));
    const docHits = docTf.get(term);
    const tf = docHits == null ? 0 : docHits;
    if (tf === 0) {
      return;
    }
    const lengthNorm = 1 - b + b * (docLen / stats.avgdl);
    const denominator = tf + k1 * lengthNorm;
    const numerator = tf * (k1 + 1);
    total += idf * (numerator / denominator);
  });
  return total;
}
/**
 * Rescale a list of numbers onto [0, 1] using min-max normalization.
 *
 * The extrema are found with a single pass instead of `Math.min(...values)`:
 * spreading passes every element as a call argument and throws a RangeError
 * once the array is large enough. Folding through `Math.min` / `Math.max`
 * keeps their semantics, so results are unchanged for inputs that worked
 * before.
 *
 * @param values - numbers to normalize
 * @returns a new array of the same length; when all values are equal every
 *   entry is 0 if that value is 0 and 1 otherwise; `[]` for empty input
 */
function minMaxNormalize(values) {
  if (values.length === 0) {
    return [];
  }
  let min = Infinity;
  let max = -Infinity;
  for (const v of values) {
    min = Math.min(min, v);
    max = Math.max(max, v);
  }
  if (max === min) {
    // Degenerate range: no spread to scale by.
    return values.map(() => (max === 0 ? 0 : 1));
  }
  return values.map((v) => (v - min) / (max - min));
}
/**
 * Build the hybrid (dense vector + BM25) retrieval index for a set of sections.
 *
 * Each section becomes one document whose text is `heading`, a newline, then
 * `content`. The text is tokenized for BM25 and all texts are embedded in a
 * single `embedder.embed` call.
 *
 * @param embedder - object whose `embed(texts)` resolves to one vector per text
 * @param sections - entries exposing `filename`, `index`, `heading`, `content`
 * @returns `{ index, bm25 }` where `index` holds `{ id, section, vector, tf,
 *   docLen }` per section and `bm25` holds the corpus statistics
 *   `{ N, avgdl, df }`
 * @throws {Error} when the embedder yields no vector for one of the sections
 */
function buildVectorIndexHybrid(embedder, sections) {
  return __awaiter(this, void 0, void 0, function* () {
    // Nothing to index: skip the embedder round-trip and return the same
    // empty shape used elsewhere in this module.
    if (sections.length === 0) {
      return { index: [], bm25: { N: 0, avgdl: 0, df: new Map() } };
    }
    const docs = sections.map((s) => {
      const text = `${s.heading}\n${s.content}`;
      const tokens = tokenize(text);
      const tf = buildTf(tokens);
      return {
        id: `${s.filename}:${s.index}`,
        text,
        section: s,
        tokens,
        tf,
        docLen: tokens.length,
      };
    });
    const vectors = yield embedder.embed(docs.map((d) => d.text));
    const N = docs.length;
    const totalLen = docs.reduce((acc, d) => acc + d.docLen, 0);
    const avgdl = N > 0 ? totalLen / N : 0;
    const df = buildDf(docs);
    const bm25 = { N, avgdl, df };
    const index = docs.map((d, i) => {
      const vector = vectors[i];
      // A missing vector would otherwise be stored as `undefined` and only
      // blow up later, inside similarity scoring, far from the real cause.
      if (vector == null) {
        throw new Error(`Embedder returned ${vectors.length} vector(s) for ${N} section(s); no vector for "${d.id}"`);
      }
      return {
        id: d.id,
        section: d.section,
        vector,
        tf: d.tf,
        docLen: d.docLen,
      };
    });
    return { index, bm25 };
  });
}
/**
 * Restrict a number to the closed interval [lo, hi].
 *
 * @param x - value to restrict
 * @param lo - lower bound
 * @param hi - upper bound
 * @returns `x` capped at `hi`, then raised to at least `lo`
 */
function clamp(x, lo, hi) {
  const capped = Math.min(hi, x);
  const floored = Math.max(lo, capped);
  return floored;
}
/**
 * Percentile of an ascending-sorted list using linear interpolation between
 * the two nearest ranks.
 *
 * @param sortedAsc - numbers sorted in ascending order
 * @param p - percentile, expressed on a 0-100 scale
 * @returns the interpolated value, or 0 for an empty list
 */
function percentile(sortedAsc, p) {
  const count = sortedAsc.length;
  if (count === 0) {
    return 0;
  }
  const lastIndex = count - 1;
  const position = (p / 100) * lastIndex;
  const lowerIndex = Math.floor(position);
  const fraction = position - lowerIndex;
  // The upper neighbour is pinned to the last element at the top end.
  const upperIndex = Math.min(lowerIndex + 1, lastIndex);
  const lower = sortedAsc[lowerIndex];
  const upper = sortedAsc[upperIndex];
  return lower + (upper - lower) * fraction;
}
// Hybrid Retrieval
/**
 * Rank every indexed section against a query with a weighted blend of vector
 * similarity and BM25, then keep an adaptively sized top slice.
 *
 * Both score lists are min-max normalized before blending. The slice size K
 * moves between `kMin` and the effective `kMax`: the wider the gap between the
 * 90th and 50th percentile of the blended scores, the closer K gets to `kMin`.
 *
 * @param embedder - object whose `embed(texts)` resolves to one vector per text
 * @param queryText - query to embed and tokenize
 * @param index - entries carrying `section`, `vector`, `tf` and `docLen`
 * @param bm25 - corpus statistics handed to `bm25Score`
 * @param kMin - K used when the score distribution is sharp (default 3)
 * @param kMax - K used when it is flat; when nullish it defaults to
 *   `min(12, max(8, ceil(0.05 * index.length)))`
 * @param wVec - weight of the normalized vector score (default 0.6)
 * @param wBm25 - weight of the normalized BM25 score (default 0.4)
 * @param debug - print the dynamic-K figures to the console (default false)
 * @returns hits `{ section, score, reason }` in descending score order; an
 *   empty array when the embedder yields no query vector
 */
function retrieveTopKAdaptiveHybrid(embedder_1, queryText_1, index_1, bm25_1) {
  return __awaiter(this, arguments, void 0, function* (embedder, queryText, index, bm25, kMin = 3, kMax, wVec = 0.6, wBm25 = 0.4, debug = false) {
    const total = index.length;
    const upperK = kMax == null
      ? Math.min(12, Math.max(8, Math.ceil(0.05 * total)))
      : kMax;

    const embedded = yield embedder.embed([queryText]);
    const queryVector = embedded[0];
    if (!queryVector) {
      return [];
    }
    const queryTokens = tokenize(queryText);

    // Raw scores under both signals first, then each list onto a common scale.
    const rawDense = index.map((entry) => cosineSimilarity(queryVector, entry.vector));
    const rawLexical = index.map((entry) => bm25Score(queryTokens, entry.tf, entry.docLen, bm25));
    const denseScores = minMaxNormalize(rawDense);
    const lexicalScores = minMaxNormalize(rawLexical);

    const hits = index.map((entry, i) => {
      const dense = denseScores[i];
      const lexical = lexicalScores[i];
      const score = wVec * dense + wBm25 * lexical;
      return {
        section: entry.section,
        score,
        reason: `hybrid=${score.toFixed(4)} (vec=${dense.toFixed(3)}, bm25=${lexical.toFixed(3)})`,
      };
    });

    // Dynamic K: a large p90-p50 gap means a few clear winners, so keep fewer.
    const ascending = hits.map((hit) => hit.score).sort((x, y) => x - y);
    const p90 = percentile(ascending, 90);
    const p50 = percentile(ascending, 50);
    const gap = p90 - p50;
    const GAP_MIN = 0.02;
    const GAP_MAX = 0.5;
    const sharpness = clamp((gap - GAP_MIN) / (GAP_MAX - GAP_MIN), 0, 1);
    const pickCount = Math.round(kMin + (1 - sharpness) * (upperK - kMin));

    if (debug) {
      const report = [
        `[DYNAMIC-K-DEBUG]`,
        ` kMin=${kMin}, kMax=${upperK}, computedK=${pickCount}`,
        ` p90=${p90.toFixed(4)}, p50=${p50.toFixed(4)}, gap=${gap.toFixed(4)}`,
        ` sharpness=${sharpness.toFixed(4)} (0=flat, 1=sharp)`,
        ` totalHits=${hits.length}`,
      ];
      for (const line of report) {
        console.log(line);
      }
    }

    hits.sort((left, right) => right.score - left.score);
    return hits.slice(0, pickCount);
  });
}
// Finished indexes, keyed by the SHA-256 hash of the section pool they were
// built from; each value is `{ hash, index, bm25 }`.
const _sectionIndexCache = new Map();
// Builds still in flight, keyed by the same hash, so that callers arriving
// while a build is running receive its promise instead of starting another.
const _sectionBuildingPromises = new Map();
/**
 * Fingerprint a section pool with SHA-256.
 *
 * Every section is rendered as `filename:index:heading`, a newline, then
 * `content`; the renderings are joined with a `---` separator line and hashed.
 *
 * @param sections - entries exposing `filename`, `index`, `heading`, `content`
 * @returns the digest as a lowercase hex string
 */
function computeSectionsHash(sections) {
  const rendered = sections.map(
    ({ filename, index, heading, content }) => `${filename}:${index}:${heading}\n${content}`,
  );
  const payload = rendered.join("\n---\n");
  const hasher = (0, node_crypto_1.createHash)("sha256");
  hasher.update(payload);
  return hasher.digest("hex");
}
/**
 * Return the hybrid index for a section pool, building it at most once per
 * distinct pool.
 *
 * Lookup order: finished-index cache, then in-flight builds, then a fresh
 * build via `buildVectorIndexHybrid`. A build that fails is not cached, so a
 * later call with the same pool starts a new build.
 *
 * NOTE(review): the cache key is derived from `sections` only; `embedder` is
 * not part of it, so a pool indexed with one embedder is served unchanged to a
 * later call that passes a different one — confirm this is intended.
 * NOTE(review): within the code visible here, entries leave the cache only
 * through `clearIndexCache()`; there is no eviction.
 *
 * @param embedder - embedding provider forwarded to `buildVectorIndexHybrid`
 * @param sections - section pool to index
 * @returns promise of `{ index, bm25 }`
 */
function getOrBuildSectionIndex(embedder, sections) {
return __awaiter(this, void 0, void 0, function* () {
// An empty pool gets an empty index without touching either map.
if (sections.length === 0) {
return {
index: [],
bm25: { N: 0, avgdl: 0, df: new Map() },
};
}
const hash = computeSectionsHash(sections);
const cached = _sectionIndexCache.get(hash);
if (cached) {
return { index: cached.index, bm25: cached.bm25 };
}
// No yield happens between these lookups and the registration below, so two
// callers cannot both decide to start a build for the same hash.
const existingPromise = _sectionBuildingPromises.get(hash);
if (existingPromise) {
return existingPromise;
}
const buildPromise = (() => __awaiter(this, void 0, void 0, function* () {
const { index, bm25 } = yield buildVectorIndexHybrid(embedder, sections);
// Only a successful build reaches this line and is stored.
_sectionIndexCache.set(hash, { hash, index, bm25 });
return { index, bm25 };
}))();
_sectionBuildingPromises.set(hash, buildPromise);
try {
return yield buildPromise;
}
finally {
// Drop the in-flight registration whether the build succeeded or failed.
_sectionBuildingPromises.delete(hash);
}
});
}
/**
 * Forget every cached section index and every registered in-flight build.
 */
function clearIndexCache() {
  for (const store of [_sectionIndexCache, _sectionBuildingPromises]) {
    store.clear();
  }
}
/**
 * Select the analysis context to hand to a downstream agent, working at
 * section granularity.
 *
 * The input is `IAnalysisSectionEntry[]` as produced by
 * `convertToSectionEntries`; every entry is an independent retrieval unit.
 *
 * - `NONE`, or an empty `sections`: returns `[]`.
 * - `FULL`: returns `sections` itself, untouched.
 * - otherwise (TOPK): indexes the sections (the index is cached by a SHA-256
 *   hash of the pool, so repeated calls with the same pool reuse it) and
 *   returns the entries picked by hybrid retrieval, best first.
 *
 * `options.kMin` / `options.kMax` are divided by `options.splitCount`
 * (default 1) and rounded up. A falsy `kMin` / `kMax` (including 0) is treated
 * as not provided, leaving the retrieval defaults in effect.
 *
 * NOTE(review): hits are mapped back to entries by `id` alone, which assumes
 * `id` is unique across the whole pool — TODO confirm against the producer.
 *
 * @param embedder - Embedding provider for vector search (only used in TOPK)
 * @param sections - Source section entries (from convertToSectionEntries)
 * @param query - Query text for retrieval (only used in TOPK mode)
 * @param mode - Analysis context mode (NONE/FULL/TOPK)
 * @param options - Optional `log`, `logPrefix`, `splitCount`, `kMin`, `kMax`
 * @returns Filtered or full section entries based on mode
 */
function buildAnalysisContextSections(embedder, sections, query, mode, options) {
  return __awaiter(this, void 0, void 0, function* () {
    const opts = options == null ? {} : options;
    const logEnabled = opts.log == null ? false : opts.log;
    const prefix = opts.logPrefix ? `[${opts.logPrefix}]` : "";

    // Total `content` length of a list of entries; missing content counts as 0.
    const countChars = (entries) => entries.reduce((sum, entry) => {
      const text = entry.content;
      const size = text == null ? undefined : text.length;
      return sum + (size == null ? 0 : size);
    }, 0);

    const inputTotalChars = countChars(sections);

    if (mode === "NONE" || sections.length === 0) {
      if (logEnabled) {
        console.log(`[RAG-SECTIONS]${prefix} mode=${mode} sections=0 chars=0 (skipped)`);
      }
      return [];
    }

    if (mode === "FULL") {
      if (logEnabled) {
        console.log(`[RAG-SECTIONS]${prefix} mode=FULL sections=${sections.length} chars=${inputTotalChars} (pass-through)`);
      }
      return sections;
    }

    // TOPK: reshape each entry into the section form the index builder reads.
    const reqSections = sections.map((entry) => ({
      filename: entry.filename,
      heading: `### ${entry.sectionTitle}`,
      content: `[${entry.unitTitle}] ${entry.content}`,
      index: entry.id,
      level: 3,
    }));
    const built = yield getOrBuildSectionIndex(embedder, reqSections);
    const { index, bm25 } = built;
    if (index.length === 0) {
      if (logEnabled) {
        console.log(`[RAG-SECTIONS]${prefix} mode=TOPK sections=0 chars=0 (empty index)`);
      }
      return [];
    }

    const split = opts.splitCount == null ? 1 : opts.splitCount;
    const kMin = opts.kMin ? Math.ceil(opts.kMin / split) : undefined;
    const kMax = opts.kMax ? Math.ceil(opts.kMax / split) : undefined;
    const hits = yield retrieveTopKAdaptiveHybrid(embedder, query, index, bm25, kMin, kMax);

    // `index` on each indexed section was set to the entry id above.
    const entryById = new Map();
    for (const entry of sections) {
      entryById.set(entry.id, entry);
    }
    const result = [];
    for (const hit of hits) {
      const entry = entryById.get(hit.section.index);
      if (entry !== undefined) {
        result.push(entry);
      }
    }

    if (logEnabled) {
      const resultTotalChars = countChars(result);
      let reduction = "0";
      if (inputTotalChars > 0) {
        reduction = ((1 - resultTotalChars / inputTotalChars) * 100).toFixed(1);
      }
      console.log(`[RAG-SECTIONS]${prefix} mode=TOPK sections=${result.length} chars=${resultTotalChars} reduction=${reduction}%`);
    }
    return result;
  });
}
//# sourceMappingURL=RAGRetrieval.js.map