@zosmaai/pi-llm-wiki

Version:

Self-maintaining LLM Wiki for Pi — a Karpathy-pattern knowledge base with immutable source capture, automated ingestion, search, linting, and an Obsidian-compatible vault. An auto-updating personal and company wiki.

github.com/zosmaai/pi-llm-wiki

zosmaai/pi-llm-wiki

1,036 lines (927 loc) • 37.3 kB

text/typescript

import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import {
  type Embedder,
  type EmbeddingStore,
  cosineSimilarity,
  normalizeVector,
  readEmbeddingStore,
  resolveEmbedder,
} from "./embeddings.js";
import type { Registry } from "./metadata.js";
import type { Runtime } from "./runtime.js";
import type { TaskConfig } from "./task-config.js";
import {
  type VaultPaths,
  getPersonalWikiPaths,
  isPersonalVault,
  parseFrontmatter,
  readJson,
  resolveVaultPaths,
} from "./utils.js";

// ─── Public API ────────────────────────────────────────

export interface RecallResult {
  /** Page identifier (folder-qualified, e.g. "concepts/rag") */
  id: string;
  /** Page title */
  title: string;
  /** Page type: source, entity, concept, synthesis, analysis */
  type: string;
  /** First N chars of page content for context */
  preview: string;
  /** Absolute filesystem path to the page (resolvable by the `read` tool). */
  path: string;
  /** Vault source label for dual-vault results */
  vaultLabel?: string;
  /** Relevance score (higher = better match). Used for filtering auto-injected results. */
  score: number;
}

/** Internal ranking record for one candidate page during a single search. */
type Scored = {
  id: string;
  entry: Registry["pages"][string];
  score: number;
  pagePath: string;
  bestChunkPreview: string;
  /** Cosine similarity to the query vector (0 when no semantic context). */
  semCos: number;
};

// ─── Hybrid (lexical + semantic) ranking ─────────────────

/**
 * Semantic re-ranking context for a single search (issue #67, epic #63).
 *
 * The query vector is computed ONCE per query (a single, cached embedding
 * lookup) in the async wrapper; the per-vault page vectors are read from the
 * precomputed `meta/embeddings.json` sidecar (written at #66 write-time). The
 * actual ranking is pure vector math — there is NO embedding/LLM call in
 * `searchWiki` itself, so the lexical hot path stays synchronous and offline.
 */
export interface SemanticContext {
  /** L2-normalized embedding of the query string. */
  queryVector: number[];
  /** Blend weight for the semantic signal (0 = lexical only, 1 = max boost). */
  weight: number;
}

/** Default blend weight when none is configured. */
export const DEFAULT_SEMANTIC_WEIGHT = 0.5;

/**
 * Lexical points a perfect (cosine = 1) semantic match is worth at full
 * weight. Chosen so a strong paraphrase match (cosine ≳ 0.84) at the default
 * weight (0.5) clears the auto-injection threshold (minScore = 5) on its own,
 * while weak/incidental similarity stays below it.
 */
export const SEMANTIC_SCALE = 12;

/**
 * Minimum cosine for a page with NO lexical match to even be considered a
 * semantic candidate. Keeps the candidate set bounded (near-orthogonal pages
 * are ignored) instead of pulling in the entire embedded vault.
 */
export const SEMANTIC_MIN_COSINE = 0.2;

/**
 * Blend a lexical score with a cosine similarity. The lexical score keeps its
 * original absolute scale (so `minScore` semantics survive); the semantic
 * signal is added as a bounded, weighted boost on a comparable scale. With no
 * semantic signal (cosine ≤ 0) this is the identity on the lexical score, so
 * the pure-lexical path is preserved exactly.
 *
 * A non-finite cosine (e.g. NaN from a degenerate vector) is treated as "no
 * semantic signal" so it can never poison the score or the sort order.
 *
 * @param lexical - Lexical (plus PRF) score on its absolute scale.
 * @param cosine - Cosine similarity between query and page vectors.
 * @param weight - Blend weight for the semantic boost.
 * @returns The fused score.
 */
export function fuseScores(lexical: number, cosine: number, weight: number): number {
  const similarity = Number.isFinite(cosine) ? Math.max(cosine, 0) : 0;
  return lexical + weight * SEMANTIC_SCALE * similarity;
}

/**
 * Normalize text for recall matching.
 *
 * Wiki queries are often short and multilingual (for example: "继续学习pi").
 * Normalization keeps CJK characters intact, lowercases Latin text, removes
 * punctuation boundaries, and makes hyphenated page IDs match space-separated
 * queries.
 *
 * NFKC is applied BEFORE lowercasing: some compatibility characters decompose
 * to uppercase ASCII (e.g. "ℂ" → "C"), which would otherwise survive and miss
 * the lowercase-only term extraction in `queryTerms`.
 */
function normalizeText(value: unknown): string {
  return flattenSearchValue(value)
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[\-_./\\]+/g, " ")
    .replace(/[\p{P}\p{S}]+/gu, " ")
    .replace(/\s+/g, " ")
    .trim();
}

/** Remove all whitespace, so "pi recall" and "pirecall" compare equal. */
function compactText(value: string): string {
  return value.replace(/\s+/g, "");
}

/**
 * Flatten an arbitrary metadata value into one searchable string: nullish
 * becomes "", arrays and object values are flattened recursively and joined
 * with spaces, everything else goes through `String()`.
 */
function flattenSearchValue(value: unknown): string {
  if (value == null) return "";
  if (Array.isArray(value)) return value.map(flattenSearchValue).join(" ");
  if (typeof value === "object") return Object.values(value).map(flattenSearchValue).join(" ");
  return String(value);
}

/** Drop empty strings and duplicates, preserving first-seen order. */
function unique(values: string[]): string[] {
  return [...new Set(values.filter(Boolean))];
}

/**
 * Tokenize with support for CJK short queries and English/kebab-case terms.
 *
 * Besides whitespace tokens, this returns Latin/digit runs ("pi", "recall")
 * and overlapping CJK bigrams/trigrams. The full normalized query is also kept
 * so exact short phrases still rank highest. At most 30 terms are returned.
 */
function queryTerms(query: string): string[] {
  const normalized = normalizeText(query);
  const compact = compactText(normalized);
  const terms: string[] = [];

  if (normalized) terms.push(normalized);
  if (compact && compact !== normalized) terms.push(compact);

  for (const part of normalized.split(/\s+/)) {
    if (part.length >= 2) terms.push(part);
  }

  const latinRuns = normalized.match(/[a-z0-9]{2,}/g) ?? [];
  terms.push(...latinRuns);

  const cjkRuns =
    normalized.match(/[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}]+/gu) ?? [];
  for (const run of cjkRuns) {
    for (let size = 2; size <= 3; size++) {
      if (run.length < size) continue;
      for (let i = 0; i <= run.length - size; i++) {
        terms.push(run.slice(i, i + size));
      }
    }
  }

  return unique(terms).slice(0, 30);
}

/** True when `term` occurs in `haystack`, either verbatim or ignoring whitespace. */
function includesTerm(haystack: string, term: string): boolean {
  if (!haystack || !term) return false;
  return haystack.includes(term) || compactText(haystack).includes(compactText(term));
}

/** Score one field: `weight` points for every query term found in its normalized text. */
function scoreField(value: unknown, terms: string[], weight: number): number {
  const text = normalizeText(value);
  if (!text) return 0;
  let score = 0;
  for (const term of terms) {
    if (includesTerm(text, term)) score += weight;
  }
  return score;
}

// ─── Common English stopwords ─────────────────────────

const STOPWORDS = new Set([
  "the",
  "this",
  "that",
  "with",
  "from",
  "have",
  "been",
  "were",
  "they",
  "their",
  "them",
  "will",
  "would",
  "could",
  "should",
  "about",
  "there",
  "which",
  "what",
  "when",
  "where",
  "than",
  "then",
  "also",
  "just",
  "more",
  "some",
  "such",
  "only",
  "other",
  "into",
  "over",
  "very",
  "after",
  "before",
  "because",
  "between",
  "through",
  "during",
  "without",
  "within",
  "along",
  "these",
  "those",
  "page",
  "section",
  "note",
  "info",
  "type",
  "used",
  "using",
]);

// ─── Chunk-Level Indexing ────────────────────────────

interface PageChunk {
  /** The heading text (without the leading `#` markers), or empty for the intro section */
  heading: string;
  /** Content of this chunk */
  content: string;
  /** Heading level (0 for intro, 1 for #, 2 for ##, etc.) */
  level: number;
}

/**
 * Split a page's body into chunks by headings.
 * Each heading and its following content become one chunk.
 * Content before the first heading becomes the intro chunk.
 */
function chunkPage(body: string): PageChunk[] {
  if (!body.trim()) return [];

  const chunks: PageChunk[] = [];
  let currentHeading = "";
  let currentLevel = 0;
  let currentContent: string[] = [];

  // Push the chunk accumulated so far (skipped when there is nothing to save).
  const flush = (): void => {
    if (currentContent.length > 0 || currentHeading) {
      chunks.push({
        heading: currentHeading,
        content: currentContent.join("\n").trim(),
        level: currentLevel,
      });
    }
  };

  for (const line of body.split("\n")) {
    const headingMatch = line.trim().match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      flush();
      currentHeading = headingMatch[2].trim();
      currentLevel = headingMatch[1].length;
      currentContent = [];
    } else {
      currentContent.push(line);
    }
  }
  flush();

  return chunks;
}

/** First 200 chars of the page body (frontmatter removed), flattened to one line. */
function pagePreview(content: string): string {
  const { body } = parseFrontmatter(content);
  return body.trim().slice(0, 200).replace(/\n/g, " ");
}

/**
 * Preview of a matched chunk: the heading (if any) followed by the first
 * 180 chars of the chunk content, flattened to one line.
 */
function chunkPreview(heading: string, content: string): string {
  const trimmed = content.slice(0, 180).replace(/\n/g, " ");
  if (heading) {
    return `#${heading} — ${trimmed}`;
  }
  return trimmed;
}

/**
 * Extract distinctive terms from the top search results for query expansion.
 * Pseudo-relevance feedback: terms from top-matching pages that aren't in
 * the original query become expansion candidates.
 *
 * @param scored - Candidates, already sorted best-first; only the top 3 are used.
 * @param originalQuery - The user's query; words contained in it are excluded.
 * @param paths - Vault paths (currently unused; kept for signature stability).
 * @param maxTerms - Maximum number of expansion terms to return.
 */
function extractExpansionTerms(
  scored: Scored[],
  originalQuery: string,
  paths: VaultPaths,
  maxTerms = 6,
): string[] {
  const topResults = scored.slice(0, Math.min(3, scored.length));
  if (topResults.length === 0) return [];

  const originalNorm = normalizeText(originalQuery);
  const termFreq = new Map<string, number>();

  // Count every word of length >= 4 that is neither a stopword nor already
  // part of the original query.
  const tally = (text: string): void => {
    for (const w of text.split(/\s+/)) {
      if (w.length >= 4 && !originalNorm.includes(w) && !STOPWORDS.has(w)) {
        termFreq.set(w, (termFreq.get(w) ?? 0) + 1);
      }
    }
  };

  for (const { pagePath, entry } of topResults) {
    // Collect text from registry metadata + file content
    tally(
      normalizeText(
        [entry.title, entry.aliases, entry.tags, entry.summary, entry.description]
          .filter(Boolean)
          .join(" "),
      ),
    );

    // Also extract from file body
    if (existsSync(pagePath)) {
      const { body } = parseFrontmatter(readFileSync(pagePath, "utf-8"));
      tally(normalizeText(body));
    }
  }

  // Sort by frequency descending (ties broken alphabetically), take top N
  return Array.from(termFreq.entries())
    .sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
    .slice(0, maxTerms)
    .map(([term]) => term);
}

/**
 * Search a single vault's registry for pages matching a query.
 * Returns up to `maxResults` matches, each with a content preview.
 * Results below `minScore` are excluded (default 0 = no filtering).
 *
 * @param paths - Vault to search.
 * @param query - Free-text query.
 * @param maxResults - Maximum number of results; negative values are treated as 0.
 * @param minScore - Minimum final score a result must reach.
 * @param semantic - Optional semantic context; when omitted ranking is purely lexical.
 */
export function searchWiki(
  paths: VaultPaths,
  query: string,
  maxResults = 5,
  minScore = 0,
  semantic?: SemanticContext,
): RecallResult[] {
  const registry = readJson<Registry>(join(paths.meta, "registry.json"), {
    version: "1.0",
    last_updated: "",
    pages: {},
  });

  const terms = queryTerms(query);
  if (terms.length === 0) return [];

  // Read this vault's precomputed embedding sidecar (synchronous, offline).
  // Missing/empty sidecar => no semantic signal => pure lexical, by construction.
  const embeddingStore: EmbeddingStore | undefined = semantic
    ? readEmbeddingStore(paths)
    : undefined;

  const scored: Scored[] = [];

  for (const [id, entry] of Object.entries(registry.pages)) {
    const pagePath = join(paths.wiki, `${id}.md`);
    const content = existsSync(pagePath) ? readFileSync(pagePath, "utf-8") : "";
    const { frontmatter, body } = parseFrontmatter(content);

    let score = 0;

    // Strong identifiers: exact command/short-query aliases should win.
    score += scoreField(id, terms, 3);
    score += scoreField(entry.title, terms, 5);
    score += scoreField(frontmatter.title, terms, 5);
    score += scoreField(entry.type, terms, 1);

    // Recall-oriented metadata. Arrays are supported by parseFrontmatter and
    // legacy comma/bracket strings still flatten into searchable text.
    score += scoreField(entry.aliases, terms, 6);
    score += scoreField(frontmatter.aliases, terms, 6);
    score += scoreField(entry.recall_triggers, terms, 7);
    score += scoreField(frontmatter.recall_triggers, terms, 7);
    score += scoreField(entry.summary, terms, 3);
    score += scoreField(frontmatter.summary, terms, 3);
    score += scoreField(entry.description, terms, 3);
    score += scoreField(frontmatter.description, terms, 3);

    // General metadata from the registry/frontmatter.
    score += scoreField(entry.tags, terms, 2);
    score += scoreField(entry.category, terms, 2);
    score += scoreField(entry.domain, terms, 2);
    score += scoreField(frontmatter.tags, terms, 2);
    score += scoreField(frontmatter.category, terms, 2);
    score += scoreField(frontmatter.domain, terms, 2);

    // Body search: use chunk-level indexing for more precise matching.
    // Each section of the page is scored independently, so a query about
    // "Postgres" matches only the Postgres section, not the whole page.
    let bestChunkScore = 0;
    let bestChunkHeading = "";
    let bestChunkContent = "";

    if (body.trim()) {
      for (const chunk of chunkPage(body)) {
        let chunkScore = 0;
        // Heading gets a strong boost
        chunkScore += scoreField(chunk.heading, terms, 4);
        // Chunk body content
        chunkScore += scoreField(chunk.content, terms, 1);

        if (chunkScore > bestChunkScore) {
          bestChunkScore = chunkScore;
          bestChunkHeading = chunk.heading;
          bestChunkContent = chunk.content;
        }
      }
    }

    // Add best chunk score to total page score
    score += bestChunkScore;

    // Semantic candidacy: a page with no lexical match can still qualify if its
    // precomputed vector is sufficiently close to the query vector. The boost
    // itself is applied AFTER pseudo-relevance feedback so PRF stays lexical.
    let semCos = 0;
    if (semantic && embeddingStore) {
      const vec = embeddingStore.entries[id]?.vector;
      if (vec && vec.length === semantic.queryVector.length) {
        semCos = cosineSimilarity(semantic.queryVector, vec);
      }
    }
    const semEligible = semCos >= SEMANTIC_MIN_COSINE;

    if (score > 0 || semEligible) {
      scored.push({
        id,
        entry,
        score,
        pagePath,
        bestChunkPreview: bestChunkContent ? chunkPreview(bestChunkHeading, bestChunkContent) : "",
        semCos,
      });
    }
  }

  scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));

  // ── Pseudo-Relevance Feedback (PRF) ─────────────────
  // Extract distinctive terms from the top 3 results and use them to
  // boost semantically related pages. This gives "semantic" expansion
  // without external dependencies: if an "Authentication" page mentions
  // JWT, OAuth, and sessions, those terms boost other pages that discuss
  // related concepts.
  const expansionTerms = extractExpansionTerms(scored, query, paths, 6);
  if (expansionTerms.length > 0) {
    const expTermList = queryTerms(expansionTerms.join(" "));

    // Apply expansion scoring to the top 25 results (cheap re-read)
    const expansionCandidates = scored.slice(0, Math.min(25, scored.length));
    for (const item of expansionCandidates) {
      const content = existsSync(item.pagePath) ? readFileSync(item.pagePath, "utf-8") : "";
      const { body } = parseFrontmatter(content);

      let expChunkScore = 0;
      if (body.trim()) {
        for (const chunk of chunkPage(body)) {
          let cs = 0;
          cs += scoreField(chunk.heading, expTermList, 2); // half weight
          cs += scoreField(chunk.content, expTermList, 0.5);
          if (cs > expChunkScore) expChunkScore = cs;
        }
      }

      // Dampened addition — the best expansion chunk score is scaled by 0.4
      item.score += expChunkScore * 0.4;
    }
  }

  // ── Semantic fusion ─────────────────────────────────
  // Blend the precomputed cosine similarity into the (lexical + PRF) score.
  // Applied last so PRF expansion remains purely lexical and so a strongly
  // paraphrase-relevant page that lexical missed can clear `minScore`. With no
  // semantic context every boost is 0, leaving the lexical ranking untouched.
  if (semantic) {
    for (const item of scored) {
      item.score = fuseScores(item.score, item.semCos, semantic.weight);
    }
  }

  // Re-sort after expansion + semantic scoring
  scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));

  // Clamp the limit: a negative end index would make `slice` count from the
  // end of the array and return (almost) everything instead of nothing.
  const limit = Math.max(0, maxResults);
  const top = scored.filter((s) => s.score >= minScore).slice(0, limit);

  return top.map(({ id, entry, pagePath, score, bestChunkPreview }) => {
    let preview = bestChunkPreview;
    if (!preview && existsSync(pagePath)) {
      // Fallback: no chunk matched, show page intro
      preview = pagePreview(readFileSync(pagePath, "utf-8"));
    }
    return {
      id,
      title: String(entry.title || id),
      type: String(entry.type || "page"),
      preview,
      path: pagePath,
      score,
    };
  });
}

/**
 * Search both project/primary vault and personal vault, merging results.
 * Personal results are placed FIRST, followed by primary results; the merged
 * list is deduplicated by page ID (first occurrence wins) and then truncated
 * to `maxResults`. The merged list is not re-sorted by score.
 *
 * @param maxResults - Maximum number of merged results; negative values are treated as 0.
 * @param minScore - Minimum relevance score (default 0 = no filter).
 * @param includePersonal - Whether to search the personal vault (default true).
 *   Auto-injection should pass false to avoid personal-vault contamination.
 * @param semantic - Optional semantic context forwarded to each vault search.
 */
export function searchWikiLayered(
  primaryPaths: VaultPaths,
  query: string,
  maxResults = 5,
  minScore = 0,
  includePersonal = true,
  semantic?: SemanticContext,
): RecallResult[] {
  // Search primary vault
  const primaryResults = searchWiki(primaryPaths, query, maxResults, minScore, semantic);

  // If primary is already the personal vault, no layered search needed
  if (isPersonalVault(primaryPaths)) return primaryResults;

  // Search personal vault as secondary layer (skipped when includePersonal is false)
  let personalResults: RecallResult[] = [];
  if (includePersonal) {
    const personalPaths = getPersonalWikiPaths();
    if (existsSync(join(personalPaths.dotWiki, "config.json"))) {
      personalResults = searchWiki(personalPaths, query, maxResults, minScore, semantic);
    }
  }

  // Merge: personal results first (they're the user's accumulated knowledge),
  // then primary results (project-specific). Deduplicate by page ID.
  const taggedPersonal = personalResults.map(
    (r): RecallResult => ({ ...r, vaultLabel: "📓 personal" }),
  );
  const seen = new Set<string>();
  const merged: RecallResult[] = [];
  for (const r of [...taggedPersonal, ...primaryResults]) {
    if (seen.has(r.id)) continue;
    seen.add(r.id);
    merged.push(r);
  }

  // Same clamp as in searchWiki: never hand `slice` a negative end index.
  return merged.slice(0, Math.max(0, maxResults));
}

// ─── Async hybrid entry point (the single, cached query embedding) ───

/**
 * Cache of query string → normalized embedding vector. The query embedding is
 * the ONLY embedding call in the recall hot path; caching collapses repeated
 * recalls of the same query within a session (e.g. auto-injection + an explicit
 * wiki_recall) into a single network call, satisfying the #67 "single cached
 * query-embedding lookup" bound.
 */
const queryEmbeddingCache = new Map<string, number[]>();
const QUERY_CACHE_MAX = 256;

/** Cache key: embedder model + normalized query, separated by NUL. */
function queryCacheKey(model: string, query: string): string {
  return `${model}\u0000${normalizeText(query)}`;
}

/** Test-only: reset the module-level query-embedding cache. */
export function __clearQueryEmbeddingCache(): void {
  queryEmbeddingCache.clear();
}

/** True if a vault has at least one stored embedding vector. */
function storeHasEntries(paths: VaultPaths): boolean {
  return Object.keys(readEmbeddingStore(paths).entries).length > 0;
}

/**
 * Embed the query string once (cached), returning a normalized vector, or
 * `undefined` when the embedder yields no (or an empty) vector. When the cache
 * is full, the oldest inserted entry is evicted.
 */
async function embedQuery(embedder: Embedder, query: string): Promise<number[] | undefined> {
  const key = queryCacheKey(embedder.model, query);
  const cached = queryEmbeddingCache.get(key);
  if (cached) return cached;

  const [raw] = await embedder.embed([query]);
  if (!raw || raw.length === 0) return undefined;

  const vec = normalizeVector(raw);
  if (queryEmbeddingCache.size >= QUERY_CACHE_MAX) {
    // Map iterates in insertion order, so the first key is the oldest entry.
    const oldest = queryEmbeddingCache.keys().next().value;
    if (oldest !== undefined) queryEmbeddingCache.delete(oldest);
  }
  queryEmbeddingCache.set(key, vec);
  return vec;
}

/**
 * Hybrid layered recall: lexical scoring blended with semantic cosine ranking.
 *
 * Design (issue #67): page vectors are precomputed at write time (#66); the
 * ONLY per-query embedding work is a single, cached lookup of the (short) query
 * string. If no vault has embeddings, the query embedding is skipped entirely
 * and this degrades to exactly `searchWikiLayered` (pure lexical, zero network).
 * Likewise when no embedder is configured. `opts.embedder` is an injection seam
 * for tests (mirrors `embedPages`) so unit tests never touch the network.
 */
export async function searchWikiHybrid(
  primaryPaths: VaultPaths,
  query: string,
  maxResults = 5,
  minScore = 0,
  includePersonal = true,
  opts: { config?: TaskConfig; embedder?: Embedder } = {},
): Promise<RecallResult[]> {
  // Pure-lexical fast path: no semantic signal anywhere => no embedding call.
  let anyEmbeddings = storeHasEntries(primaryPaths);
  if (!anyEmbeddings && includePersonal && !isPersonalVault(primaryPaths)) {
    const personalPaths = getPersonalWikiPaths();
    if (existsSync(join(personalPaths.dotWiki, "config.json"))) {
      anyEmbeddings = storeHasEntries(personalPaths);
    }
  }
  if (!anyEmbeddings) {
    return searchWikiLayered(primaryPaths, query, maxResults, minScore, includePersonal);
  }

  const embedder = opts.embedder ?? (opts.config ? resolveEmbedder(opts.config) : undefined);
  if (!embedder) {
    // Embeddings exist but no embedder configured to embed the query: fall back
    // to pure lexical rather than guess. (Degrades gracefully.)
    return searchWikiLayered(primaryPaths, query, maxResults, minScore, includePersonal);
  }

  let semantic: SemanticContext | undefined;
  try {
    const queryVector = await embedQuery(embedder, query);
    if (queryVector) {
      const weight = opts.config?.semanticWeight ?? DEFAULT_SEMANTIC_WEIGHT;
      semantic = { queryVector, weight };
    }
  } catch {
    // Network/embedding failure must never break recall — fall back to lexical.
    semantic = undefined;
  }

  return searchWikiLayered(primaryPaths, query, maxResults, minScore, includePersonal, semantic);
}

/**
 * Default page-count gate for two-stage (links-first) recall (issue #68).
 * When a vault's registered page count exceeds this, recall returns ranked
 * links (expand on demand via `read`) instead of inline content previews.
 */
export const DEFAULT_RECALL_LINKS_THRESHOLD = 50;

/** Max characters of the 1-line snippet shown beside a link in links-first mode. */
const LINKS_SNIPPET_MAX = 80;

/**
 * Count the registered pages of a single vault. Reads and parses only
 * `registry.json` — no page-body I/O.
 */
function registryPageCount(paths: VaultPaths): number {
  const registry = readJson<Registry>(join(paths.meta, "registry.json"), {
    version: "1.0",
    last_updated: "",
    pages: {},
  });
  return Object.keys(registry.pages).length;
}

/**
 * Total registered page count across the vault(s) recall will actually search.
 * Mirrors `searchWikiLayered`'s vault selection so the two-stage gate is keyed
 * to the same corpus the agent sees. Reads only `registry.json` — never a page
 * body — so the gate stays cheap as the vault grows.
 */
export function vaultPageCount(primaryPaths: VaultPaths, includePersonal = true): number {
  let count = registryPageCount(primaryPaths);
  if (includePersonal && !isPersonalVault(primaryPaths)) {
    const personalPaths = getPersonalWikiPaths();
    if (existsSync(join(personalPaths.dotWiki, "config.json"))) {
      count += registryPageCount(personalPaths);
    }
  }
  return count;
}

/**
 * Decide whether recall should use links-first (stage 1) rendering: true when
 * the vault page count is STRICTLY GREATER THAN the configured threshold.
 * Threshold 0 forces links-first for any non-empty vault; a very large value
 * keeps previews inline always. Default `DEFAULT_RECALL_LINKS_THRESHOLD`.
 */
export function shouldUseLinksFirst(pageCount: number, config?: TaskConfig): boolean {
  const threshold = config?.recallLinksThreshold ?? DEFAULT_RECALL_LINKS_THRESHOLD;
  return pageCount > threshold;
}

/** One-line snippet for links-first rendering, derived from the chunk preview. */
function linkSnippet(preview: string): string {
  const oneLine = preview.replace(/\s+/g, " ").trim();
  if (!oneLine) return "";
  return oneLine.length > LINKS_SNIPPET_MAX ? `${oneLine.slice(0, LINKS_SNIPPET_MAX)}…` : oneLine;
}

/**
 * Default cap on chars of a skill/case body inlined directly into a recall
 * block. Overridable per-vault via `recallSkillInlineMax` (0 disables inlining).
 * Mirrors `DEFAULT_RECALL_LINKS_THRESHOLD` — the sibling context-window lever.
 */
export const DEFAULT_RECALL_SKILL_INLINE_MAX = 1600;

/**
 * True when a result is a skill/case page: its type is `skill` or `case`, or
 * its id lives under `skills/` or `cases/`.
 *
 * These pages get a carve-out from links-first: short, high-value procedural
 * pages are meant to be APPLIED immediately, so their body is inlined rather
 * than making the agent expand a link it often skips (adherence >
 * context-economy for these page types).
 */
function isSkillOrCase(r: RecallResult): boolean {
  return (
    r.type === "skill" ||
    r.type === "case" ||
    r.id.startsWith("skills/") ||
    r.id.startsWith("cases/")
  );
}

/**
 * Inlined body for a skill/case page, or null. `max <= 0` disables inlining and
 * short-circuits BEFORE any filesystem access, so a vault that opts out keeps
 * recall page-body-I/O-free (issue #68's cheap-recall invariant). Otherwise the
 * read is bounded: it fires only for skill/case results (which exist only when
 * the trajectories feature is on) and only the top-N ranked hits.
 *
 * Returns null for non-skill pages, when the file is missing, or when nothing
 * remains after stripping the frontmatter.
 */
function inlineSkillBody(r: RecallResult, max = DEFAULT_RECALL_SKILL_INLINE_MAX): string | null {
  if (max <= 0) return null;
  if (!isSkillOrCase(r)) return null;
  if (!r.path || !existsSync(r.path)) return null;

  // Normalize CRLF first so the LF-anchored frontmatter strip below works on
  // Windows-authored / git-autocrlf'd vaults (otherwise the raw YAML leaks in).
  let body = readFileSync(r.path, "utf-8").replace(/\r\n/g, "\n");
  // Strip YAML frontmatter. The closing `---` may be followed by a newline OR
  // by end-of-file, so a frontmatter-only page without a trailing newline does
  // not leak its YAML either.
  body = body.replace(/^---\n[\s\S]*?\n---(?:\n|$)/, "").trim();
  if (!body) return null;

  if (body.length > max) {
    body = `${body.slice(0, max)}\n…(truncated — \`read\` the path above for the full page)`;
  }
  return body;
}

/**
 * A backtick fence guaranteed longer than any backtick run inside `body`.
 * Skill/case pages routinely embed their own fenced code blocks; CommonMark
 * closes a fenced block only on a fence of length >= the opener, so opening
 * with (longest inner run + 1, min 3) keeps an inlined body from terminating
 * the wrapper early — and stays safe even when truncation cuts mid-fence.
 */
function codeFenceFor(body: string): string {
  let longest = 0;
  for (const run of body.match(/`+/g) ?? []) longest = Math.max(longest, run.length);
  return "`".repeat(Math.max(3, longest + 1));
}

/** Indented, fence-safe lines wrapping an inlined skill/case body. */
function inlineBlockLines(body: string, indent: string): string[] {
  const fence = codeFenceFor(body);
  return [
    "",
    `${indent}${fence}`,
    ...body.split("\n").map((line) => `${indent}${line}`),
    `${indent}${fence}`,
  ];
}

/**
 * Format recall results as a compact system-prompt section.
 *
 * Two render modes (issue #68):
 * - Default / `linksOnly: false` — preview-inline. For ordinary pages this is
 *   byte-for-byte the pre-fix small-vault rendering (no regression); the
 *   resolvable read-path + new footer copy are confined to links-first, where
 *   there is no inline content and the agent MUST resolve a link.
 * - `linksOnly: true` — stage-1 "links-first": a ranked list of links carrying
 *   id, title, type, score, and a single short snippet, each with a resolvable
 *   `read <path>`. The agent expands the links it wants on demand (stage 2).
 *   Used above the vault-size threshold to keep large vaults from flooding context.
 *
 * `skillInlineMax` (default `DEFAULT_RECALL_SKILL_INLINE_MAX`) caps how much of a
 * skill/case body is inlined; 0 disables inlining (pure links-first for those too).
 *
 * @returns The rendered section, or "" when there are no results.
 */
export function formatRecallContext(
  results: RecallResult[],
  opts: { linksOnly?: boolean; skillInlineMax?: number } = {},
): string {
  if (results.length === 0) return "";

  const skillInlineMax = opts.skillInlineMax ?? DEFAULT_RECALL_SKILL_INLINE_MAX;
  const hasLayered = results.some((r) => r.vaultLabel);
  const label = hasLayered ? " (personal + project)" : "";

  // Salience nudge: when a distilled skill/case matches, tell the agent to
  // apply it BEFORE experimenting (the dominant cost is recall non-adherence).
  const hasSkill = results.some(isSkillOrCase);
  const skillNudge =
    "⚠\ufe0f A distilled skill/case below matches this task — read and APPLY it BEFORE experimenting on your own.";

  if (opts.linksOnly) {
    const lines: string[] = [
      "## Relevant Wiki Knowledge (links-first)",
      "",
      `_${results.length} page(s) matched your query${label}, ranked. Two-stage recall: links only — open the ones you need to read their full content._`,
      "",
    ];
    // Insert the nudge directly under the section title.
    if (hasSkill) lines.splice(1, 0, "", skillNudge);

    results.forEach((r, i) => {
      const vaultTag = r.vaultLabel ? ` ${r.vaultLabel}` : "";
      const snippet = linkSnippet(r.preview);
      const tail = snippet ? ` — ${snippet}` : "";
      lines.push(
        `${i + 1}. **[[${r.id}]]** — *${r.type}* — score ${r.score.toFixed(1)}${vaultTag} — ${r.title}${tail}`,
      );
      // Surface a read-resolvable path so expansion is a single, first-try
      // `read` (issue: wikilink ids aren't resolvable by the file read tool).
      if (r.path) lines.push(` ↳ \`read ${r.path}\``);
      // Skills/case carve-out: inline the body so the agent doesn't have to
      // (and often won't) expand the link before acting.
      const inl = inlineSkillBody(r, skillInlineMax);
      if (inl) lines.push(...inlineBlockLines(inl, " "));
    });

    lines.push(
      "",
      "Call `read` on the exact path shown under each link to pull its full content." +
        " Add new findings via wiki_ensure_page or wiki_retro.",
      "",
    );
    return lines.join("\n");
  }

  const lines: string[] = [
    "## Relevant Wiki Knowledge",
    "",
    `_${results.length} page(s) matched your query${label}._`,
    "",
  ];
  // Insert the nudge directly under the section title.
  if (hasSkill) lines.splice(1, 0, "", skillNudge);

  for (const r of results) {
    const vaultTag = r.vaultLabel ? ` ${r.vaultLabel}` : "";
    lines.push(`- **[[${r.id}]]** — *${r.type}* — ${r.title}${vaultTag}`);
    // Skills/case carve-out: inline the body (adherence > context-economy). Only
    // here does the default path deviate from the pre-fix small-vault render —
    // and only when a skill/case matched (i.e. the trajectories feature is on),
    // so ordinary pages stay byte-for-byte unchanged (#68 no-regression promise).
    const inl = inlineSkillBody(r, skillInlineMax);
    if (inl) {
      // Resolvable path so a truncated inline body is one `read` away.
      if (r.path) lines.push(` ↳ \`read ${r.path}\``);
      lines.push(...inlineBlockLines(inl, " "));
    } else if (r.preview) {
      // Cap the preview at 120 chars
      const preview = r.preview.length > 120 ? `${r.preview.slice(0, 120)}…` : r.preview;
      lines.push(` ${preview}`);
    }
    lines.push("");
  }

  lines.push(
    "Use `read` to view full pages. Add new findings via wiki_ensure_page or wiki_retro.",
    "",
  );
  return lines.join("\n");
}

// ─── Tool Registration ──────────────────────────────────

/**
 * Register the `wiki_recall` tool.
 * The model can call this explicitly to search the wiki.
 * It is also called automatically via before_agent_start hook.
 *
 * @param pi - Extension API used to register the tool.
 * @param runtime - Optional runtime; when present its config drives the
 *   embedder, semantic weight and links-first threshold.
 */
export function registerWikiRecall(pi: ExtensionAPI, runtime?: Runtime): void {
  pi.registerTool({
    name: "wiki_recall",
    label: "Wiki Recall",
    description:
      "Search the wiki for pages relevant to a query. " +
      "Returns matching page IDs, titles, types, and content previews (small vaults) " +
      "or a ranked list of links to expand with `read` (large vaults, two-stage recall). " +
      "Called automatically at session start — use explicitly to dig deeper.",
    promptSnippet: "Recall wiki knowledge relevant to the current task",
    promptGuidelines: [
      "Use wiki_recall at the START of every task to find relevant wiki knowledge.",
      "The extension auto-calls wiki_recall — but calling it explicitly with specific terms gets better results.",
    ],
    parameters: Type.Object({
      query: Type.String({
        description: "Search query — use the user's full request or key terms",
      }),
      max_results: Type.Optional(
        Type.Number({ description: "Max results (default: 5, max: 10)", default: 5 }),
      ),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      const paths = resolveVaultPaths(ctx.cwd ?? process.cwd());
      if (!existsSync(join(paths.dotWiki, "config.json"))) {
        return {
          content: [
            {
              type: "text",
              text: "No wiki vault found at this location. Initialize one with wiki_bootstrap first.",
            },
          ],
          details: { error: "no_vault" } as Record<string, unknown>,
          isError: true,
        };
      }

      // Clamp the model-supplied limit to an integer in [1, 10]. Without the
      // lower bound a negative value would reach `slice(0, negative)` and
      // return nearly every match, while 0/NaN would silently return nothing.
      const requested = Number(params.max_results ?? 5);
      const maxResults = Number.isFinite(requested)
        ? Math.min(Math.max(Math.trunc(requested), 1), 10)
        : 5;

      // Use layered hybrid search: personal vault + project vault, blending
      // lexical scoring with precomputed semantic embeddings when available.
      // No embeddings / no embedder => pure lexical, no network call.
      if (runtime) runtime.ensureConfig(ctx.cwd ?? paths.root);
      const results = await searchWikiHybrid(paths, params.query, maxResults, 0, true, {
        config: runtime?.config,
      });

      if (results.length === 0) {
        // Only claim the wiki is empty when the registry really has no pages;
        // otherwise the query simply did not match anything.
        const hint =
          vaultPageCount(paths, true) === 0
            ? "The wiki is empty — use wiki_retro to start building knowledge."
            : "Try different or broader search terms, or use wiki_retro to record new knowledge.";
        return {
          content: [
            {
              type: "text",
              text: `No wiki pages found matching "${params.query}". ${hint}`,
            },
          ],
          details: { query: params.query, matches: [] } as Record<string, unknown>,
        };
      }

      const hasPersonal = results.some((r) => r.vaultLabel);
      const layerTag = hasPersonal ? " (personal + project)" : "";

      // Two-stage gate (issue #68): large vaults return ranked LINKS only;
      // the agent expands chosen links on demand via `read`. Small vaults keep
      // the inline-preview behavior. Page count is read from the registry only.
      const linksFirst = shouldUseLinksFirst(vaultPageCount(paths, true), runtime?.config);

      if (linksFirst) {
        const linkLines = results
          .map((r, i) => {
            const vault = r.vaultLabel ? ` ${r.vaultLabel}` : "";
            const snippet = linkSnippet(r.preview);
            const tail = snippet ? ` — ${snippet}` : "";
            return `${i + 1}. [[${r.id}]] — ${r.title} (${r.type}, score ${r.score.toFixed(1)})${vault}\n Path: ${r.path}${tail}`;
          })
          .join("\n");
        const text = [
          `Found ${results.length} wiki page(s) matching "${params.query}"${layerTag} (two-stage recall — ranked links, expand on demand):`,
          "",
          linkLines,
          "",
          "Call `read` on the path(s) you need to pull full content.",
        ].join("\n");
        return {
          content: [{ type: "text", text }],
          details: { query: params.query, mode: "links", matches: results } as Record<
            string,
            unknown
          >,
        };
      }

      return {
        content: [
          {
            type: "text",
            text: `Found ${results.length} wiki page(s) matching "${params.query}"${layerTag}:\n\n${results
              .map((r) => {
                const vault = r.vaultLabel ? ` ${r.vaultLabel}` : "";
                return `## [[${r.id}]] — ${r.title}${vault}\nType: ${r.type}\nPath: ${r.path}\n\n${r.preview}`;
              })
              .join("\n\n---\n\n")}`,
          },
        ],
        details: { query: params.query, mode: "preview", matches: results } as Record<
          string,
          unknown
        >,
      };
    },
  });
}