UNPKG

aiwg

Version:

Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platfo

794 lines • 35.2 kB

JavaScript

/**
 * Artifact Index Builder
 *
 * Scans .aiwg/ directories, extracts metadata from artifact frontmatter,
 * computes checksums, extracts @-mention dependencies, and builds a
 * structured index at .aiwg/.index/.
 *
 * @implements #415
 * @source @src/artifacts/types.ts
 * @tests @test/unit/artifacts/index-builder.test.ts
 */
import fs from 'fs';
import path from 'path';
import { createHash } from 'crypto';
import { load as loadYaml } from 'js-yaml';
import { INDEX_VERSION, INDEX_DIR, PHASE_DIRECTORIES, GRAPH_CONFIGS, loadUserGraphConfigs } from './types.js';
import { parseCitationSidecar, citationResultToEdges, buildRefToPathMap } from './citation-parser.js';
import { writeIndexFile, resolveIndexDir, loadGraphIndexFile } from './index-reader.js';
import { loadManifest, writeManifest, statMatches, makeEntry } from './checksum-manifest.js';

/** File extensions indexed when neither a graph config nor a caller overrides them. */
const DEFAULT_EXTENSIONS = Object.freeze(['.md', '.yaml', '.json']);

/**
 * Parse YAML frontmatter from markdown content.
 *
 * The frontmatter must open with `---` on the very first line and be closed
 * by a `---` line. The closing fence may be followed by a newline and a body,
 * or may be the last thing in the file (frontmatter-only documents).
 *
 * @param {string} content - Raw file content.
 * @returns {{ data: object, body: string }} Parsed frontmatter and the text
 *   after the closing fence. When no frontmatter is found, or the YAML fails
 *   to parse, `data` is `{}` and `body` is the full, unmodified content.
 */
export function parseFrontmatter(content) {
    const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
    if (!match) return { data: {}, body: content };
    try {
        const data = (loadYaml(match[1]) ?? {});
        // match[2] is undefined when the closing fence ends the file.
        return { data, body: match[2] ?? '' };
    } catch {
        // Deliberate best-effort: malformed YAML must not abort an index
        // build, so the file is treated as having no frontmatter.
        return { data: {}, body: content };
    }
}

/**
 * Extract @-mention references from content.
 *
 * @param {string} content - Raw file content.
 * @returns {string[]} Unique mention targets (without the leading `@`), in
 *   order of first appearance.
 */
export function extractMentions(content) {
    // Match @path/to/file.ext and @.aiwg/path patterns
    const pattern = /@(\.?aiwg\/[\w./-]+|[a-zA-Z][\w./-]+\.\w+)/g;
    const mentions = new Set();
    for (const match of content.matchAll(pattern)) {
        mentions.add(match[1]);
    }
    return [...mentions];
}

/**
 * Extract title from content (frontmatter `title`, else first `# ` heading).
 *
 * @param {object} data - Parsed frontmatter.
 * @param {string} body - Markdown body.
 * @returns {string} The title, or `'Untitled'` when neither source exists.
 */
function extractTitle(data, body) {
    if (typeof data.title === 'string') return data.title;
    const headingMatch = body.match(/^#\s+(.+)$/m);
    return headingMatch ? headingMatch[1].trim() : 'Untitled';
}

/**
 * Extract canonical short name (#1233) used for exact-name search match.
 *
 * Priority:
 *   1. frontmatter `name:` field if present
 *   2. parent directory basename for skills/agents/commands/rules layouts
 *      (skills conventionally live in `<...>/skills/<name>/SKILL.md`)
 *   3. filename without extension as final fallback
 *
 * The result is the literal slug a user would type to find the artifact —
 * `aiwg-doctor`, `intake-wizard`, `flow-deploy-to-production` — preserving
 * hyphens so the scorer can match queries that contain them.
 *
 * @param {object} data - Parsed frontmatter.
 * @param {string} relativePath - Artifact path relative to the project root.
 * @returns {string} Canonical short name.
 */
function extractCanonicalName(data, relativePath) {
    if (typeof data.name === 'string' && data.name.trim().length > 0) {
        return data.name.trim();
    }
    const filename = path.basename(relativePath);
    // For skill/agent/command/rule layouts: <type>s/<name>/<TYPE>.md
    const isCanonicalLayout = /^(SKILL|AGENT|COMMAND|RULE)\.md$/i.test(filename);
    if (isCanonicalLayout) {
        return path.basename(path.dirname(relativePath));
    }
    return filename.replace(/\.md$/i, '');
}

/**
 * Extract summary from content (first 500 chars of description or body).
 *
 * @param {object} data - Parsed frontmatter.
 * @param {string} body - Markdown body.
 * @returns {string} Summary text, at most 500 characters.
 */
function extractSummary(data, body) {
    if (typeof data.description === 'string') return data.description.slice(0, 500);
    // Skip headings and blank lines, then join the first five remaining lines.
    const lines = body.split('\n').filter(l => l.trim() && !l.startsWith('#'));
    return lines.slice(0, 5).join(' ').slice(0, 500).trim();
}

/**
 * Determine SDLC phase from file path.
 *
 * Separators are normalized to `/` on both sides of the comparison so the
 * prefix test behaves the same for `\`-separated relative paths, matching
 * the normalization `inferType` already performs.
 * NOTE(review): assumes PHASE_DIRECTORIES maps phase name -> directory prefix
 * string; its definition lives in ./types.js — confirm.
 *
 * @param {string} filePath - Artifact path relative to the project root.
 * @returns {string} The first phase whose directory prefixes the path, or
 *   `'other'`.
 */
function inferPhase(filePath) {
    const normalizedPath = filePath.replace(/\\/g, '/');
    for (const [phase, dir] of Object.entries(PHASE_DIRECTORIES)) {
        const normalizedDir = String(dir).replace(/\\/g, '/');
        if (normalizedPath.startsWith(normalizedDir)) return phase;
    }
    return 'other';
}

/**
 * Determine artifact type from frontmatter or file path.
 *
 * Resolution order:
 *   1. An explicit string `type` in frontmatter always wins.
 *   2. Path-based detection for AIWG artifact kinds (#1214): the nearest
 *      ancestor directory named after an artifact kind decides, so
 *      `agentic/code/.../skills/foo/SKILL.md` lands as `type: 'skill'`.
 *   3. Legacy basename heuristics for SDLC artifacts.
 *
 * @param {object} data - Parsed frontmatter.
 * @param {string} filePath - Artifact path relative to the project root.
 * @returns {string} Artifact type; `'document'` when nothing matches.
 */
function inferType(data, filePath) {
    if (typeof data.type === 'string') return data.type;
    // Normalize separators so matchers are cross-platform.
    const normalized = filePath.replace(/\\/g, '/');
    const basename = path.basename(filePath, path.extname(filePath)).toLowerCase();
    // Nearest-type-dir-ancestor classification (#1221 audit).
    //
    // The AIWG corpus uses many different nesting depths for artifacts:
    //   frameworks/<f>/skills/<slug>/SKILL.md
    //   frameworks/<f>/extensions/<sub>/skills/<slug>/SKILL.md
    //   frameworks/<f>/templates/hermes/skills/<slug>/SKILL.md
    //   frameworks/<f>/elaboration/agents/<n>.md   (research-complete)
    //   addons/<a>/agents/<n>.md
    //   addons/<a>/prompts/agents/<n>.md           (aiwg-utils)
    //   addons/<a>/skills/skills/SKILL.md          (aiwg-utils self-skill)
    //   addons/<a>/templates/<n>.md
    //   addons/<a>/behaviors/<n>.md
    //   addons/<a>/hooks/<n>.md
    //   extensions/<e>/skills/<n>.md               (flat — no slug dir)
    //   extensions/<e>/rules/<n>.md
    //   agentic/code/behaviors/<sub>/...           (top-level behaviors)
    //
    // Walk the path segments from the file upward; the first segment matching
    // a known artifact-type directory determines the kind. This handles every
    // nested layout uniformly.
    const segments = normalized.split('/');
    const skipBasenames = new Set(['readme', 'rules-index', 'index']);
    if (!skipBasenames.has(basename) && filePath.match(/\.md$/i)) {
        // Look at directory segments only (exclude the file itself).
        for (let i = segments.length - 2; i >= 0; i--) {
            const seg = segments[i];
            switch (seg) {
                case 'skills': {
                    // Skills come in two layouts:
                    //   slug-style: skills/<slug>/SKILL.md → file is SKILL.md
                    //   flat-style: skills/<name>.md       → file is directly under skills/
                    //
                    // Do not classify nested reference/support markdown under
                    // skills/<slug>/... as standalone skills; those files should remain
                    // ordinary artifacts so discovery does not advertise them as
                    // invokable capabilities.
                    const afterSkills = segments.length - i - 1;
                    const isSlugSkill = basename === 'skill' && afterSkills === 2;
                    const isFlatSkill = afterSkills === 1;
                    if (isSlugSkill || isFlatSkill) return 'skill';
                    // Not a skill file: keep walking towards the root.
                    break;
                }
                case 'agents':
                    return 'agent';
                case 'commands':
                    return 'command';
                case 'rules':
                    // Rules/RULES-INDEX.md is a curated index file, not a rule.
                    if (basename === 'rules-index' || basename === 'index') break;
                    return 'rule';
                case 'templates':
                    return 'template';
                case 'behaviors':
                    return 'behavior';
                case 'hooks':
                    return 'hook';
            }
        }
    }
    // Legacy SDLC artifact heuristics (existing behavior preserved).
    if (basename.startsWith('uc-') || basename.includes('use-case')) return 'use-case';
    if (basename.startsWith('adr-') || basename.includes('adr')) return 'adr';
    if (basename.startsWith('tp-') || basename.includes('test-plan')) return 'test-plan';
    if (basename.startsWith('tc-') || basename.includes('test-case')) return 'test-case';
    if (basename.startsWith('tm-') || basename.includes('threat')) return 'threat-model';
    if (basename.startsWith('nfr-') || basename.includes('nfr')) return 'nfr';
    if (basename.includes('sad') || basename.includes('architecture')) return 'architecture';
    if (basename.includes('risk')) return 'risk';
    if (basename.includes('deploy')) return 'deployment';
    return 'document';
}

/**
 * Extract trigger phrases from a SKILL.md / agent body.
 *
 * Skills declare alternate activation phrases under a `## Triggers`
 * heading; the body typically lists them as bullet points. This
 * function pulls each bullet's leading phrase (the part before any
 * `→` arrow or em-dash explanation), lowercased and trimmed.
 *
 * Only real markdown bullets count: the `-`, `*` or `+` marker must be
 * followed by whitespace, so emphasis lines such as `**Note:** ...` and bare
 * marker lines are ignored.
 *
 * @implements #1214
 * @param {string} body - Markdown body (frontmatter already removed).
 * @returns {string[]} Lowercased trigger phrases in document order; an empty
 *   array when no triggers section is found.
 */
export function extractTriggers(body) {
    // Find a triggers heading (case-insensitive). Accepted variants:
    //   ## Triggers
    //   ## Natural Language Triggers   (used by orchestration skills)
    //   ## Activation Phrases          (alternate vocabulary)
    //   ## When to invoke              (intent-style)
    // Capture the section content until the next `## ` heading or EOF.
    // Note: avoid the multi-line `m` flag with `$` — `$` would match
    // every line terminator and stop capture at the first blank line.
    const sectionMatch = body.match(/(?:^|\n)##\s+(?:(?:Natural\s+Language\s+)?Triggers|Activation\s+Phrases|When\s+to\s+invoke)\b[^\n]*\n([\s\S]*?)(?=\n##\s|$)/i);
    if (!sectionMatch) return [];
    const section = sectionMatch[1];
    const bulletMarker = /^[-*+]\s+/;
    const phrases = [];
    for (const rawLine of section.split('\n')) {
        const line = rawLine.trim();
        if (!bulletMarker.test(line)) continue;
        // Strip the bullet marker, optional surrounding quotes, and split on
        // common explanation separators ("→", " — ", " - " when followed by
        // explanatory text).
        let phrase = line.replace(bulletMarker, '').trim();
        // Drop any leading quote characters
        phrase = phrase.replace(/^["“”'`]+/, '').trim();
        // Cut at the first explanation separator
        const sepMatch = phrase.match(/^(.*?)\s*(?:→|—|--|\s-\s)/);
        if (sepMatch) phrase = sepMatch[1];
        // Strip trailing quotes / colons
        phrase = phrase.replace(/["“”'`:.]+\s*$/, '').trim();
        if (phrase.length === 0) continue;
        if (phrase.length > 200) continue; // Reject pathological lines
        phrases.push(phrase.toLowerCase());
    }
    return phrases;
}

/**
 * Extract a capability summary for a skill/agent/command/rule.
 *
 * Prefers the frontmatter `description` field (used uniformly across
 * AIWG SKILL.md / agent files). Falls back to the first non-heading
 * paragraph of the body. Capped at 240 chars so the index stays
 * token-tight when surfaced via `aiwg index discover`.
 *
 * @implements #1214
 * @param {object} data - Parsed frontmatter.
 * @param {string} body - Markdown body.
 * @returns {string|undefined} Capability text (max 240 chars), or
 *   `undefined` when neither a description nor a paragraph exists.
 */
export function extractCapability(data, body) {
    if (typeof data.description === 'string' && data.description.trim().length > 0) {
        return data.description.trim().slice(0, 240);
    }
    // Fallback: first non-empty paragraph that isn't a heading. A leading
    // frontmatter block (LF or CRLF line endings) is dropped first in case the
    // caller passed unparsed content.
    const stripped = body.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n/, '');
    for (const block of stripped.split(/\n\s*\n/)) {
        const trimmed = block.trim();
        if (!trimmed || trimmed.startsWith('#')) continue;
        return trimmed.replace(/\s+/g, ' ').slice(0, 240);
    }
    return undefined;
}

/**
 * Extract a SkillScriptSpec from skill frontmatter (#1227).
 *
 * The `script:` block is optional — only skills with a backing executable
 * declare it. Schema:
 *
 *   script:
 *     entrypoint: scripts/voice_loader.py         # required, relative to skill dir
 *     runtime: python3                            # required (node|python3|bash|...)
 *     cwd: project-root                           # optional, default project-root
 *     argsHint: "--voice <name> --input <path>"   # optional UX hint
 *
 * Returns undefined when the block is absent or malformed. Malformed
 * blocks are silently dropped here (no warning is emitted by this module),
 * and the artifact still indexes as a non-executable skill.
 *
 * @param {object} data - Parsed frontmatter.
 * @returns {{ entrypoint: string, runtime: string, cwd: string, argsHint?: string }|undefined}
 *   Normalized script spec. `cwd` is one of `'skill-dir'`, `'aiwg-root'` or
 *   `'project-root'`; any other value falls back to `'project-root'`.
 */
export function extractSkillScript(data) {
    const raw = data.script;
    if (!raw || typeof raw !== 'object' || Array.isArray(raw)) return undefined;
    const block = raw;
    const entrypoint = typeof block.entrypoint === 'string' ? block.entrypoint.trim() : '';
    const runtime = typeof block.runtime === 'string' ? block.runtime.trim() : '';
    if (!entrypoint || !runtime) return undefined;
    const cwdRaw = typeof block.cwd === 'string' ? block.cwd.trim() : 'project-root';
    const cwd = cwdRaw === 'skill-dir' || cwdRaw === 'aiwg-root' ? cwdRaw : 'project-root';
    const argsHint = typeof block.argsHint === 'string' ? block.argsHint.trim() : undefined;
    return {
        entrypoint,
        runtime,
        cwd,
        ...(argsHint ? { argsHint } : {}),
    };
}

/**
 * Compute truncated SHA-256 checksum (16 hex chars).
 *
 * @param {string} content - Text to hash.
 * @returns {string} First 16 hex characters of the SHA-256 digest.
 */
function computeChecksum(content) {
    return createHash('sha256').update(content).digest('hex').slice(0, 16);
}

/**
 * Convert Python-style named capture groups (?P<name>...) to JS-style (?<name>...).
 *
 * @param {string} pattern - Regular-expression source text.
 * @returns {string} The pattern with every `(?P<` rewritten to `(?<`.
 */
export function normalizeNamedCaptures(pattern) {
    return pattern.replace(/\(\?P</g, '(?<');
}

/**
 * Build a MetadataEntry from filename regex captures instead of file content.
 * Used when graphConfig.nodeStrategy === 'filename-metadata'.
 *
 * The file content is never read: timestamps come from `fs.statSync` and the
 * checksum is derived from the file's basename only.
 *
 * @implements #723
 * @param {string} relativePath - Path stored in the entry.
 * @param {string} fullPath - Path used for `fs.statSync` and basename matching.
 * @param {string} [filenamePattern] - Regex source matched against the
 *   basename; Python-style `(?P<name>…)` groups are accepted. The named groups
 *   `ref`, `author`, `year` and `slug` feed the title.
 * @returns {object} Metadata entry; `type` is `'paper'` when a `ref` group
 *   was captured, otherwise `'document'`.
 * @throws {SyntaxError} When `filenamePattern` is not a valid regular expression.
 */
export function buildFilenameMetadataEntry(relativePath, fullPath, filenamePattern) {
    const basename = path.basename(fullPath);
    let captures = {};
    if (filenamePattern) {
        const regex = new RegExp(normalizeNamedCaptures(filenamePattern));
        const match = basename.match(regex);
        if (match?.groups) {
            captures = match.groups;
        }
    }
    const ref = captures.ref ? `REF-${captures.ref}` : '';
    const titleParts = [ref, captures.author, captures.year, captures.slug].filter(Boolean);
    const title = titleParts.length > 0 ? titleParts.join(' — ') : basename;
    // Use file stat for timestamps; checksum is based on filename (content not read)
    const stat = fs.statSync(fullPath);
    const checksum = computeChecksum(basename);
    return {
        path: relativePath,
        type: captures.ref ? 'paper' : 'document',
        phase: 'other',
        title,
        tags: [],
        created: stat.birthtime.toISOString(),
        updated: stat.mtime.toISOString(),
        checksum,
        summary: '',
        dependencies: [],
        dependents: [],
        // Store captures as a non-standard field for downstream consumers
        ...(Object.keys(captures).length > 0 ? { captures } : {}),
    };
}

/**
 * Apply metadata supplements — merge frontmatter fields from sidecar files.
 *
 * For each supplement, sidecar files under `supplement.scanDir` are keyed by
 * the frontmatter field named in `supplement.matchOn`; entries that carry
 * filename `captures` are matched through `supplement.nodeKey` and receive
 * the fields listed in `supplement.mergeFields`. Entries are mutated in place.
 *
 * @implements #723
 * @param {Object<string, object>} entries - Index entries keyed by path.
 * @param {object[]} supplements - Supplement configs from the graph config.
 * @param {string} cwd - Project root that `scanDir` is resolved against.
 * @returns {void}
 */
function applyMetadataSupplements(entries, supplements, cwd) {
    for (const supplement of supplements) {
        const sidecarDir = path.join(cwd, supplement.scanDir);
        if (!fs.existsSync(sidecarDir)) continue;
        if (!supplement.matchOn || !supplement.nodeKey) continue;
        // Parse matchOn: "frontmatter.ref" -> field "ref"
        const matchField = supplement.matchOn.replace(/^frontmatter\./, '');
        // Build a map of sidecar files: matchField value -> frontmatter data
        const sidecarFiles = findArtifactFiles(sidecarDir, DEFAULT_EXTENSIONS);
        const sidecarMap = new Map();
        for (const sidecarPath of sidecarFiles) {
            const content = fs.readFileSync(sidecarPath, 'utf-8');
            const { data } = parseFrontmatter(content);
            const matchValue = data[matchField];
            if (typeof matchValue === 'string') {
                sidecarMap.set(matchValue, data);
            }
        }
        // A supplement without mergeFields has nothing to merge.
        const mergeFields = supplement.mergeFields ?? [];
        // Match entries to sidecars and merge fields
        for (const entry of Object.values(entries)) {
            const entryCaptures = entry.captures;
            if (!entryCaptures) continue;
            const nodeValue = entryCaptures[supplement.nodeKey];
            if (!nodeValue) continue;
            // Build the full ref ID for matching (e.g., captures.ref="008" -> "REF-008")
            const matchValue = supplement.nodeKey === 'ref' ? `REF-${nodeValue}` : nodeValue;
            const sidecarData = sidecarMap.get(matchValue);
            if (!sidecarData) continue;
            for (const field of mergeFields) {
                const value = sidecarData[field];
                if (value === undefined) continue;
                if (field === 'title' && typeof value === 'string') {
                    entry.title = value;
                } else if (field === 'tags' && Array.isArray(value)) {
                    entry.tags = [...new Set([...entry.tags, ...value.map(String)])];
                } else if (field === 'authors' && typeof value === 'string') {
                    // A string `authors` value is surfaced through the summary field.
                    entry.summary = value;
                }
                // Other fields stored via captures — downstream consumers can access them
            }
        }
    }
}

/**
 * Recursively find all indexable files under a directory.
 *
 * Hidden directories (names starting with `.`) are not descended into and
 * dangling symlinks are skipped.
 *
 * @param {string} dir - Directory to scan.
 * @param {readonly string[]} [extensions] - Accepted filename suffixes.
 * @returns {string[]} Paths of matching files (each is `dir` joined with the
 *   relative location), in directory-listing order.
 */
function findArtifactFiles(dir, extensions = DEFAULT_EXTENSIONS) {
    const results = [];
    if (!fs.existsSync(dir)) return results;
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        // existsSync follows links, so this is true only for dangling symlinks.
        if (entry.isSymbolicLink() && !fs.existsSync(fullPath)) {
            continue;
        }
        if (entry.isDirectory()) {
            // Skip hidden dirs and .index
            if (entry.name.startsWith('.')) continue;
            results.push(...findArtifactFiles(fullPath, extensions));
        } else if (extensions.some(ext => entry.name.endsWith(ext))) {
            results.push(fullPath);
        }
    }
    return results;
}

/**
 * Build the artifact index.
 *
 * Scans the configured directories, builds (or incrementally refreshes) one
 * metadata entry per file, derives the tag reverse index and the dependency
 * graph, then writes `metadata.json`, `tags.json`, `dependencies.json`,
 * `stats.json` and the checksum manifest to the index directory. Progress is
 * reported on the console.
 *
 * When no scan directory exists the process exits with code 1, unless the
 * graph is marked `defaultBuild` and `explicit` is false, in which case a
 * warning is printed and the function returns without writing anything.
 *
 * @param {string} cwd - Project root; scan dirs and entry paths are relative to it.
 * @param {object} [options]
 * @param {boolean} [options.force=false] - Ignore the previous index and manifest.
 * @param {boolean} [options.verbose=false] - Log the per-file decision.
 * @param {string} [options.scope] - Single directory (relative to `cwd`) to
 *   scan; overrides the graph's scan directories.
 * @param {string} [options.outputDir] - Alternate root for the index files.
 * @param {string} [options.graph] - Key into GRAPH_CONFIGS.
 * @param {boolean} [options.explicit=true] - Whether `graph` was requested
 *   explicitly rather than auto-selected.
 * @returns {Promise<void>}
 */
export async function buildIndex(cwd, options = {}) {
    const { force = false, verbose = false, scope, outputDir, graph, explicit = true } = options;
    const startTime = Date.now();
    // Ensure user-defined graphs are loaded
    loadUserGraphConfigs(cwd);
    // Determine scan directories based on graph type
    const graphConfig = graph ? GRAPH_CONFIGS[graph] : undefined;
    let scanDirs;
    let fileExtensions;
    if (scope) {
        // Explicit scope overrides graph config
        scanDirs = [path.join(cwd, scope)];
        fileExtensions = DEFAULT_EXTENSIONS;
    } else if (graphConfig) {
        scanDirs = graphConfig.scanDirs.map(d => path.join(cwd, d));
        fileExtensions = graphConfig.extensions;
    } else {
        // Default: scan .aiwg/ (backward compatible)
        scanDirs = [path.join(cwd, '.aiwg')];
        fileExtensions = DEFAULT_EXTENSIONS;
    }
    // Verify at least one scan directory exists
    const existingDirs = scanDirs.filter(d => fs.existsSync(d));
    if (existingDirs.length === 0) {
        // If this graph was auto-selected (defaultBuild) rather than explicitly requested via --graph,
        // skip gracefully — a docs-only repo should not be forced to have src/test/tools.
        if (graphConfig?.defaultBuild && !explicit) {
            const relDirs = scanDirs.map(d => path.relative(cwd, d)).join(', ');
            console.warn(`Warning: ${graph} graph: scan directories not found (${relDirs}), skipping`);
            return;
        }
        console.error(`Error: No scan directories found: ${scanDirs.join(', ')}`);
        console.log('Run this command from a project with the required directories.');
        process.exit(1);
    }
    // Determine output index directory
    let indexOutputDir;
    if (outputDir) {
        // Test/custom override — write to outputDir/.aiwg/.index/ (or graph subdir)
        indexOutputDir = graph
            ? path.join(outputDir, '.aiwg', '.index', graph)
            : path.join(outputDir, INDEX_DIR);
    } else if (graph) {
        indexOutputDir = resolveIndexDir(cwd, graph);
    } else {
        indexOutputDir = path.join(cwd, INDEX_DIR);
    }
    fs.mkdirSync(indexOutputDir, { recursive: true });
    // effectiveOutputCwd is used for backward-compat loadMetadataIndex calls
    const effectiveOutputCwd = outputDir ?? cwd;
    // Load existing index for incremental updates
    const existingIndex = force ? null : loadGraphIndexFile(effectiveOutputCwd, 'metadata.json', graph);
    const existingEntries = existingIndex?.entries ?? {};
    // Load checksum manifest for fast stat-based change detection (#794).
    // When --force is set we skip the manifest entirely and rebuild everything.
    const manifest = force
        ? { version: 1, generated: '', entries: {} }
        : loadManifest(indexOutputDir);
    const nextManifestEntries = {};
    const manifestStats = {
        checked: 0,
        statSkipped: 0,
        checksumSkipped: 0,
        reindexed: 0,
        pruned: 0,
    };
    // Collect files from all scan directories
    const files = [];
    for (const dir of existingDirs) {
        files.push(...findArtifactFiles(dir, fileExtensions));
    }
    const entries = {};
    // Keyed by arbitrary user-supplied tag strings: a null prototype keeps a
    // tag such as "constructor" from resolving to an inherited member.
    const tagIndex = Object.create(null);
    const depGraph = {};
    let newCount = 0;
    let updatedCount = 0;
    let unchangedCount = 0;
    const useFilenameMetadata = graphConfig?.nodeStrategy === 'filename-metadata';
    for (const fullPath of files) {
        const relativePath = path.relative(cwd, fullPath);
        const existingEntry = existingEntries[relativePath];
        let entry;
        if (useFilenameMetadata) {
            // Filename-metadata strategy: derive metadata from filename, skip content read.
            // The checksum manifest doesn't help here (no content read to skip) but we still
            // preserve any existing manifest entry so cross-graph builds don't drop it.
            const checksum = computeChecksum(path.basename(fullPath));
            const existingManifestEntry = manifest.entries[relativePath];
            if (existingManifestEntry) {
                nextManifestEntries[relativePath] = existingManifestEntry;
            }
            // Skip unchanged files in incremental mode
            if (!force && existingEntry?.checksum === checksum) {
                entries[relativePath] = existingEntry;
                unchangedCount++;
                if (verbose) console.log(` unchanged: ${relativePath}`);
                continue;
            }
            entry = buildFilenameMetadataEntry(relativePath, fullPath, graphConfig?.filenamePattern);
        } else {
            // Default strategy: read content, parse frontmatter
            manifestStats.checked++;
            // Phase 1: stat-based quick filter — skip content read if mtime+size match manifest.
            // This is the fast path: unchanged files don't touch the filesystem beyond fs.statSync.
            const stat = fs.statSync(fullPath);
            const manifestEntry = manifest.entries[relativePath];
            // `existingEntry` must be present: two missing checksums would
            // otherwise compare equal and store `undefined` as the entry.
            if (!force
                && existingEntry
                && statMatches(stat, manifestEntry)
                && existingEntry.checksum === manifestEntry?.checksum) {
                // Stat matches manifest AND the stored index entry references the same checksum.
                // Safe to reuse both without reading the file.
                entries[relativePath] = existingEntry;
                nextManifestEntries[relativePath] = manifestEntry;
                unchangedCount++;
                manifestStats.statSkipped++;
                if (verbose) console.log(` unchanged (stat): ${relativePath}`);
                continue;
            }
            // Phase 2: content read + checksum verification for files that looked changed.
            const content = fs.readFileSync(fullPath, 'utf-8');
            const checksum = computeChecksum(content);
            // Skip unchanged files in incremental mode (checksum matched despite stat drift)
            if (!force && existingEntry?.checksum === checksum) {
                entries[relativePath] = existingEntry;
                // Update manifest with current stat so Phase 1 succeeds next time.
                nextManifestEntries[relativePath] = makeEntry(checksum, stat);
                unchangedCount++;
                manifestStats.checksumSkipped++;
                if (verbose) console.log(` unchanged (checksum): ${relativePath}`);
                continue;
            }
            manifestStats.reindexed++;
            nextManifestEntries[relativePath] = makeEntry(checksum, stat);
            const { data, body } = parseFrontmatter(content);
            const title = extractTitle(data, body);
            const phase = typeof data.phase === 'string' ? data.phase : inferPhase(relativePath);
            const type = inferType(data, relativePath);
            const tags = Array.isArray(data.tags) ? data.tags.map(String) : [];
            const summary = extractSummary(data, body);
            const dependencies = extractMentions(content);
            // Discovery metadata (#1214) — only meaningful for AIWG artifact
            // kinds. Kept undefined on other types so the index file stays
            // small for the common case.
            const isDiscoverable = type === 'skill' || type === 'agent' || type === 'command' || type === 'rule';
            const triggers = isDiscoverable ? extractTriggers(body) : undefined;
            const capability = isDiscoverable ? extractCapability(data, body) : undefined;
            const kernel = data.kernel === true || data.kernel === 'true' ? true : undefined;
            // Script entrypoint metadata is meaningful for skills only (#1227).
            const script = type === 'skill' ? extractSkillScript(data) : undefined;
            // Canonical short name (#1233) — used by the scorer to floor exact-name
            // queries to 1.0 so hyphenated kernel-skill names like `aiwg-doctor`
            // remain searchable even when the rendered title strips the hyphen.
            const name = isDiscoverable ? extractCanonicalName(data, relativePath) : undefined;
            entry = {
                path: relativePath,
                type,
                phase,
                title,
                tags,
                created: typeof data.created === 'string' ? data.created : stat.birthtime.toISOString(),
                updated: stat.mtime.toISOString(),
                checksum,
                summary,
                dependencies,
                dependents: [], // Computed after all entries are processed
                ...(name ? { name } : {}),
                ...(triggers && triggers.length > 0 ? { triggers } : {}),
                ...(capability ? { capability } : {}),
                ...(kernel ? { kernel } : {}),
                ...(script ? { script } : {}),
            };
        }
        entries[relativePath] = entry;
        if (existingEntry) {
            updatedCount++;
            if (verbose) console.log(` updated: ${relativePath}`);
        } else {
            newCount++;
            if (verbose) console.log(` new: ${relativePath}`);
        }
    }
    // Apply metadata supplements if configured (enriches filename-metadata nodes from sidecars)
    if (graphConfig?.metadataSupplements?.length) {
        applyMetadataSupplements(entries, graphConfig.metadataSupplements, cwd);
    }
    // Build tag reverse index
    for (const entry of Object.values(entries)) {
        for (const tag of entry.tags ?? []) {
            if (!tagIndex[tag]) tagIndex[tag] = [];
            tagIndex[tag].push(entry.path);
        }
    }
    // Entries reused from the previous index still carry that build's
    // `dependents`. They are recomputed from scratch below, so clear them
    // first; otherwise reverse edges from deleted or edited files never go away.
    for (const entry of Object.values(entries)) {
        entry.dependents = [];
    }
    // Build dependency graph and compute dependents
    const entryPaths = Object.keys(entries);
    for (const entry of Object.values(entries)) {
        if (!depGraph[entry.path]) {
            depGraph[entry.path] = { upstream: [], downstream: [] };
        }
        for (const dep of entry.dependencies ?? []) {
            // Normalize: resolve the mention to an indexed path. An exact match
            // always wins; otherwise fall back to the first path ending with it.
            const normalizedDep = Object.prototype.hasOwnProperty.call(entries, dep)
                ? dep
                : entryPaths.find(p => p.endsWith(dep));
            if (normalizedDep && normalizedDep !== entry.path) {
                const upEdge = { path: normalizedDep, type: 'depends-on' };
                depGraph[entry.path].upstream.push(upEdge);
                if (!depGraph[normalizedDep]) {
                    depGraph[normalizedDep] = { upstream: [], downstream: [] };
                }
                const downEdge = { path: entry.path, type: 'depends-on' };
                depGraph[normalizedDep].downstream.push(downEdge);
                // Also update the dependents field on the target entry
                if (entries[normalizedDep]) {
                    entries[normalizedDep].dependents.push(entry.path);
                }
            }
        }
    }
    // Run citation sidecar edge extraction if configured
    if (graphConfig?.edgeExtraction?.parser === 'citation-sidecar') {
        // Build REF-XXX → path map from all entries with ref frontmatter
        const entryFrontmatter = new Map();
        for (const entryPath of entryPaths) {
            const fullPath = path.join(cwd, entryPath);
            if (fs.existsSync(fullPath)) {
                const content = fs.readFileSync(fullPath, 'utf-8');
                const { data } = parseFrontmatter(content);
                entryFrontmatter.set(entryPath, data);
            }
        }
        const refToPath = buildRefToPathMap(entryFrontmatter);
        // Parse each entry as a citation sidecar and extract edges
        let citationEdgeCount = 0;
        for (const entryPath of entryPaths) {
            const fullPath = path.join(cwd, entryPath);
            if (!fs.existsSync(fullPath)) continue;
            const content = fs.readFileSync(fullPath, 'utf-8');
            const result = parseCitationSidecar(content);
            if (!result) continue;
            const edges = citationResultToEdges(result, refToPath);
            if (!depGraph[entryPath]) {
                depGraph[entryPath] = { upstream: [], downstream: [] };
            }
            // Add upstream "cites" edges
            for (const edge of edges.upstream) {
                depGraph[entryPath].upstream.push(edge);
                // Add reciprocal downstream edge on the target
                if (!depGraph[edge.path]) {
                    depGraph[edge.path] = { upstream: [], downstream: [] };
                }
                depGraph[edge.path].downstream.push({ path: entryPath, type: 'cites' });
                citationEdgeCount++;
            }
            // Add downstream "cited-by" edges
            for (const edge of edges.downstream) {
                depGraph[entryPath].downstream.push(edge);
                // Add reciprocal upstream edge on the source
                if (!depGraph[edge.path]) {
                    depGraph[edge.path] = { upstream: [], downstream: [] };
                }
                depGraph[edge.path].upstream.push({ path: entryPath, type: 'cited-by' });
                citationEdgeCount++;
            }
        }
        if (verbose && citationEdgeCount > 0) {
            console.log(` citation edges: ${citationEdgeCount}`);
        }
    }
    // Deduplicate dependents
    for (const entry of Object.values(entries)) {
        entry.dependents = [...new Set(entry.dependents)];
    }
    const buildTimeMs = Date.now() - startTime;
    // Write index files
    const index = {
        version: INDEX_VERSION,
        builtAt: new Date().toISOString(),
        buildTimeMs,
        entries,
    };
    writeIndexFile(effectiveOutputCwd, 'metadata.json', index, indexOutputDir);
    writeIndexFile(effectiveOutputCwd, 'tags.json', tagIndex, indexOutputDir);
    writeIndexFile(effectiveOutputCwd, 'dependencies.json', depGraph, indexOutputDir);
    // Update and persist the checksum manifest for faster future builds (#794).
    // The next manifest contains entries for every file we processed this build.
    // Files that disappeared from disk are pruned before the manifest is
    // handed to writeManifest.
    const nextManifest = {
        version: 1,
        generated: new Date().toISOString(),
        entries: nextManifestEntries,
    };
    manifestStats.pruned = Object.keys(manifest.entries).length - Object.keys(nextManifestEntries).length;
    if (manifestStats.pruned < 0) manifestStats.pruned = 0;
    writeManifest(indexOutputDir, nextManifest);
    // Write stats
    const totalEdges = Object.values(depGraph).reduce((sum, node) => sum + node.downstream.length, 0);
    const orphaned = Object.entries(depGraph)
        .filter(([, node]) => node.upstream.length === 0 && node.downstream.length === 0)
        .length;
    const mostReferenced = Object.entries(depGraph)
        .map(([p, node]) => ({ path: p, count: node.downstream.length }))
        .sort((a, b) => b.count - a.count)[0] ?? null;
    // Counters keyed by frontmatter-provided strings: null prototypes for the
    // same reason as tagIndex.
    const byPhase = Object.create(null);
    const byType = Object.create(null);
    const tagDist = Object.create(null);
    for (const entry of Object.values(entries)) {
        byPhase[entry.phase] = (byPhase[entry.phase] || 0) + 1;
        byType[entry.type] = (byType[entry.type] || 0) + 1;
        for (const tag of entry.tags ?? []) {
            tagDist[tag] = (tagDist[tag] || 0) + 1;
        }
    }
    writeIndexFile(effectiveOutputCwd, 'stats.json', {
        version: INDEX_VERSION,
        builtAt: new Date().toISOString(),
        buildTimeMs,
        totalArtifacts: Object.keys(entries).length,
        byPhase,
        byType,
        tagDistribution: tagDist,
        graphMetrics: {
            totalEdges,
            orphanedArtifacts: orphaned,
            mostReferenced,
        },
    }, indexOutputDir);
    // Report
    const total = Object.keys(entries).length;
    console.log(`Artifact index built in ${buildTimeMs}ms`);
    console.log(` Indexed ${newCount} new, updated ${updatedCount}, unchanged ${unchangedCount}`);
    console.log(` Total: ${total} artifacts`);
    // Report manifest-driven change detection stats (only meaningful when !force
    // and we actually went through the content-read path for at least one file)
    if (!force && manifestStats.checked > 0) {
        const fastPath = manifestStats.statSkipped;
        const slowPath = manifestStats.checked - manifestStats.statSkipped;
        console.log(` Change detection: ${manifestStats.checked} checked, ${fastPath} skipped via stat, ${slowPath} content-read, ${manifestStats.reindexed} re-indexed`);
        if (manifestStats.pruned > 0) {
            console.log(` Pruned ${manifestStats.pruned} stale manifest entries (files no longer on disk)`);
        }
    }
    const displayDir = graph ? `${INDEX_DIR}/${graph}/` : `${INDEX_DIR}/`;
    console.log(` Output: ${displayDir}`);
}
//# sourceMappingURL=index-builder.js.map