UNPKG

aiwg

Version:

Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platforms.

162 lines 6.46 kB
/**
 * Citation Sidecar Edge Extraction
 *
 * Parses markdown citation sidecar files into typed graph edges.
 * Each sidecar has YAML frontmatter with `ref: <id>` and two markdown tables:
 *
 * - **Outgoing**: papers this work cites (column: "Inducted REF") → `cites` edges
 * - **Incoming**: corpus papers that cite this work (column: "REF") → `cited-by` edges
 *
 * Supported node-id forms (#105):
 * - `REF-\d+` research-paper IDs (REF-001, REF-029, ...)
 * - `PROF-[POFG]-[a-z0-9-]+` entity-profile IDs:
 *   - `PROF-P-*` people, `PROF-O-*` orgs, `PROF-F-*` funders, `PROF-G-*` groups
 *
 * Both forms can appear as the sidecar's source (`frontmatter.ref`) and as
 * targets in the outgoing/incoming tables. The two ID spaces are
 * unambiguous (always prefixed) and orthogonal.
 *
 * @implements #722
 * @implements #105
 * @source @src/artifacts/types.ts
 * @tests @test/unit/artifacts/citation-parser.test.ts
 */
import { parseFrontmatter } from './index-builder.js';

/**
 * Match a single node identifier (REF-* or PROF-*) anywhere in a string.
 * Used by `extractRefsFromTable` to pull every ID out of a table cell.
 */
const NODE_ID_PATTERN = /(?:REF-\d+|PROF-[POFG]-[a-z0-9-]+)/g;

/**
 * Validate that a string is a complete node identifier.
 * Used by `isNodeId`, which in turn gates frontmatter `ref` values in
 * `parseCitationSidecar` and `buildRefToPathMap`.
 */
const NODE_ID_FULL = /^(?:REF-\d+|PROF-[POFG]-[a-z0-9-]+)$/;

/**
 * Test whether a value is a valid sidecar node identifier.
 *
 * Accepts `REF-\d+` and `PROF-[POFG]-[a-z0-9-]+`. Returns false for any
 * other input, including non-strings and `PROF-` prefixed strings whose
 * type code is not one of P, O, F, G.
 *
 * @param value - Candidate identifier (any type)
 * @returns true only when `value` is a string matching the full node-id form
 */
export function isNodeId(value) {
  return typeof value === 'string' && NODE_ID_FULL.test(value);
}

/**
 * Split one markdown table row into trimmed cells, keeping empty cells.
 *
 * Only the outer pipes are stripped; interior empty cells are preserved so
 * that a cell's index always equals its column position. (Dropping empty
 * cells would shift every later cell left and make the caller read the
 * wrong column.)
 *
 * @param line - A single table line such as `| a |  | c |`
 * @returns Array of trimmed cell strings, possibly containing `''`
 */
function splitTableRow(line) {
  let row = line.trim();
  if (row.startsWith('|')) row = row.slice(1);
  if (row.endsWith('|')) row = row.slice(0, -1);
  return row.split('|').map((cell) => cell.trim());
}

/**
 * Extract node identifiers from a markdown table column.
 *
 * All lines of `tableText` that start with `|` (after trimming) are treated
 * as one table: the first is the header, the second is skipped as the
 * separator, and the rest are data rows. The column whose header equals
 * `columnName` (case-insensitive) is scanned for node IDs (REF-* or PROF-*);
 * empty and dash-only cells are skipped. A cell may contribute several IDs.
 *
 * @param tableText - Markdown text containing the table (header + separator + rows)
 * @param columnName - Column header to extract from (e.g., "Inducted REF")
 * @returns Array of node identifiers found, in document order (duplicates kept)
 */
export function extractRefsFromTable(tableText, columnName) {
  const lines = tableText.split('\n').filter((l) => l.trim().startsWith('|'));
  // Need header + separator + at least one row
  if (lines.length < 3) return [];

  // Parse header to find the column index
  const wanted = columnName.toLowerCase();
  const colIndex = splitTableRow(lines[0]).findIndex((h) => h.toLowerCase() === wanted);
  if (colIndex === -1) return [];

  // Skip header (line 0) and separator (line 1), parse data rows
  const refs = [];
  for (let i = 2; i < lines.length; i++) {
    const cells = splitTableRow(lines[i]);
    // Short row: the target column is simply absent
    if (colIndex >= cells.length) continue;

    const value = cells[colIndex];
    // Skip empty, em-dash, hyphen, or en-dash placeholder values
    if (!value || value === '—' || value === '-' || value === '–') continue;

    // NODE_ID_PATTERN is a module-level /g RegExp shared across calls;
    // reset lastIndex defensively before reuse.
    NODE_ID_PATTERN.lastIndex = 0;
    const refMatches = value.match(NODE_ID_PATTERN);
    if (refMatches) {
      refs.push(...refMatches);
    }
  }
  return refs;
}

/**
 * Parse a citation sidecar markdown file into structured edges.
 *
 * The frontmatter `ref` must be a valid node identifier (REF-* or PROF-*).
 * The body is split on `## ` headings; a section whose text starts with
 * "outgoing" (case-insensitive) supplies `cites` from its "Inducted REF"
 * column, and one starting with "incoming" supplies `citedBy` from its
 * "REF" column. If several sections share a prefix, the last one wins.
 *
 * @param content - Full markdown content of the sidecar file
 * @returns `{ ref, cites, citedBy }`, or null if the frontmatter has no valid `ref`
 */
export function parseCitationSidecar(content) {
  const { data, body } = parseFrontmatter(content);

  // Must have a node identifier in frontmatter (REF-* or PROF-*)
  if (!isNodeId(data.ref)) return null;
  const ref = data.ref;

  // Split body into sections by ## headings (### subsections stay inside
  // their parent section because the pattern requires a space after "##")
  const sections = body.split(/^## /m).filter(Boolean);

  let cites = [];
  let citedBy = [];
  for (const section of sections) {
    const sectionLower = section.toLowerCase();
    if (sectionLower.startsWith('outgoing')) {
      // Outgoing table: extract from "Inducted REF" column
      cites = extractRefsFromTable(section, 'Inducted REF');
    } else if (sectionLower.startsWith('incoming')) {
      // Incoming table: extract from "REF" column. Pipe-prefixed lines are
      // collected from anywhere in the section, so a table placed under a
      // subsection heading (### Corpus Cross-References) is still found.
      citedBy = extractRefsFromTable(section, 'REF');
    }
  }
  return { ref, cites, citedBy };
}

/**
 * Convert a CitationParseResult into TypedEdge arrays for the dependency graph.
 *
 * Identifiers with no entry (or a falsy path) in `refToPath` are dropped.
 *
 * @param result - Parsed citation sidecar (`cites` and `citedBy` arrays are read)
 * @param refToPath - Map from node identifier to file path in the index
 * @returns Object with upstream (cites) and downstream (cited-by) typed edges
 */
export function citationResultToEdges(result, refToPath) {
  const upstream = [];
  const downstream = [];

  // Outgoing citations → upstream "cites" edges
  for (const citedRef of result.cites) {
    const targetPath = refToPath.get(citedRef);
    if (targetPath) {
      upstream.push({ path: targetPath, type: 'cites' });
    }
  }

  // Incoming citations → downstream "cited-by" edges
  for (const citingRef of result.citedBy) {
    const sourcePath = refToPath.get(citingRef);
    if (sourcePath) {
      downstream.push({ path: sourcePath, type: 'cited-by' });
    }
  }
  return { upstream, downstream };
}

/**
 * Build a node-id → file path mapping from indexed entries.
 *
 * Scans entry frontmatter for `ref` fields that are valid node identifiers
 * (REF-* or PROF-*). When two entries declare the same `ref`, the one
 * iterated later overwrites the earlier one.
 *
 * @param entries - Iterable of [path, parsed frontmatter data] pairs (e.g. a Map)
 * @returns Map from node identifier to file path
 */
export function buildRefToPathMap(entries) {
  const map = new Map();
  for (const [filePath, data] of entries) {
    if (isNodeId(data.ref)) {
      map.set(data.ref, filePath);
    }
  }
  return map;
}
//# sourceMappingURL=citation-parser.js.map