// autosnippet
// Version:
// Extract code patterns into a knowledge base for AI coding assistants
// 628 lines (627 loc) • 24.2 kB
// JavaScript
/**
* KnowledgeFileWriter — 将 KnowledgeEntry 序列化为 .md 文件 / 从 .md 解析回实体
*
* 统一替代 CandidateFileWriter + RecipeFileWriter。
*
* 职责:
* - KnowledgeEntry → YAML frontmatter + Markdown body (serialize)
* - .md 内容 → wire format JSON → KnowledgeEntry.fromJSON() (parse)
* - 落盘到 AutoSnippet/{candidates|recipes}/{category}/ 目录
* - .md 文件 = 完整唯一数据源(Source of Truth),DB = 索引缓存
*
* Frontmatter 分层:
* - 标量字段(人类可读/可编辑):id, title, lifecycle, language, ...
* - 简单数组字段(行内 JSON):tags, headers, headerPaths
* - 值对象(_ 前缀,单行 JSON):_content, _relations, _constraints, ...
*
* 文件名策略:trigger slug > title slug > id[:8]
* 落盘目录:isCandidate() → candidates/ | isActive()/deprecated → recipes/
*/
import { createHash } from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import { CANDIDATES_DIR, RECIPES_DIR } from '../../infrastructure/config/Defaults.js';
import Logger from '../../infrastructure/logging/Logger.js';
import pathGuard from '../../shared/PathGuard.js';
/* ═══════════════════════════════════════════════════════════
 * Scalar field names — each one is written to the frontmatter
 * as a plain `key: value` line.
 * ═══════════════════════════════════════════════════════════ */
const SCALAR_FIELDS = [
  'id', 'title', 'trigger', 'lifecycle', 'language', 'category', 'kind', 'knowledgeType',
  'complexity', 'scope', 'difficulty', 'description', 'source', 'moduleName', 'topicHint',
  'whenClause', 'doClause', 'dontClause', 'coreCode',
  'createdBy', 'createdAt', 'updatedAt', 'publishedAt', 'publishedBy',
  'reviewedBy', 'reviewedAt', 'rejectionReason',
  'sourceFile', 'sourceCandidateId',
];
/* ═══════════════════════════════════════════════════════════
* KnowledgeFileWriter 类
* ═══════════════════════════════════════════════════════════ */
/**
 * Serializes knowledge entries to `.md` files (YAML-style frontmatter + Markdown
 * body) and manages those files on disk.
 *
 * Candidate entries (`entry.isCandidate()` is true) live under `candidatesDir`,
 * everything else under `recipesDir`, each in a lower-cased category sub-folder.
 */
export class KnowledgeFileWriter {
  candidatesDir;
  logger;
  projectRoot;
  recipesDir;

  /**
   * @param projectRoot Project root directory; the candidates and recipes
   *   directories are resolved relative to it.
   */
  constructor(projectRoot) {
    this.projectRoot = projectRoot;
    this.recipesDir = path.join(projectRoot, RECIPES_DIR);
    this.candidatesDir = path.join(projectRoot, CANDIDATES_DIR);
    this.logger = Logger.getInstance();
  }

  /* ═══ Serialization ═══════════════════════════════════════ */

  /**
   * Serializes an entry into a complete `.md` document.
   *
   * The embedded `_contentHash` equals `computeKnowledgeHash(result)`, i.e. the
   * hash of the document with its own `_contentHash` line removed, so a reader
   * can verify the file with that same function.
   *
   * @param entry Entry exposing `toJSON()` plus the fields read by `_buildBody`.
   * @returns The full Markdown text.
   */
  serialize(entry) {
    const json = entry.toJSON();
    const lines = ['---'];

    // Scalar fields: one human-readable `key: value` line each.
    for (const key of SCALAR_FIELDS) {
      const val = json[key];
      if (val != null && val !== '') {
        lines.push(`${key}: ${_yamlValue(key, val)}`);
      }
    }

    // Simple array fields are written as inline JSON.
    if (json.tags?.length) {
      lines.push(`tags: ${JSON.stringify(json.tags)}`);
    }
    if (json.headers?.length) {
      lines.push(`headers: ${JSON.stringify(json.headers)}`);
    }
    if (json.headerPaths?.length) {
      lines.push(`headerPaths: ${JSON.stringify(json.headerPaths)}`);
    }
    if (json.includeHeaders) {
      lines.push(`includeHeaders: true`);
    }
    if (json.autoApprovable) {
      lines.push(`autoApprovable: true`);
    }

    // Value objects: `_`-prefixed keys holding single-line JSON.
    const jsonFields = [
      ['_content', json.content],
      ['_relations', json.relations],
      ['_constraints', json.constraints],
      ['_reasoning', json.reasoning],
      ['_quality', json.quality],
      ['_stats', json.stats],
      ['_lifecycleHistory', json.lifecycleHistory],
    ];
    for (const [key, val] of jsonFields) {
      if (!val || typeof val !== 'object') {
        continue;
      }
      // Empty objects and empty arrays are omitted.
      const hasContent = Array.isArray(val) ? val.length > 0 : Object.keys(val).length > 0;
      if (hasContent) {
        lines.push(`${key}: ${JSON.stringify(val)}`);
      }
    }
    if (json.agentNotes) {
      lines.push(`_agentNotes: ${JSON.stringify(json.agentNotes)}`);
    }
    if (json.aiInsight) {
      lines.push(`_aiInsight: ${JSON.stringify(json.aiInsight)}`);
    }

    // Reserve the hash line by index so it can be filled in afterwards without
    // a text search that could hit the same token inside an earlier field.
    const hashLineIndex = lines.length;
    lines.push('_contentHash: __HASH_PLACEHOLDER__');
    lines.push('---');
    lines.push('');
    lines.push(this._buildBody(entry));
    lines.push('');

    // computeKnowledgeHash drops the `_contentHash:` line itself, so hashing the
    // placeholder document yields the same value as hashing the final file.
    const hash = computeKnowledgeHash(lines.join('\n'));
    lines[hashLineIndex] = `_contentHash: ${hash}`;
    return lines.join('\n');
  }

  /**
   * Builds the Markdown body.
   *
   * When `content.markdown` is present it is emitted as-is (minus any leading
   * frontmatter block); otherwise the body is assembled from the structured
   * fields. Missing `content`, `constraints` or `reasoning` are treated as empty.
   *
   * @param entry Entry to render.
   * @returns Markdown body text.
   */
  _buildBody(entry) {
    const c = entry.content ?? {};
    const lines = [];

    if (c.markdown) {
      // Full article: output directly, stripping a leftover frontmatter block.
      lines.push(c.markdown.replace(/^---[\s\S]*?---\s*/, '').trim());
      return lines.join('\n');
    }

    lines.push(`## ${entry.title}`);
    lines.push('');
    if (entry.description) {
      lines.push(`> ${entry.description}`);
      lines.push('');
    }
    if (c.pattern) {
      lines.push(`\`\`\`${entry.language || 'text'}`);
      lines.push(c.pattern);
      lines.push('```');
      lines.push('');
    }
    if (c.rationale) {
      lines.push('## 设计原理');
      lines.push('');
      lines.push(c.rationale);
      lines.push('');
    }
    if (c.steps?.length > 0) {
      lines.push('## 实施步骤');
      lines.push('');
      for (const [i, step] of c.steps.entries()) {
        if (typeof step === 'string') {
          lines.push(`${i + 1}. ${step}`);
          continue;
        }
        const title = step.title || '步骤';
        const desc = step.description || '';
        lines.push(`${i + 1}. **${title}**: ${desc}`);
        if (step.code) {
          lines.push('');
          lines.push('```');
          lines.push(step.code);
          lines.push('```');
        }
      }
      lines.push('');
    }
    if (entry.constraints?.boundaries?.length > 0) {
      lines.push('## 约束与边界');
      lines.push('');
      for (const b of entry.constraints.boundaries) {
        lines.push(`- ${b}`);
      }
      lines.push('');
    }
    if (entry.reasoning?.whyStandard) {
      lines.push('## Why Standard');
      lines.push('');
      lines.push(entry.reasoning.whyStandard);
      lines.push('');
    }
    if (entry.reasoning?.sources?.length > 0) {
      lines.push('## Sources');
      lines.push('');
      for (const src of entry.reasoning.sources) {
        lines.push(`- ${src}`);
      }
      lines.push('');
    }
    return lines.join('\n');
  }

  /* ═══ File operations ═════════════════════════════════════ */

  /**
   * Writes the entry to its target directory and updates `entry.sourceFile`
   * to the path relative to the project root.
   *
   * @param entry Entry to persist; must have `id` and `title`.
   * @returns Path of the written file, or `null` on failure (the error is logged).
   */
  persist(entry) {
    try {
      if (!entry?.id || !entry?.title) {
        this.logger.warn('Cannot persist knowledge entry: missing id or title');
        return null;
      }
      const { dir, filename } = this._resolveFilePath(entry);
      const filePath = path.join(dir, filename);
      // Path safety check.
      pathGuard.assertProjectWriteSafe(dir);
      // Serialize before touching the disk so that a serialization failure
      // cannot leave us with the old file already deleted.
      const markdown = this.serialize(entry);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }
      // Remove the previous file when the lifecycle or category changed.
      this._cleanupOldFile(entry, filePath);
      fs.writeFileSync(filePath, markdown, 'utf8');
      // Record where the entry now lives.
      entry.sourceFile = path.relative(this.projectRoot, filePath);
      this.logger.info('Knowledge entry persisted to file', {
        entryId: entry.id,
        lifecycle: entry.lifecycle,
        path: entry.sourceFile,
      });
      return filePath;
    }
    catch (error) {
      this.logger.error('Failed to persist knowledge entry to file', {
        entryId: entry?.id,
        error: error instanceof Error ? error.message : String(error),
      });
      return null;
    }
  }

  /**
   * Deletes the `.md` file belonging to an entry.
   *
   * Lookup order: `entry.sourceFile` (only when it is a regular file inside the
   * knowledge directories), then the computed filename in both category
   * folders, then a scan for the entry id.
   *
   * @param entry Entry whose file should be removed.
   * @returns `true` when a file was deleted.
   */
  remove(entry) {
    if (!entry?.id) {
      return false;
    }
    // First try the exact path recorded on the entry.
    if (entry.sourceFile) {
      const fullPath = path.join(this.projectRoot, entry.sourceFile);
      if (this._isRemovableKnowledgeFile(fullPath)) {
        pathGuard.assertSafe(fullPath);
        fs.unlinkSync(fullPath);
        this.logger.info('Knowledge entry file removed', {
          entryId: entry.id,
          path: entry.sourceFile,
        });
        return true;
      }
      // sourceFile may wrongly point at a project source file; never delete
      // anything outside the knowledge directories.
      if (!this._isInsideKnowledgeDirs(fullPath)) {
        this.logger.warn('remove skipped sourceFile: path outside knowledge dirs', {
          entryId: entry.id,
          path: entry.sourceFile,
        });
      }
    }
    // Fallback: look for the computed filename under candidates/ and recipes/.
    const { filename } = this._resolveFilePath(entry);
    const category = (entry.category || 'general').toLowerCase();
    const searchDirs = [
      path.join(this.candidatesDir, category),
      path.join(this.recipesDir, category),
    ];
    for (const dir of searchDirs) {
      const fp = path.join(dir, filename);
      if (fs.existsSync(fp)) {
        pathGuard.assertSafe(fp);
        fs.unlinkSync(fp);
        this.logger.info('Knowledge entry file removed', { entryId: entry.id, path: fp });
        return true;
      }
    }
    // Last resort: scan by id.
    return this._removeByIdScan(entry.id);
  }

  /**
   * Moves the entry's file to the directory matching its current lifecycle
   * (candidates/ ↔ recipes/) by deleting the old file and persisting anew.
   *
   * @param entry Entry whose lifecycle changed.
   * @returns The new file path, or `null` when persisting failed.
   */
  moveOnLifecycleChange(entry) {
    const oldPath = entry.sourceFile ? path.join(this.projectRoot, entry.sourceFile) : null;
    const { dir: newDir, filename } = this._resolveFilePath(entry);
    const newPath = path.join(newDir, filename);
    const samePath = oldPath !== null && path.resolve(oldPath) === path.resolve(newPath);
    // Same location: just re-serialize. Otherwise drop the old file first, but
    // only if it really is a knowledge file (same guard as _cleanupOldFile).
    if (!samePath && oldPath !== null && this._isRemovableKnowledgeFile(oldPath)) {
      pathGuard.assertSafe(oldPath);
      fs.unlinkSync(oldPath);
      this.logger.info('Removed old knowledge entry file on lifecycle change', {
        entryId: entry.id,
        oldPath: entry.sourceFile,
      });
    }
    return this.persist(entry);
  }

  /* ═══ Internal helpers ════════════════════════════════════ */

  /**
   * Computes where an entry is stored.
   *
   * @param entry Entry to locate.
   * @returns `{ dir, filename }` — target directory and file name.
   */
  _resolveFilePath(entry) {
    const baseDir = entry.isCandidate() ? this.candidatesDir : this.recipesDir;
    const category = (entry.category || 'general').toLowerCase();
    const dir = path.join(baseDir, category);
    const filename = _slugFilename(entry.trigger, entry.title, entry.id);
    return { dir, filename };
  }

  /**
   * Tells whether a path lies strictly inside the candidates or recipes directory.
   *
   * @param targetPath Path to test.
   * @returns `true` when the path is below one of the two knowledge directories.
   */
  _isInsideKnowledgeDirs(targetPath) {
    const resolved = path.resolve(targetPath);
    return [this.candidatesDir, this.recipesDir].some((base) => resolved.startsWith(path.resolve(base) + path.sep));
  }

  /**
   * Tells whether a path is an existing regular file inside the knowledge directories.
   *
   * @param targetPath Path to test.
   * @returns `true` only when deleting the path cannot touch foreign files or directories.
   */
  _isRemovableKnowledgeFile(targetPath) {
    if (!this._isInsideKnowledgeDirs(targetPath)) {
      return false;
    }
    try {
      return fs.statSync(targetPath).isFile();
    }
    catch {
      // Missing or unreadable path: nothing to remove.
      return false;
    }
  }

  /**
   * Deletes the file referenced by `entry.sourceFile` when it differs from
   * `newPath` (category change or lifecycle switch).
   *
   * @param entry Entry being persisted.
   * @param newPath Path the entry is about to be written to.
   */
  _cleanupOldFile(entry, newPath) {
    if (!entry.sourceFile) {
      return;
    }
    const oldPath = path.join(this.projectRoot, entry.sourceFile);
    if (oldPath === newPath) {
      return;
    }
    // Safety: entry.sourceFile may have been set (e.g. by an AI) to a project
    // source path such as a .xcdatamodeld; files outside the knowledge
    // directories must never be deleted.
    if (!this._isInsideKnowledgeDirs(oldPath)) {
      this.logger.warn('_cleanupOldFile skipped: path outside knowledge dirs', {
        entryId: entry.id,
        oldPath: entry.sourceFile,
      });
      return;
    }
    if (!fs.existsSync(oldPath)) {
      return;
    }
    // Never delete directories (e.g. a .xcdatamodeld bundle).
    try {
      const stat = fs.statSync(oldPath);
      if (!stat.isFile()) {
        this.logger.warn('_cleanupOldFile skipped: not a regular file', {
          entryId: entry.id,
          oldPath: entry.sourceFile,
        });
        return;
      }
    }
    catch {
      return;
    }
    pathGuard.assertSafe(oldPath);
    fs.unlinkSync(oldPath);
    this.logger.info('Cleaned up old knowledge entry file', {
      entryId: entry.id,
      oldPath: entry.sourceFile,
    });
  }

  /**
   * Deletes the entry's file by scanning both knowledge directories for its id.
   *
   * @param id Entry id to look for.
   * @returns `true` when a file was found and deleted.
   */
  _removeByIdScan(id) {
    for (const baseDir of [this.candidatesDir, this.recipesDir]) {
      if (!fs.existsSync(baseDir)) {
        continue;
      }
      try {
        if (_walkAndRemoveById(baseDir, id)) {
          this.logger.info('Knowledge entry file removed by id scan', { id });
          return true;
        }
      }
      catch (error) {
        // Best effort: a failing scan must not abort removal, but leave a trace.
        this.logger.warn('Knowledge entry id scan failed', {
          id,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }
    return false;
  }
}
/* ═══════════════════════════════════════════════════════════
* 公共工具函数
* ═══════════════════════════════════════════════════════════ */
/**
 * Computes the content fingerprint of a knowledge `.md` document.
 *
 * The first line starting with `_contentHash:` is removed before hashing, so the
 * stored hash never feeds into itself. CRLF line endings are normalized to LF
 * first, so the same document hashes identically regardless of how it was
 * checked out; for LF-only input the result is unchanged.
 *
 * @param content Full `.md` text.
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function computeKnowledgeHash(content) {
  const cleaned = content
    .replace(/\r\n/g, '\n')
    .replace(/^_contentHash:.*\n?/m, '')
    .trim();
  return createHash('sha256').update(cleaned, 'utf8').digest('hex').slice(0, 16);
}
/** Matches a line that starts a new top-level frontmatter key (e.g. `title:` or `_content:`). */
const _FRONTMATTER_KEY_LINE = /^_?[A-Za-z][\w-]*:(?:\s|$)/;

/** Attempts `JSON.parse`; reports failure through `ok` instead of throwing. */
function _tryParseJson(text) {
  try {
    return { ok: true, value: JSON.parse(text) };
  }
  catch {
    return { ok: false, value: undefined };
  }
}

/** Coerces `true`/`false` and unsigned integer/decimal literals; returns `undefined` for anything else. */
function _coerceFrontmatterScalar(raw) {
  if (raw === 'true') {
    return true;
  }
  if (raw === 'false') {
    return false;
  }
  if (/^\d+$/.test(raw)) {
    return Number.parseInt(raw, 10);
  }
  if (/^\d+\.\d+$/.test(raw)) {
    return Number.parseFloat(raw);
  }
  return undefined;
}

/** Decodes the `\\`, `\"` and `\n` escapes of a double-quoted value in a single pass. */
function _unescapeQuoted(inner) {
  return inner.replace(/\\(["\\n])/g, (match, ch) => (ch === 'n' ? '\n' : ch));
}

/** Parses the text between the `---` fences into a plain key/value object. */
function _parseFrontmatterBlock(block) {
  const data = {};
  const fmLines = block.split(/\r?\n/);
  for (let i = 0; i < fmLines.length; i++) {
    const line = fmLines[i];
    const colonIdx = line.indexOf(':');
    if (colonIdx <= 0) {
      continue;
    }
    const key = line.slice(0, colonIdx).trim();
    // Keys containing whitespace are not real keys.
    if (/\s/.test(key)) {
      continue;
    }
    const value = line.slice(colonIdx + 1).trim();

    // `_`-prefixed fields are stored without the prefix (_content → content).
    const isPrefixed = key.startsWith('_');
    const dataKey = isPrefixed ? key.slice(1) : key;
    // Assigning `__proto__` would replace the result's prototype; ignore it.
    if (dataKey === '__proto__') {
      continue;
    }

    if (isPrefixed) {
      if (value.startsWith('{') || value.startsWith('[')) {
        const single = _tryParseJson(value);
        if (single.ok) {
          data[dataKey] = single.value;
          continue;
        }
        // Possibly JSON spread over several lines: append following lines until
        // it parses. Stop at the next top-level key so that a broken value
        // cannot swallow the rest of the frontmatter.
        let jsonStr = value;
        while (i + 1 < fmLines.length && !_FRONTMATTER_KEY_LINE.test(fmLines[i + 1])) {
          i++;
          jsonStr += fmLines[i];
          const joined = _tryParseJson(jsonStr);
          if (joined.ok) {
            data[dataKey] = joined.value;
            break;
          }
        }
        continue;
      }
      // JSON string value (e.g. `_aiInsight: "text"`).
      if (value.startsWith('"')) {
        const quoted = _tryParseJson(value);
        if (quoted.ok) {
          data[dataKey] = quoted.value;
          continue;
        }
        // Not valid JSON: fall through and keep the raw text.
      }
      const coerced = _coerceFrontmatterScalar(value);
      data[dataKey] = coerced === undefined ? value : coerced;
      continue;
    }

    const scalar = _coerceFrontmatterScalar(value);
    if (scalar !== undefined) {
      data[key] = scalar;
      continue;
    }
    // Inline JSON array (tags, headers, ...).
    if (value.startsWith('[')) {
      const arr = _tryParseJson(value);
      if (arr.ok) {
        data[key] = arr.value;
        continue;
      }
    }
    // Strip surrounding quotes and decode escapes.
    data[key] = /^".*"$/.test(value) ? _unescapeQuoted(value.slice(1, -1)) : value;
  }
  return data;
}

/**
 * Parses `.md` content into wire-format JSON.
 *
 * Frontmatter keys become properties (with the `_` prefix removed); when the
 * frontmatter carries no `content.pattern`, the first fenced code block of the
 * body is used, and a body that looks like an article is stored as
 * `content.markdown`. A missing title falls back to the first `#`/`##` heading.
 *
 * @param content Full text of the `.md` file.
 * @param [relPath] Relative path, stored as `sourceFile` when given.
 * @returns Wire-format object.
 */
export function parseKnowledgeMarkdown(content, relPath) {
  const fmMatch = content.match(/^---\s*\r?\n([\s\S]*?)\r?\n---/);
  const data = fmMatch ? _parseFrontmatterBlock(fmMatch[1]) : {};

  // Enrich from the body. Skipped when `content` is a non-object value, since
  // properties cannot be attached to it.
  const bodyMatch = content.match(/^---[\s\S]*?---\s*\r?\n([\s\S]*)$/);
  if (bodyMatch && (!data.content || typeof data.content === 'object')) {
    const body = bodyMatch[1].trim();
    // NOTE(review): when `data.content` is absent this is a detached object, so
    // a pattern extracted below does not suppress the article check — confirm
    // whether that asymmetry is intended.
    const contentObj = data.content || {};
    if (!contentObj.pattern) {
      const codeMatch = body.match(/```\w*\n([\s\S]*?)```/);
      if (codeMatch) {
        data.content = data.content || {};
        data.content.pattern = codeMatch[1].trimEnd();
      }
    }
    if (!contentObj.markdown && !contentObj.pattern) {
      const isMarkdownArticle = body.includes('— 项目特写') || (body.startsWith('#') && body.length > 200);
      if (isMarkdownArticle) {
        data.content = data.content || {};
        data.content.markdown = body;
      }
    }
  }

  if (relPath) {
    data.sourceFile = relPath;
  }

  // Fallback: take the title from the first heading.
  if (!data.title) {
    const headingMatch = content.match(/^##?\s+(.+)$/m);
    if (headingMatch) {
      data.title = headingMatch[1].trim();
    }
  }
  return data;
}
/* ═══ 私有辅助 ═══════════════════════════════════════════ */
/**
 * Builds the `.md` file name for an entry: trigger slug, else title slug,
 * else the first 8 characters of the id.
 *
 * A trigger is only used when its slug keeps at least one ASCII letter or
 * digit; otherwise (e.g. a non-ASCII trigger that collapses to `__`) unrelated
 * entries would all map to the same file name.
 *
 * @param trigger Entry trigger; a leading `@` is dropped.
 * @param title Entry title.
 * @param id Entry id, used as the last resort.
 * @returns File name including the `.md` suffix.
 */
function _slugFilename(trigger, title, id) {
  // Prefer the trigger.
  if (trigger) {
    const clean = String(trigger)
      .replace(/^@/, '')
      .replace(/[^a-zA-Z0-9_-]/g, '_')
      .slice(0, 60);
    if (clean.length >= 2 && /[a-zA-Z0-9]/.test(clean)) {
      return `${clean}.md`;
    }
  }
  // Then the title (letters and digits of any script are kept).
  if (title) {
    const slug = String(title)
      .toLowerCase()
      .replace(/[^\p{L}\p{N}\s-]/gu, '')
      .replace(/\s+/g, '-')
      .replace(/-{2,}/g, '-')
      .replace(/^-|-$/g, '')
      .slice(0, 60);
    if (slug.length >= 3) {
      return `${slug}.md`;
    }
  }
  // Finally the id prefix, sanitized so it can never contain path separators.
  const idPart = String(id || 'unknown')
    .slice(0, 8)
    .replace(/[^a-zA-Z0-9_-]/g, '_');
  return `${idPart}.md`;
}
/**
 * Renders a scalar for a frontmatter `key: value` line.
 *
 * Numbers and booleans are written bare. Strings are double-quoted (with `\`,
 * `"` and newline escaped) when they contain special characters, have
 * leading/trailing whitespace, or would otherwise be read back as a number or
 * boolean — so a string such as `'123'` or `'true'` keeps its type.
 *
 * @param key Field name (currently unused; kept for interface stability).
 * @param val Value to render.
 * @returns Text to place after `key: `.
 */
function _yamlValue(key, val) {
  if (typeof val === 'number' || typeof val === 'boolean') {
    return String(val);
  }
  const str = String(val);
  const hasSpecialChars = /[:#[\]{}&*!|>'"`,@\n]/.test(str) || str.trim() !== str;
  const looksLikeLiteral = /^(?:true|false|\d+(?:\.\d+)?)$/.test(str);
  if (!hasSpecialChars && !looksLikeLiteral) {
    return str;
  }
  const escaped = str
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\n/g, '\\n');
  return `"${escaped}"`;
}
/**
 * Recursively scans `dir` and deletes the first `.md` file whose frontmatter
 * declares exactly the given id.
 *
 * Only the first 500 characters of each file are inspected, and the id must be
 * the whole value of an `id:` line (bare or double-quoted). A substring test
 * would also hit ids that merely share a prefix, or text in the body.
 * Files whose name starts with `_` are skipped.
 *
 * @param dir Directory to scan.
 * @param id Entry id to look for.
 * @returns `true` when a file was deleted.
 */
function _walkAndRemoveById(dir, id) {
  if (!fs.existsSync(dir)) {
    return false;
  }
  const wanted = String(id);
  const declaresWantedId = (line) => {
    const match = /^\s*id:\s*(.*?)\s*$/.exec(line);
    return match !== null && (match[1] === wanted || match[1] === `"${wanted}"`);
  };
  for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
    const full = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      if (_walkAndRemoveById(full, id)) {
        return true;
      }
      continue;
    }
    if (!dirent.name.endsWith('.md') || dirent.name.startsWith('_')) {
      continue;
    }
    const head = fs.readFileSync(full, 'utf8').slice(0, 500);
    if (head.split(/\r?\n/).some(declaresWantedId)) {
      pathGuard.assertSafe(full);
      fs.unlinkSync(full);
      return true;
    }
  }
  return false;
}
export default KnowledgeFileWriter;