UNPKG

autosnippet

Version:

Extract code patterns into a knowledge base for AI coding assistants

164 lines (163 loc) 5.72 kB
/** * TopicClassifier — 按主题分组 KnowledgeEntry * * 将 kind='pattern' 的知识条目按主题分组,用于 Channel B(Smart Rules)。 * 每个主题对应一个 .mdc 文件,设 alwaysApply: false + 丰富的 description。 */ /** * 主题定义 * - dimensions: 关联的 bootstrap 维度 * - keywords: 用于匹配 entry 分类的关键词 * - descriptionKeywords: 用于 .mdc description 字段的关键词(Agent 关联性判断依据) */ const TOPIC_MAP = { networking: { dimensions: ['event-and-data-flow'], descriptionKeywords: 'network, HTTP, API, request, response, URL, fetch, socket, REST, download, upload, error handling, retry, timeout', }, ui: { dimensions: ['code-pattern'], descriptionKeywords: 'view, controller, UI, layout, animation, cell, table, collection, button, scroll, navigation, auto layout, gesture, storyboard', }, data: { dimensions: ['code-pattern', 'architecture'], descriptionKeywords: 'model, storage, database, cache, CoreData, Realm, SQLite, keychain, UserDefaults, JSON, parsing, serialization, persistence', }, architecture: { dimensions: ['architecture', 'best-practice'], descriptionKeywords: 'singleton, delegate, factory, observer, protocol, manager, service, dependency injection, module, MVVM, MVC, coordinator, router, design pattern', }, conventions: { dimensions: ['code-standard'], descriptionKeywords: 'naming, format, style, import, header, prefix, convention, documentation, file organization, constants, enum, typedef, pragma mark', }, }; export class TopicClassifier { projectName; /** @param projectName 项目名称 */ constructor(projectName = 'Project') { this.projectName = projectName; } /** * 将 patterns 按主题分组 * @param entries KnowledgeEntry 数组 (kind='pattern') * @returns { topic: [entries] } */ group(entries) { const grouped = {}; const unmatched = []; for (const entry of entries) { const topic = this._classifyEntry(entry); if (topic) { if (!grouped[topic]) { grouped[topic] = []; } grouped[topic].push(entry); } else { unmatched.push(entry); } } // 未匹配的归入 'general' — 但只在有内容时 if (unmatched.length > 0) { grouped.general = unmatched; } return grouped; } /** * 为主题构建 description — Agent 判断关联性的唯一依据 * * v3: 动态丰富化 — 除硬编码 baseKeywords 外,还从 entries 的 * tags, whenClause, trigger 中提取高价值关键词,让 Cursor 的 * description 匹配更精准。 */ buildDescription(topic, entries) { const topicDef = TOPIC_MAP[topic]; const baseKeywords = topicDef ? topicDef.descriptionKeywords : entries .map((e) => e.title || '') .filter(Boolean) .join(', '); // 从 entries 提取动态关键词(tags + whenClause + title/description) const entryKeywords = entries .flatMap((e) => this._extractKeywords(e)) .filter(Boolean); // 从 tags 直接提取(tags 本身就是高质量关键词) const tagKeywords = entries .flatMap((e) => e.tags || []) .map((t) => t.toLowerCase()) .filter((t) => t.length >= 2 && !STOP_WORDS.has(t)); // 从 whenClause 提取(包含触发场景信息) const whenKeywords = entries .flatMap((e) => (e.whenClause || '').match(/[a-zA-Z]{3,}/g) || []) .map((w) => w.toLowerCase()) .filter((w) => !STOP_WORDS.has(w)); const allExtra = [...new Set([...tagKeywords, ...whenKeywords, ...entryKeywords])].slice(0, 15); const extra = allExtra.length > 0 ? `, ${allExtra.join(', ')}` : ''; return `${this._topicLabel(topic)} patterns for ${this.projectName}${baseKeywords}${extra}. Use when writing or reviewing ${this._topicLabel(topic).toLowerCase()}-related code.`; } /** * 分类单个 entry 到主题 — 直读 AI 预计算的 topicHint */ _classifyEntry(entry) { return entry.topicHint || null; // AI 没给 → null → 归入 general } /** * 从 entry 提取关键词 */ _extractKeywords(entry) { const text = `${entry.title || ''} ${entry.description || ''}`; // 提取英文关键词(3+ 字母) const words = text.match(/[a-zA-Z]{3,}/g) || []; const filtered = words.map((w) => w.toLowerCase()).filter((w) => !STOP_WORDS.has(w)); return [...new Set(filtered)].slice(0, 5); } _topicLabel(topic) { const labels = { networking: 'Networking', ui: 'UI', data: 'Data', architecture: 'Architecture', conventions: 'Conventions', general: 'General', }; return (labels[topic] || topic.charAt(0).toUpperCase() + topic.slice(1)); } } const STOP_WORDS = new Set([ 'the', 'and', 'for', 'this', 'that', 'with', 'from', 'use', 'using', 'when', 'not', 'all', 'are', 'has', 'have', 'been', 'will', 'can', 'should', 'must', 'may', 'each', 'which', 'their', 'your', 'its', 'project', 'code', 'file', 'class', 'method', 'function', 'bootstrap', ]); export default TopicClassifier;