// autosnippet
// Version:
// Extract code patterns into a knowledge base for AI coding assistants
// 346 lines (345 loc) • 12.5 kB
// JavaScript
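/**
 * RoleRefiner — role refinement by fusing four signals
 *
 * Raises the accuracy of TargetClassifier's regex-based inference (~65%) to ≥90%
 * by fusing four signals: AST structure, CallGraph behavior, DataFlow, and
 * EntityGraph topology.
 *
 * Signal weights:
 *   AST structure         0.30  inheritance chain / protocols / imports / suffix
 *   CallGraph behavior    0.30  callee analysis / fan-in vs. fan-out ratio / call type
 *   DataFlow              0.15  source–sink analysis / transformation detection
 *   EntityGraph topology  0.10  in-degree analysis / pattern detection
 *   Regex baseline        0.15  TargetClassifier result
 *
 * @module RoleRefiner
 */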
import { LanguageProfiles } from '#shared/LanguageProfiles.js';
/* ═══ Constants ═══════════════════════════════════════════ */
/**
 * Weight applied to each signal source in the weighted role vote
 * (a signal contributes `confidence * weight` to its role's score).
 * The five weights sum to 1.0.
 *
 * Frozen so that no code path can accidentally re-tune the vote at runtime.
 */
const WEIGHTS = Object.freeze({
  ast: 0.3,
  callGraph: 0.3,
  dataFlow: 0.15,
  entityGraph: 0.1,
  regex: 0.15,
});
/**
 * Maps a layer name declared in a project configuration file to a module role.
 * When a configuration file declares a layer (e.g. the `layer` of a Boxfile),
 * the layer name is a strong signal for the module's role.
 *
 * Keys are lower-case; callers lower-case the layer name before the lookup.
 *
 * The table is a frozen, prototype-less dictionary: layer names come from
 * external configuration, so a name such as "constructor" or "toString" must
 * resolve to `undefined` rather than to an inherited `Object.prototype` member.
 */
const CONFIG_LAYER_TO_ROLE = Object.freeze(
  Object.assign(Object.create(null), {
    vendors: 'utility',
    vendor: 'utility',
    basics: 'core',
    basic: 'core',
    foundation: 'core',
    core: 'core',
    services: 'service',
    service: 'service',
    components: 'feature',
    component: 'feature',
    accessories: 'feature',
    accessory: 'feature',
    underlays: 'feature',
    application: 'app',
    app: 'app',
    ui: 'ui',
    networking: 'networking',
    network: 'networking',
    storage: 'storage',
    model: 'model',
    test: 'test',
    tests: 'test',
  }),
);
/* ═══ RoleRefiner Class ═══════════════════════════════════ */
export class RoleRefiner {
  #bootstrapRepo;
  #entityRepo;
  #edgeRepo;
  #projectRoot;

  // Lazily computed, per-instance caches derived from the project's language families.
  #families = null;
  #superclassMap = null;
  #protocolMap = null;
  #importPatterns = null;

  /**
   * @param {object} bootstrapRepo - must provide `getLatestPrimaryLang(projectRoot)`.
   * @param {object} entityRepo - must provide `findByProjectAndFilePaths(projectRoot, filePaths)`.
   * @param {object} edgeRepo - must provide `findOutgoingByRelation`,
   *   `countEdgesJoinedByEntityFiles` and `findPatternsUsedByEntities`.
   * @param {string} projectRoot - project identifier passed through to the repositories.
   */
  constructor(bootstrapRepo, entityRepo, edgeRepo, projectRoot) {
    this.#bootstrapRepo = bootstrapRepo;
    this.#entityRepo = entityRepo;
    this.#edgeRepo = edgeRepo;
    this.#projectRoot = projectRoot;
  }

  /**
   * Looks up `key` in a name → role table and returns the role only when it is
   * a non-empty string. Inherited members such as `constructor` or `toString`
   * (functions/objects) are therefore never mistaken for a role.
   *
   * @param {object} roleMap - plain object used as a dictionary.
   * @param {*} key - candidate name (superclass, protocol, ...).
   * @returns {string|undefined}
   */
  static #lookupRole(roleMap, key) {
    if (typeof key !== 'string' || key === '') {
      return undefined;
    }
    const role = roleMap[key];
    return typeof role === 'string' && role !== '' ? role : undefined;
  }

  /** Resolves (once) the project's language families from the latest primary language. */
  async #detectFamilies() {
    this.#families ??= LanguageProfiles.resolveFamilies(
      await this.#bootstrapRepo.getLatestPrimaryLang(this.#projectRoot),
    );
    return this.#families;
  }

  /** Builds (once) the superclass → role table for the project's language families. */
  async #getSuperclassMap() {
    this.#superclassMap ??= LanguageProfiles.superclassRoles(await this.#detectFamilies());
    return this.#superclassMap;
  }

  /** Builds (once) the protocol → role table for the project's language families. */
  async #getProtocolMap() {
    this.#protocolMap ??= LanguageProfiles.protocolRoles(await this.#detectFamilies());
    return this.#protocolMap;
  }

  /** Builds (once) the list of `{ regex, role }` import patterns for the project's language families. */
  async #getImportPatterns() {
    this.#importPatterns ??= LanguageProfiles.importRolePatterns(await this.#detectFamilies());
    return this.#importPatterns;
  }

  /**
   * Refines the role of a single module by weighted voting over all signals.
   *
   * Each signal contributes `confidence * weight` to its role. Scores are plain
   * sums (not normalized) and are compared against fixed thresholds:
   *   - top score > 0.7                      → 'clear'
   *   - top and runner-up closer than 0.1    → 'uncertain' (with `alternatives`)
   *   - otherwise                            → 'clear' if top score > 0.4, else 'fallback'
   *
   * @param {object} module
   * @param {string} module.name - module identifier used for the dependency lookup.
   * @param {string[]} [module.files] - file paths belonging to the module.
   * @param {string} [module.inferredRole] - regex-baseline role; skipped when absent.
   * @param {string} [module.configLayer] - layer name declared in a config file (Boxfile/Tuist etc.).
   * @returns {Promise<{refinedRole: *, confidence: number, resolution: string,
   *   signals: object[], alternatives?: Array<[string, number]>}>}
   */
  async refineRole(module) {
    const signals = [];

    // 1. AST structure (0.30)
    signals.push(...(await this.#extractAstSignals(module)));
    // 2. CallGraph behavior (0.30)
    signals.push(...(await this.#extractCallSignals(module)));
    // 3. DataFlow (0.15)
    signals.push(...(await this.#extractFlowSignals(module)));
    // 4. EntityGraph topology (0.10)
    signals.push(...(await this.#extractTopoSignals(module)));

    // 4.5. Config-layer signal — the layer declared in Boxfile/Tuist style config files.
    // Only own keys count: the layer name is external input, so "constructor" etc.
    // must not resolve to an inherited Object.prototype member.
    const layerKey = typeof module.configLayer === 'string' ? module.configLayer.toLowerCase() : '';
    if (layerKey !== '' && Object.hasOwn(CONFIG_LAYER_TO_ROLE, layerKey)) {
      signals.push({
        role: CONFIG_LAYER_TO_ROLE[layerKey],
        confidence: 0.85,
        weight: WEIGHTS.ast, // same weight class as the AST signal (0.30)
        source: 'config-layer',
      });
    }

    // 5. Regex baseline (0.15). Skipped when no baseline role exists, so that a
    // missing value cannot become an "undefined" role candidate.
    if (module.inferredRole != null) {
      signals.push({
        role: module.inferredRole,
        confidence: 0.5,
        weight: WEIGHTS.regex,
        source: 'regex-baseline',
      });
    }

    // Weighted vote. A Map keeps insertion order (ties resolve to the earliest
    // signal, because Array.prototype.sort is stable) and has no prototype keys.
    const roleScores = new Map();
    for (const { role, confidence, weight } of signals) {
      roleScores.set(role, (roleScores.get(role) ?? 0) + confidence * weight);
    }
    const sorted = [...roleScores].sort((a, b) => b[1] - a[1]);

    if (sorted.length === 0) {
      return {
        refinedRole: module.inferredRole,
        confidence: 0,
        resolution: 'fallback',
        signals,
      };
    }

    const [topRole, topScore] = sorted[0];
    const secondScore = sorted[1]?.[1] ?? 0;
    const confidence = Math.min(topScore, 1);

    // Conflict resolution
    if (topScore > 0.7) {
      return { refinedRole: topRole, confidence, resolution: 'clear', signals };
    }
    if (topScore - secondScore < 0.1) {
      return {
        refinedRole: topRole,
        confidence,
        resolution: 'uncertain',
        alternatives: sorted.slice(0, 3),
        signals,
      };
    }
    return {
      refinedRole: topRole,
      confidence,
      resolution: topScore > 0.4 ? 'clear' : 'fallback',
      signals,
    };
  }

  /**
   * Refines every module in turn (sequentially).
   *
   * @param {Iterable<object>} modules - modules as accepted by {@link RoleRefiner#refineRole}.
   * @returns {Promise<Map<string, object>>} refinement result keyed by `module.name`.
   */
  async refineAll(modules) {
    const result = new Map();
    for (const m of modules ?? []) {
      result.set(m.name, await this.refineRole(m));
    }
    return result;
  }

  /* ─── Signal Extractors ──────────────────────────── */

  /**
   * AST structure signals: superclasses (+1 each), protocols (+0.5 each) and
   * dependency names matching an import pattern (+0.5 each). Each role's share
   * of the total count becomes that role's confidence.
   */
  async #extractAstSignals(module) {
    const filePaths = module.files ?? [];
    if (filePaths.length === 0) {
      return [];
    }

    // Entities declared in the module's files, with their inheritance info.
    const entities = await this.#entityRepo.findByProjectAndFilePaths(this.#projectRoot, filePaths);
    const superclassMap = await this.#getSuperclassMap();
    const protocolMap = await this.#getProtocolMap();

    const roleCounts = new Map();
    const bump = (role, amount) => {
      if (typeof role === 'string' && role !== '') {
        roleCounts.set(role, (roleCounts.get(role) ?? 0) + amount);
      }
    };

    for (const entity of entities) {
      // Inheritance chain
      bump(RoleRefiner.#lookupRole(superclassMap, entity.superclass), 1);
      // Protocol conformance
      for (const proto of entity.protocols ?? []) {
        bump(RoleRefiner.#lookupRole(protocolMap, proto), 0.5);
      }
    }

    // Import patterns, matched against the lower-cased dependency id.
    const imports = await this.#edgeRepo.findOutgoingByRelation(module.name, 'depends_on');
    const importPatterns = await this.#getImportPatterns();
    for (const imp of imports) {
      if (typeof imp?.toId !== 'string') {
        continue;
      }
      const depName = imp.toId.toLowerCase();
      for (const { regex, role } of importPatterns) {
        // A /g or /y regex remembers lastIndex between test() calls; reset it so
        // every dependency name is matched from its start.
        if (regex.global || regex.sticky) {
          regex.lastIndex = 0;
        }
        if (regex.test(depName)) {
          bump(role, 0.5);
        }
      }
    }

    const total = [...roleCounts.values()].reduce((a, b) => a + b, 0);
    if (total <= 0) {
      return [];
    }
    return [...roleCounts].map(([role, count]) => ({
      role,
      confidence: Math.min(count / total, 1),
      weight: WEIGHTS.ast,
      source: 'ast-structure',
    }));
  }

  /**
   * CallGraph behavior signal derived from the fan-in share of 'calls' edges:
   * mostly called (> 0.7) → 'core', mostly calling (< 0.3) → 'ui', otherwise 'service'.
   */
  async #extractCallSignals(module) {
    const filePaths = module.files ?? [];
    if (filePaths.length === 0) {
      return [];
    }

    // fan-out: 'calls' edges starting at the module's entities
    const fanOut = await this.#edgeRepo.countEdgesJoinedByEntityFiles(this.#projectRoot, filePaths, 'calls', 'from');
    // fan-in: 'calls' edges ending at the module's entities
    const fanIn = await this.#edgeRepo.countEdgesJoinedByEntityFiles(this.#projectRoot, filePaths, 'calls', 'to');
    if (fanIn + fanOut === 0) {
      return [];
    }

    const ratio = fanIn / (fanIn + fanOut);
    if (ratio > 0.7) {
      // Heavily depended upon
      return [{ role: 'core', confidence: ratio * 0.8, weight: WEIGHTS.callGraph, source: 'call-fanin-heavy' }];
    }
    if (ratio < 0.3) {
      // Mostly a consumer of other modules
      return [{ role: 'ui', confidence: (1 - ratio) * 0.6, weight: WEIGHTS.callGraph, source: 'call-fanout-heavy' }];
    }
    return [{ role: 'service', confidence: 0.5, weight: WEIGHTS.callGraph, source: 'call-balanced' }];
  }

  /**
   * DataFlow signal: more than twice as many outgoing as incoming 'data_flow'
   * edges → 'model' (producer); the reverse → 'ui' (consumer); otherwise no signal.
   */
  async #extractFlowSignals(module) {
    const filePaths = module.files ?? [];
    if (filePaths.length === 0) {
      return [];
    }

    // outgoing data_flow edges (data producer)
    const produced = await this.#edgeRepo.countEdgesJoinedByEntityFiles(this.#projectRoot, filePaths, 'data_flow', 'from');
    // incoming data_flow edges (data consumer)
    const consumed = await this.#edgeRepo.countEdgesJoinedByEntityFiles(this.#projectRoot, filePaths, 'data_flow', 'to');
    if (produced + consumed === 0) {
      return [];
    }

    const signals = [];
    if (produced > consumed * 2) {
      signals.push({ role: 'model', confidence: 0.6, weight: WEIGHTS.dataFlow, source: 'dataflow-producer' });
    }
    if (consumed > produced * 2) {
      signals.push({ role: 'ui', confidence: 0.5, weight: WEIGHTS.dataFlow, source: 'dataflow-consumer' });
    }
    return signals;
  }

  /**
   * EntityGraph topology signals from design patterns used by the module's
   * entities: 'singleton' → 'service', 'delegate' → 'ui' (case-insensitive).
   * One signal is emitted per returned pattern name.
   */
  async #extractTopoSignals(module) {
    const filePaths = module.files ?? [];
    if (filePaths.length === 0) {
      return [];
    }

    const patterns = await this.#edgeRepo.findPatternsUsedByEntities(this.#projectRoot, filePaths);
    const signals = [];
    for (const name of patterns) {
      if (typeof name !== 'string') {
        continue;
      }
      switch (name.toLowerCase()) {
        case 'singleton':
          signals.push({ role: 'service', confidence: 0.6, weight: WEIGHTS.entityGraph, source: 'pattern-singleton' });
          break;
        case 'delegate':
          signals.push({ role: 'ui', confidence: 0.4, weight: WEIGHTS.entityGraph, source: 'pattern-delegate' });
          break;
        default:
          break;
      }
    }
    return signals;
  }
}