autosnippet
Version:
Extract code patterns into a knowledge base for AI coding assistants
625 lines (624 loc) • 22.5 kB
JavaScript
/**
 * @module CallSiteExtractor
 * @description Phase 5: extract call sites from the AST
 *
 * Uses post-walk extraction (option B): once the walker's walk() has finished,
 * call sites are extracted in a second traversal, with zero changes to the
 * existing walker logic.
 *
 * Responsibilities:
 * - Extract call_expression / new_expression from statement_block / block
 * - Resolve callee, receiver, callType, argCount, etc.
 * - Associate each call with its enclosing className + methodName (inferred from context)
 *
 * Supported languages:
 * - TypeScript / JavaScript / TSX (P0)
 * - Python (P0)
 * - Go / Java / Kotlin (P1 — via lang plugin extractCallSites)
 */
// ── TypeScript / JavaScript / TSX ──────────────────────────
/**
 * Extract every call site from a TypeScript / JavaScript / TSX syntax tree.
 *
 * Post-walk strategy: function and method bodies are located first, then each
 * body is scanned for call sites by `_extractCallSitesFromBody`, which receives
 * `ctx` together with the enclosing class and method names.
 *
 * @param root AST root node
 * @param ctx walker context (handed through to the body scanner)
 * @param lang language identifier (not read by this function)
 */
export function extractCallSitesTS(root, ctx, lang) {
  _collectTSScopes(root).forEach(({ body, className, methodName }) => {
    _extractCallSitesFromBody(body, className, methodName, ctx);
  });
}
/**
 * Collect every function / method scope in a TS/JS syntax tree.
 *
 * Walks the tree looking for class declarations, method definitions, function
 * (and generator) declarations and function-valued variable declarators, and
 * records the `statement_block` of each together with its naming context.
 *
 * Notes:
 * - A class body is only entered when the class has a name.
 * - `statement_block`, `function_body` and `template_string` nodes are never
 *   entered, so functions nested inside a collected body are not collected
 *   as separate scopes.
 * - Functions without a `statement_block` child (e.g. expression-bodied arrow
 *   functions) are not collected.
 *
 * @param root AST root node
 * @returns {Array<{body: object, className: (string|null), methodName: string}>}
 *          one entry per collected body, in document order
 */
function _collectTSScopes(root) {
  const CLASS_NAME_TYPES = new Set(['type_identifier', 'identifier']);
  // `private_property_identifier` covers `#name() {}` methods, which would
  // otherwise be reported as 'unknown'.
  const METHOD_NAME_TYPES = new Set([
    'property_identifier',
    'private_property_identifier',
    'identifier',
    'computed_property_name',
  ]);
  // `function` is the legacy node name for function expressions; newer
  // tree-sitter-javascript grammars emit `function_expression` instead.
  const FUNCTION_VALUE_TYPES = new Set([
    'arrow_function',
    'function',
    'function_expression',
    'generator_function',
  ]);
  const SKIPPED_CONTAINERS = new Set(['statement_block', 'function_body', 'template_string']);

  const scopes = [];

  const firstOfTypes = (node, types) => node.namedChildren.find((c) => types.has(c.type));
  const firstOfType = (node, type) => node.namedChildren.find((c) => c.type === type);

  // Record `owner`'s statement_block (if it has one) under the given names.
  const addScope = (owner, className, methodName) => {
    const body = firstOfType(owner, 'statement_block');
    if (body) {
      scopes.push({ body, className, methodName });
    }
  };

  function walk(node, className) {
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (!child) {
        continue;
      }
      switch (child.type) {
        case 'class_declaration':
        case 'abstract_class_declaration': {
          const name = firstOfTypes(child, CLASS_NAME_TYPES)?.text || null;
          const body = firstOfType(child, 'class_body');
          if (body && name) {
            walk(body, name);
          }
          break;
        }
        case 'method_definition': {
          addScope(child, className, firstOfTypes(child, METHOD_NAME_TYPES)?.text || 'unknown');
          break;
        }
        case 'function_declaration':
        case 'generator_function_declaration': {
          addScope(child, className, firstOfType(child, 'identifier')?.text || 'unknown');
          break;
        }
        case 'lexical_declaration':
        case 'variable_declaration': {
          // const foo = () => { ... } / var foo = function () { ... }
          for (const decl of child.namedChildren) {
            if (decl.type !== 'variable_declarator') {
              continue;
            }
            const nameNode = firstOfType(decl, 'identifier');
            const valueNode = firstOfTypes(decl, FUNCTION_VALUE_TYPES);
            if (nameNode && valueNode) {
              addScope(valueNode, className, nameNode.text);
            }
          }
          break;
        }
        case 'export_statement': {
          // An export may wrap a function or class declaration.
          walk(child, className);
          break;
        }
        default: {
          // Descend into other containers, but never into function bodies.
          if (child.namedChildCount > 0 && !SKIPPED_CONTAINERS.has(child.type)) {
            walk(child, className);
          }
        }
      }
    }
  }

  walk(root, null);
  return scopes;
}
/**
 * Recursively extract call sites from a function / method body and append
 * them to `ctx.callSites`.
 *
 * Recorded node kinds:
 * - `call_expression` — parsed by `_parseTSCallExpression` (which may reject it)
 * - `new_expression` — recorded as a `constructor` call
 * - JSX elements whose tag starts with an upper-case letter — recorded as a
 *   `constructor` call, with the number of JSX attributes as `argCount`
 *
 * A call directly under an `await_expression` is flagged `isAwait: true`.
 *
 * The whole call / `new` node is traversed after it is recorded — callee
 * included — so inner calls of a chain such as `foo().bar()` or
 * `(await fetch(url)).json()` are recorded too, not only calls that appear
 * inside the argument list.
 *
 * @param bodyNode statement_block node (nothing happens when falsy)
 * @param className enclosing class name, or null
 * @param methodName enclosing method / function name
 * @param ctx walker context; results are pushed onto `ctx.callSites`
 */
function _extractCallSitesFromBody(bodyNode, className, methodName, ctx) {
  if (!bodyNode) {
    return;
  }

  // Visit every named child of `node`, handing each the same await flag.
  const walkChildren = (node, isAwaited) => {
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child) {
        walk(child, isAwaited);
      }
    }
  };

  function walk(node, isAwaited) {
    // Skip syntax-error nodes (Issue #17: defensive handling).
    // NOTE(review): assumes `isMissing` is a boolean property on this binding's
    // nodes; if it is a method the guard is always truthy — confirm.
    if (!node || node.type === 'ERROR' || node.isMissing) {
      return;
    }

    switch (node.type) {
      case 'await_expression': {
        // Only the direct operand is flagged as awaited.
        walkChildren(node, true);
        return;
      }
      case 'call_expression': {
        const callSite = _parseTSCallExpression(node, className, methodName, isAwaited);
        if (callSite) {
          ctx.callSites.push(callSite);
        }
        // Callee and arguments alike may contain further calls.
        walkChildren(node, false);
        return;
      }
      case 'new_expression': {
        const ctor = node.namedChildren.find(
          (c) => c.type === 'identifier' || c.type === 'member_expression',
        );
        if (ctor) {
          ctx.callSites.push({
            callee: ctor.text,
            callerMethod: methodName,
            callerClass: className,
            callType: 'constructor',
            receiver: null,
            receiverType: ctor.text,
            argCount: _countArgs(node),
            line: node.startPosition.row + 1,
            isAwait: isAwaited,
          });
        }
        walkChildren(node, false);
        return;
      }
      // JSX/TSX: rendering a component counts as a call site (Issue #13).
      case 'jsx_self_closing_element':
      case 'jsx_opening_element': {
        const tagNode =
          node.namedChildren.find((c) => c.type === 'identifier' || c.type === 'jsx_identifier') ||
          node.namedChildren.find(
            (c) => c.type === 'member_expression' || c.type === 'jsx_member_expression',
          );
        const tagName = tagNode?.text;
        // Upper-case tags are components; lower-case ones are native HTML tags.
        if (tagName && /^[A-Z]/.test(tagName)) {
          const attrCount = node.namedChildren.filter((c) => c.type === 'jsx_attribute').length;
          ctx.callSites.push({
            callee: tagName,
            callerMethod: methodName,
            callerClass: className,
            callType: 'constructor',
            receiver: null,
            receiverType: tagName,
            argCount: attrCount,
            line: node.startPosition.row + 1,
            isAwait: false,
          });
        }
        // Attribute values may hold nested calls, e.g. onClick={doSomething()}.
        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          if (child && child.type === 'jsx_attribute') {
            walkChildren(child, false);
          }
        }
        return;
      }
      default: {
        walkChildren(node, false);
      }
    }
  }

  walk(bodyNode, false);
}
/**
 * Turn a TS/JS `call_expression` node into a call-site record.
 *
 * The first named child of the node is taken as the callee expression:
 * - `member_expression` → `obj.method()`; the receiver is the text of the
 *   first child that is not a `property_identifier`. `this` / `self` and
 *   `super` receivers map `receiverType` to the enclosing class; a receiver
 *   starting with an upper-case letter is treated as a static call.
 * - `identifier` → plain function call.
 * - `super` → `super(...)` constructor call.
 * - anything else → the callee is the expression text, cut to 80 characters.
 *
 * @param node call_expression node
 * @param className enclosing class name, or null
 * @param methodName enclosing method / function name
 * @param isAwaited whether the call sits directly under an await
 * @returns the call-site record, or null when there is no callee node or
 *          `_isNoiseCall` rejects the call
 */
function _parseTSCallExpression(node, className, methodName, isAwaited) {
  const target = node.namedChildren[0];
  if (!target) {
    return null;
  }

  let callee;
  let callType = 'function';
  let receiver = null;
  let receiverType = null;

  switch (target.type) {
    case 'member_expression': {
      const owner = target.namedChildren.find((part) => part.type !== 'property_identifier');
      const member = target.namedChildren.find((part) => part.type === 'property_identifier');
      receiver = owner?.text || null;
      callee = member?.text || target.text;
      if (receiver === 'super') {
        callType = 'super';
        receiverType = className; // the parent class still has to be resolved (CHA)
      } else if (receiver === 'this' || receiver === 'self') {
        callType = 'method';
        receiverType = className;
      } else if (receiver && /^[A-Z]/.test(receiver)) {
        // Static-call inference, e.g. UserService.create()
        callType = 'static';
        receiverType = receiver;
      } else {
        // Includes `this.xxx.method()`: receiverType stays null here; per the
        // original note it is resolved later by CallEdgeResolver.
        callType = 'method';
      }
      break;
    }
    case 'identifier': {
      callee = target.text;
      break;
    }
    case 'super': {
      callee = 'super';
      callType = 'super';
      receiverType = className;
      break;
    }
    default: {
      // Complex callee expression, e.g. getFactory()()
      callee = target.text?.slice(0, 80) || 'unknown';
    }
  }

  // Drop built-in / utility noise before building the record.
  if (_isNoiseCall(callee, receiver)) {
    return null;
  }

  return {
    callee,
    callerMethod: methodName,
    callerClass: className,
    callType,
    receiver,
    receiverType,
    argCount: _countArgs(node),
    line: node.startPosition.row + 1,
    isAwait: isAwaited,
  };
}
// ── Python ─────────────────────────────────────────────────
/**
 * Extract every call site from a Python syntax tree.
 *
 * Function and method bodies are located first; each body is then scanned by
 * `_extractPyCallSitesFromBody`, which receives `ctx` together with the
 * enclosing class and function names.
 *
 * @param root AST root node
 * @param ctx walker context (handed through to the body scanner)
 * @param lang language identifier (not read by this function)
 */
export function extractCallSitesPython(root, ctx, lang) {
  _collectPyScopes(root).forEach(({ body, className, methodName }) => {
    _extractPyCallSitesFromBody(body, className, methodName, ctx);
  });
}
/**
 * Collect every function / method scope in a Python syntax tree.
 *
 * Class definitions contribute their name as `className` for the functions in
 * their body; function definitions contribute their `block` as a scope.
 * Decorated definitions are unwrapped and treated like plain ones. `block`
 * nodes are otherwise never entered, so functions nested inside a function
 * body are not collected separately.
 *
 * @param root AST root node
 * @returns {Array<{body: object, className: (string|null), methodName: string}>}
 *          one entry per collected function body, in document order
 */
function _collectPyScopes(root) {
  const found = [];
  const childOfType = (node, type) => node.namedChildren.find((c) => c.type === type);
  const isDefinition = (node) =>
    node.type === 'class_definition' || node.type === 'function_definition';

  // Handle a class or function definition, whether or not it was decorated.
  const visitDefinition = (def, className) => {
    const ident = childOfType(def, 'identifier')?.text;
    const body = childOfType(def, 'block');
    if (def.type === 'class_definition') {
      // Unnamed or body-less classes are ignored.
      if (body && ident) {
        descend(body, ident);
      }
      return;
    }
    if (body) {
      found.push({ body, className, methodName: ident || 'unknown' });
    }
  };

  function descend(node, className) {
    for (let idx = 0; idx < node.namedChildCount; idx += 1) {
      const child = node.namedChild(idx);
      if (!child) {
        continue;
      }
      if (isDefinition(child)) {
        visitDefinition(child, className);
      } else if (child.type === 'decorated_definition') {
        // The decorators are followed by the actual class / function definition.
        const wrapped = child.namedChildren.find(isDefinition);
        if (wrapped) {
          visitDefinition(wrapped, className);
        }
      } else if (child.namedChildCount > 0 && child.type !== 'block') {
        descend(child, className);
      }
    }
  }

  descend(root, null);
  return found;
}
/**
 * Recursively extract call sites from a Python function body and append them
 * to `ctx.callSites`.
 *
 * Every `call` node is handed to `_parsePyCallExpression` (which may reject
 * it). A call directly under an `await` node is flagged `isAwait: true`.
 *
 * The whole call node is traversed after it is recorded — callee included —
 * so every link of a chain such as `session.query(User).where(x).all()` is
 * recorded, as are calls inside argument forms other than `argument_list`
 * (for example a bare generator expression).
 *
 * @param bodyNode block node (nothing happens when falsy)
 * @param className enclosing class name, or null
 * @param methodName enclosing function name
 * @param ctx walker context; results are pushed onto `ctx.callSites`
 */
function _extractPyCallSitesFromBody(bodyNode, className, methodName, ctx) {
  if (!bodyNode) {
    return;
  }

  // Visit every named child of `node`, handing each the same await flag.
  const visitChildren = (node, isAwaited) => {
    for (let i = 0; i < node.namedChildCount; i++) {
      const child = node.namedChild(i);
      if (child) {
        visit(child, isAwaited);
      }
    }
  };

  function visit(node, isAwaited) {
    // Skip syntax-error nodes (Issue #17: defensive handling).
    // NOTE(review): assumes `isMissing` is a boolean property on this binding's
    // nodes; if it is a method the guard is always truthy — confirm.
    if (!node || node.type === 'ERROR' || node.isMissing) {
      return;
    }
    if (node.type === 'await') {
      // Only the direct operand is flagged as awaited.
      visitChildren(node, true);
      return;
    }
    if (node.type === 'call') {
      const callSite = _parsePyCallExpression(node, className, methodName, isAwaited);
      if (callSite) {
        ctx.callSites.push(callSite);
      }
      // Callee and arguments alike may contain further calls.
      visitChildren(node, false);
      return;
    }
    visitChildren(node, false);
  }

  visit(bodyNode, false);
}
/**
 * Turn a Python `call` node into a call-site record.
 *
 * The first named child of the node is taken as the callee expression:
 * - `attribute` → `obj.method()`. Receiver and callee come from the
 *   attribute's child nodes (first child = object, the next `identifier`
 *   child = attribute name), so whitespace or line breaks around the dot in a
 *   multi-line chain never end up in `receiver`. When those children are
 *   unavailable the expression text is split at its last dot instead.
 *   `self` and `super()` receivers map `receiverType` to the enclosing class;
 *   a receiver starting with an upper-case letter is treated as a static call.
 * - `identifier` → an upper-case initial is treated as a constructor call,
 *   anything else as a plain function call.
 * - anything else → the callee is the expression text, cut to 80 characters.
 *
 * @param node call node
 * @param className enclosing class name, or null
 * @param methodName enclosing function name
 * @param isAwaited whether the call sits directly under an await
 * @returns the call-site record, or null when there is no callee node or
 *          `_isNoiseCall` rejects the call
 */
function _parsePyCallExpression(node, className, methodName, isAwaited) {
  const func = node.namedChildren[0];
  if (!func) {
    return null;
  }

  let callee;
  let receiver = null;
  let receiverType = null;
  let callType;

  if (func.type === 'attribute') {
    const objectNode = func.namedChildren[0];
    const attrNode = func.namedChildren.find((c, idx) => idx > 0 && c.type === 'identifier');
    if (objectNode?.text && attrNode?.text) {
      receiver = objectNode.text;
      callee = attrNode.text;
    } else {
      // Fallback: the attribute name is whatever follows the last dot.
      const lastDot = func.text.lastIndexOf('.');
      if (lastDot >= 0) {
        receiver = func.text.slice(0, lastDot);
        callee = func.text.slice(lastDot + 1);
      } else {
        receiver = objectNode?.text || null;
        callee = attrNode?.text || func.text;
      }
    }
    callType = 'method';
    // Infer receiverType from the receiver text.
    if (receiver === 'self') {
      receiverType = className;
    } else if (receiver === 'super()') {
      callType = 'super';
      receiverType = className;
    } else if (receiver && /^[A-Z]/.test(receiver)) {
      receiverType = receiver;
      callType = 'static';
    }
  } else if (func.type === 'identifier') {
    callee = func.text;
    // Python: an upper-case initial usually denotes a class / constructor.
    if (/^[A-Z]/.test(callee)) {
      callType = 'constructor';
      receiverType = callee;
    } else {
      callType = 'function';
    }
  } else {
    callee = func.text?.slice(0, 80) || 'unknown';
    callType = 'function';
  }

  // Drop built-in / utility noise before building the record.
  if (_isNoiseCall(callee, receiver)) {
    return null;
  }

  return {
    callee,
    callerMethod: methodName,
    callerClass: className,
    callType,
    receiver,
    receiverType,
    argCount: _countPyArgs(node),
    line: node.startPosition.row + 1,
    isAwait: isAwaited,
  };
}
// ── Generic extractor registry ─────────────────────────────
/**
 * Language identifier → call-site extractor.
 * The three TS/JS dialects share a single extractor.
 */
const _extractors = new Map([
  ...['typescript', 'tsx', 'javascript'].map((dialect) => [dialect, extractCallSitesTS]),
  ['python', extractCallSitesPython],
]);
/**
 * Look up the call-site extractor registered for a language.
 *
 * @param lang language identifier
 * @returns the registered extractor, or null when none is registered
 */
export function getCallSiteExtractor(lang) {
  const extractor = _extractors.get(lang);
  return extractor ? extractor : null;
}
/**
 * Default call-site extractor, for languages without a dedicated extractor.
 *
 * The intended strategy is a generic call_expression match, but that is not
 * implemented yet: the function currently has an empty body, so it reads
 * nothing from `root` and adds nothing to `ctx`.
 *
 * @param root AST root node (currently unused)
 * @param ctx walker context (currently unused)
 * @param lang language identifier (currently unused)
 */
export function defaultExtractCallSites(root, ctx, lang) {
  // Languages that have not been adapted are not extracted for now (degrades to an empty result).
  // Phase 5.1 will gradually add Go / Rust / Java / Kotlin and others.
}
// ── Utility functions ──────────────────────────────────────
/**
 * Count the arguments of a TS/JS call or `new` expression.
 *
 * @param node node whose named children may include an `arguments` node
 * @returns the named-child count of the `arguments` node, or 0 when absent
 */
function _countArgs(node) {
  const argsNode = node.namedChildren.find(({ type }) => type === 'arguments');
  return argsNode ? argsNode.namedChildCount : 0;
}
/**
 * Count the arguments of a Python call.
 *
 * @param node node whose named children may include an `argument_list` node
 * @returns the named-child count of the `argument_list` node, or 0 when absent
 */
function _countPyArgs(node) {
  const argList = node.namedChildren.find(({ type }) => type === 'argument_list');
  return argList ? argList.namedChildCount : 0;
}
/**
 * Receivers whose method calls are treated as noise (built-in namespaces and
 * constructors). Built once at module load instead of on every call.
 * `parseInt` / `parseFloat` are retained here for backward compatibility;
 * bare calls to them are matched through `_NOISE_CALLEES`.
 */
const _NOISE_RECEIVERS = new Set([
  'console',
  'Math',
  'JSON',
  'Object',
  'Array',
  'String',
  'Number',
  'Boolean',
  'Date',
  'RegExp',
  'Promise',
  'Set',
  'Map',
  'WeakMap',
  'WeakSet',
  'Symbol',
  'Reflect',
  'Proxy',
  'parseInt',
  'parseFloat',
]);

/**
 * Callee names treated as noise regardless of receiver: module loading,
 * logging, timers, browser dialogs and common JS / Python built-ins.
 */
const _NOISE_CALLEES = new Set([
  'require',
  'import',
  'console',
  'log',
  'warn',
  'error',
  'info',
  'debug',
  'setTimeout',
  'setInterval',
  'clearTimeout',
  'clearInterval',
  'requestAnimationFrame',
  'cancelAnimationFrame',
  'alert',
  'confirm',
  'prompt',
  // Global functions are called without a receiver, so they have to be
  // matched by callee name to be filtered at all.
  'parseInt',
  'parseFloat',
  'print',
  'len',
  'range',
  'enumerate',
  'zip',
  'map',
  'filter',
  'isinstance',
  'issubclass',
  'hasattr',
  'getattr',
  'setattr',
  'str',
  'int',
  'float',
  'bool',
  'list',
  'dict',
  'tuple',
  'set',
  'type',
  'super',
  'property',
  'staticmethod',
  'classmethod',
]);

/**
 * Decide whether a call is noise (built-in / console / logging and similar
 * calls that should not be recorded as call sites).
 *
 * The callee check ignores the receiver, so e.g. any `.map()`, `.filter()`
 * or `.set()` call is classified as noise whatever object it is called on.
 *
 * @param callee called name (may be undefined)
 * @param receiver receiver text, or null
 * @returns true when the receiver or the callee is on a noise list
 */
function _isNoiseCall(callee, receiver) {
  if (receiver && _NOISE_RECEIVERS.has(receiver)) {
    return true;
  }
  if (callee && _NOISE_CALLEES.has(callee)) {
    return true;
  }
  return false;
}