autosnippet
Version:
Extract code patterns into a knowledge base for AI coding assistants
359 lines (358 loc) • 13.8 kB
JavaScript
/**
* @module lang-python
* @description Python AST Walker 插件
*
* 提取: class, function, import, decorator, docstring, module-level assignments
* 模式: Singleton, Factory, Context Manager, Decorator pattern, Data Class
*
* Phase 5: 新增 ImportRecord 结构化导入 + extractCallSites 调用点提取
*/
import { extractCallSitesPython } from '../analysis/CallSiteExtractor.js';
import { ImportRecord } from '../analysis/ImportRecord.js';
function walkPython(root, ctx) {
_walkPyNode(root, ctx, null);
}
function _walkPyNode(node, ctx, parentClassName) {
for (let i = 0; i < node.namedChildCount; i++) {
const child = node.namedChild(i);
switch (child.type) {
case 'import_statement': {
const modNode = child.namedChildren.find((c) => c.type === 'dotted_name');
if (modNode) {
// import mod / import mod as alias
const aliasNode = child.namedChildren.find((c) => c.type === 'aliased_import');
const alias = aliasNode?.namedChildren?.find((c) => c.type === 'identifier')?.text || null;
ctx.imports.push(new ImportRecord(modNode.text, {
symbols: ['*'],
alias: alias || modNode.text.split('.').pop(),
kind: 'namespace',
}));
}
break;
}
case 'import_from_statement': {
const modNode = child.namedChildren.find((c) => c.type === 'dotted_name' || c.type === 'relative_import');
if (modNode) {
const importPath = modNode.text;
// from mod import A, B, C
const importedNames = [];
for (const c of child.namedChildren) {
if (c.type === 'dotted_name' && c !== modNode) {
importedNames.push(c.text);
}
else if (c.type === 'aliased_import') {
const nameNode = c.namedChildren.find((n) => n.type === 'dotted_name' || n.type === 'identifier');
if (nameNode) {
importedNames.push(nameNode.text);
}
}
else if (c.type === 'wildcard_import') {
importedNames.push('*');
}
}
ctx.imports.push(new ImportRecord(importPath, {
symbols: importedNames.length > 0 ? importedNames : [],
kind: importedNames.includes('*') ? 'namespace' : 'named',
}));
}
break;
}
case 'class_definition': {
const classInfo = _parsePyClass(child);
ctx.classes.push(classInfo);
// 递归遍历类体
const body = child.namedChildren.find((c) => c.type === 'block');
if (body) {
_walkPyClassBody(body, ctx, classInfo.name);
}
break;
}
case 'function_definition': {
const funcInfo = _parsePyFunction(child, parentClassName);
ctx.methods.push(funcInfo);
break;
}
case 'decorated_definition': {
// decorated_definition 包含 decorator + 实际定义
const actualDef = child.namedChildren.find((c) => c.type === 'class_definition' || c.type === 'function_definition');
const decorators = child.namedChildren
.filter((c) => c.type === 'decorator')
.map((d) => d.text);
if (actualDef?.type === 'class_definition') {
const classInfo = _parsePyClass(actualDef);
classInfo.decorators = decorators;
if (decorators.some((d) => d.includes('dataclass'))) {
classInfo.isDataclass = true;
}
ctx.classes.push(classInfo);
const body = actualDef.namedChildren.find((c) => c.type === 'block');
if (body) {
_walkPyClassBody(body, ctx, classInfo.name);
}
}
else if (actualDef?.type === 'function_definition') {
const funcInfo = _parsePyFunction(actualDef, parentClassName);
funcInfo.decorators = decorators;
ctx.methods.push(funcInfo);
}
break;
}
case 'expression_statement': {
// 模块级赋值 → properties
if (!parentClassName) {
const assignNode = child.namedChildren.find((c) => c.type === 'assignment');
if (assignNode) {
const nameNode = assignNode.namedChildren.find((c) => c.type === 'identifier');
if (nameNode && /^[A-Z_][A-Z_0-9]*$/.test(nameNode.text)) {
ctx.properties.push({
name: nameNode.text,
className: null,
isModuleLevel: true,
line: child.startPosition.row + 1,
});
}
}
}
break;
}
default: {
if (child.namedChildCount > 0 && child.type !== 'block') {
_walkPyNode(child, ctx, parentClassName);
}
}
}
}
}
function _walkPyClassBody(body, ctx, className) {
for (let i = 0; i < body.namedChildCount; i++) {
const child = body.namedChild(i);
if (child.type === 'function_definition') {
const func = _parsePyFunction(child, className);
ctx.methods.push(func);
}
else if (child.type === 'decorated_definition') {
const actualDef = child.namedChildren.find((c) => c.type === 'function_definition');
const decorators = child.namedChildren
.filter((c) => c.type === 'decorator')
.map((d) => d.text);
if (actualDef) {
const func = _parsePyFunction(actualDef, className);
func.decorators = decorators;
if (decorators.some((d) => d.includes('staticmethod'))) {
func.isStaticMethod = true;
}
if (decorators.some((d) => d.includes('classmethod'))) {
func.isClassMethod = true;
}
if (decorators.some((d) => d.includes('property'))) {
// 作为属性处理
ctx.properties.push({
name: func.name,
className,
isProperty: true,
line: func.line,
});
}
ctx.methods.push(func);
}
}
else if (child.type === 'expression_statement') {
// 类级别赋值
const assign = child.namedChildren.find((c) => c.type === 'assignment');
if (assign) {
const nameNode = assign.namedChildren.find((c) => c.type === 'identifier');
if (nameNode) {
ctx.properties.push({
name: nameNode.text,
className,
line: child.startPosition.row + 1,
});
}
}
}
}
}
function _parsePyClass(node) {
const name = node.namedChildren.find((c) => c.type === 'identifier')?.text || 'Unknown';
const _superclass = null;
const protocols = [];
// bases
const argList = node.namedChildren.find((c) => c.type === 'argument_list');
if (argList) {
const bases = argList.namedChildren.filter((c) => c.type === 'identifier' || c.type === 'attribute');
for (let i = 0; i < bases.length; i++) {
const baseName = bases[i].text;
protocols.push(baseName);
}
}
// 约定: 第一个 base 可能是 superclass(如果不是 Protocol/ABC/Mixin)
let detectedSuper = null;
if (protocols.length > 0) {
const first = protocols[0];
if (!/Protocol$|ABC$|Mixin$|Base$/i.test(first)) {
detectedSuper = first;
}
}
return {
name,
kind: 'class',
superclass: detectedSuper,
protocols,
line: node.startPosition.row + 1,
endLine: node.endPosition.row + 1,
};
}
function _parsePyFunction(node, className) {
const name = node.namedChildren.find((c) => c.type === 'identifier')?.text || 'unknown';
const body = node.namedChildren.find((c) => c.type === 'block');
const bodyLines = body ? body.endPosition.row - body.startPosition.row + 1 : 0;
const complexity = body ? _estimateComplexity(body) : 1;
const nestingDepth = body ? _maxNesting(body, 0) : 0;
// 检查是否有 self 参数
const params = node.namedChildren.find((c) => c.type === 'parameters');
let hasSelf = false;
const isAsync = node.text.trimStart().startsWith('async');
if (params) {
const firstParam = params.namedChildren.find((c) => c.type === 'identifier');
hasSelf = firstParam?.text === 'self';
}
return {
name,
className,
isClassMethod: false,
isInstanceMethod: hasSelf && !!className,
isAsync,
bodyLines,
complexity,
nestingDepth,
line: node.startPosition.row + 1,
kind: 'definition',
};
}
// ── Python 模式检测 ──
function detectPyPatterns(root, lang, methods, properties, classes) {
const patterns = [];
// Singleton: 模块级实例
// Factory: create_xxx / build_xxx / make_xxx
for (const m of methods) {
if (/^create_|^build_|^make_|^get_instance$/.test(m.name)) {
patterns.push({
type: 'factory',
className: m.className,
methodName: m.name,
line: m.line,
confidence: 0.8,
});
}
}
// Context Manager: __enter__ + __exit__
const classMethodMap = {};
for (const m of methods) {
if (m.className) {
if (!classMethodMap[m.className]) {
classMethodMap[m.className] = [];
}
classMethodMap[m.className].push(m.name);
}
}
for (const [cls, methodNames] of Object.entries(classMethodMap)) {
if (methodNames.includes('__enter__') &&
methodNames.includes('__exit__')) {
patterns.push({ type: 'context-manager', className: cls, confidence: 0.95 });
}
if (methodNames.includes('__iter__') &&
methodNames.includes('__next__')) {
patterns.push({ type: 'iterator', className: cls, confidence: 0.9 });
}
}
// Data Class
for (const cls of classes) {
if (cls.isDataclass) {
patterns.push({ type: 'dataclass', className: cls.name, line: cls.line, confidence: 0.95 });
}
if (cls.protocols?.some((p) => p === 'BaseModel' || p.endsWith('.BaseModel'))) {
patterns.push({
type: 'pydantic-model',
className: cls.name,
line: cls.line,
confidence: 0.9,
});
}
}
// Decorator pattern: 函数名以 _ 开头且内部定义了 wrapper
for (const m of methods) {
if (m.decorators?.some((d) => d.includes('app.route') ||
d.includes('app.get') ||
d.includes('app.post') ||
d.includes('router.'))) {
patterns.push({
type: 'route-handler',
className: m.className,
methodName: m.name,
line: m.line,
confidence: 0.9,
});
}
}
return patterns;
}
// ── 工具函数 ──
function _estimateComplexity(node) {
let complexity = 1;
const BRANCH_TYPES = new Set([
'if_statement',
'for_statement',
'while_statement',
'elif_clause',
'except_clause',
'with_statement',
'conditional_expression',
'list_comprehension',
]);
function walk(n) {
if (BRANCH_TYPES.has(n.type)) {
complexity++;
}
if (n.type === 'boolean_operator') {
complexity++;
}
for (let i = 0; i < n.namedChildCount; i++) {
walk(n.namedChild(i));
}
}
walk(node);
return complexity;
}
function _maxNesting(node, depth) {
const NESTING_TYPES = new Set([
'if_statement',
'for_statement',
'while_statement',
'with_statement',
'try_statement',
]);
let max = depth;
const nextDepth = NESTING_TYPES.has(node.type) ? depth + 1 : depth;
for (let i = 0; i < node.namedChildCount; i++) {
const childMax = _maxNesting(node.namedChild(i), nextDepth);
if (childMax > max) {
max = childMax;
}
}
return max;
}
// ── 插件导出 ──
let _grammar = null;
function getGrammar() {
return _grammar;
}
export function setGrammar(grammar) {
_grammar = grammar;
}
export const plugin = {
getGrammar,
walk: walkPython,
detectPatterns: detectPyPatterns,
extractCallSites: extractCallSitesPython,
extensions: ['.py'],
};