autosnippet
Version:
Extract code patterns into a knowledge base for AI coding assistants
312 lines (311 loc) • 11 kB
JavaScript
/**
* @module PythonDiscoverer
* @description Python 项目结构发现器
*
* 检测信号: pyproject.toml, setup.py, setup.cfg, requirements.txt, *.py
* 支持: pyproject.toml (PEP 621), setup.py, src 布局, 平铺布局
*/
import { existsSync, readdirSync, readFileSync } from 'node:fs';
import { basename, join, relative } from 'node:path';
import { ProjectDiscoverer, } from './ProjectDiscoverer.js';
const EXCLUDE_DIRS = new Set([
'__pycache__',
'.venv',
'venv',
'.tox',
'.git',
'.mypy_cache',
'.pytest_cache',
'.eggs',
'dist',
'build',
'node_modules',
'.nox',
'.ruff_cache',
'.cursor',
]);
export class PythonDiscoverer extends ProjectDiscoverer {
#projectRoot = null;
#targets = [];
#depGraph = { nodes: [], edges: [] };
#projectName = null;
get id() {
return 'python';
}
get displayName() {
return 'Python (pip/poetry/pdm)';
}
async detect(projectRoot) {
let confidence = 0;
const reasons = [];
if (existsSync(join(projectRoot, 'pyproject.toml'))) {
confidence = 0.9;
reasons.push('pyproject.toml exists');
}
if (existsSync(join(projectRoot, 'setup.py'))) {
confidence = Math.max(confidence, 0.8);
reasons.push('setup.py exists');
}
if (existsSync(join(projectRoot, 'setup.cfg'))) {
confidence = Math.max(confidence, 0.8);
reasons.push('setup.cfg exists');
}
if (existsSync(join(projectRoot, 'requirements.txt'))) {
confidence = Math.max(confidence, 0.6);
reasons.push('requirements.txt exists');
}
// 检查是否有 .py 文件
if (confidence === 0) {
try {
const entries = readdirSync(projectRoot);
if (entries.some((e) => e.endsWith('.py'))) {
confidence = 0.4;
reasons.push('*.py files found at root');
}
}
catch {
/* skip */
}
}
return {
match: confidence > 0,
confidence: Math.min(confidence, 1.0),
reason: reasons.join(', ') || 'No Python markers found',
};
}
async load(projectRoot) {
this.#projectRoot = projectRoot;
this.#targets = [];
this.#depGraph = { nodes: [], edges: [] };
// 解析 pyproject.toml(简易 TOML 解析)
const pyprojectPath = join(projectRoot, 'pyproject.toml');
let pyproject = null;
if (existsSync(pyprojectPath)) {
pyproject = this.#parsePyprojectToml(readFileSync(pyprojectPath, 'utf8'));
}
this.#projectName = pyproject?.project?.name || basename(projectRoot);
// 发现包目录
const packages = this.#discoverPackages(projectRoot, pyproject);
for (const pkg of packages) {
const framework = this.#detectFramework(projectRoot, pyproject);
this.#targets.push({
name: pkg.name,
path: pkg.path,
type: pkg.isTest ? 'test' : 'library',
language: 'python',
framework,
metadata: { pyproject },
});
this.#depGraph.nodes.push(pkg.name);
}
// 如果没有发现任何包, 以项目根为兜底
if (this.#targets.length === 0) {
this.#targets.push({
name: this.#projectName ?? basename(projectRoot),
path: projectRoot,
type: 'library',
language: 'python',
framework: this.#detectFramework(projectRoot, pyproject),
metadata: { pyproject },
});
this.#depGraph.nodes.push(this.#projectName ?? basename(projectRoot));
}
// 解析依赖
this.#parseDependencies(projectRoot, pyproject);
}
async listTargets() {
return this.#targets;
}
async getTargetFiles(target) {
const targetPath = typeof target === 'string'
? this.#targets.find((t) => t.name === target)?.path || this.#projectRoot
: target.path;
if (!targetPath || !existsSync(targetPath)) {
return [];
}
const files = [];
this.#collectPyFiles(targetPath, targetPath, files);
return files;
}
async getDependencyGraph() {
return this.#depGraph;
}
// ── 内部实现 ──
#discoverPackages(projectRoot, pyproject) {
const packages = [];
// src/ 布局优先
const srcDir = join(projectRoot, 'src');
if (existsSync(srcDir)) {
try {
const entries = readdirSync(srcDir, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory() && !entry.name.startsWith('.') && !entry.name.startsWith('_')) {
const pkgDir = join(srcDir, entry.name);
if (existsSync(join(pkgDir, '__init__.py'))) {
packages.push({ name: entry.name, path: pkgDir, isTest: false });
}
}
}
}
catch {
/* skip */
}
}
// 平铺布局: 含 __init__.py 的顶层目录
if (packages.length === 0) {
try {
const entries = readdirSync(projectRoot, { withFileTypes: true });
for (const entry of entries) {
if (entry.isDirectory() && !entry.name.startsWith('.') && !EXCLUDE_DIRS.has(entry.name)) {
const pkgDir = join(projectRoot, entry.name);
if (existsSync(join(pkgDir, '__init__.py'))) {
const isTest = /^tests?$/.test(entry.name);
packages.push({ name: entry.name, path: pkgDir, isTest });
}
}
}
}
catch {
/* skip */
}
}
// 检测 tests/ 目录
for (const testDir of ['tests', 'test']) {
const testPath = join(projectRoot, testDir);
if (existsSync(testPath) && !packages.some((p) => p.name === testDir)) {
packages.push({ name: testDir, path: testPath, isTest: true });
}
}
return packages;
}
#detectFramework(projectRoot, pyproject) {
const deps = this.#extractDependencyNames(projectRoot, pyproject);
if (deps.has('django')) {
return 'django';
}
if (deps.has('flask')) {
return 'flask';
}
if (deps.has('fastapi')) {
return 'fastapi';
}
if (deps.has('langchain') ||
deps.has('langchain-core') ||
deps.has('langgraph') ||
deps.has('llama-index') ||
deps.has('llama_index')) {
return 'langchain';
}
if (deps.has('torch') || deps.has('tensorflow')) {
return 'ml';
}
if (deps.has('scrapy')) {
return 'scrapy';
}
if (deps.has('celery')) {
return 'celery';
}
return null;
}
#extractDependencyNames(projectRoot, pyproject) {
const names = new Set();
// From pyproject.toml
if (pyproject?.project?.dependencies) {
for (const dep of pyproject.project.dependencies) {
const name = dep
.replace(/[>=<![\]~;@\s].*/g, '')
.trim()
.toLowerCase();
if (name) {
names.add(name);
}
}
}
// From requirements.txt
const reqPath = join(projectRoot, 'requirements.txt');
if (existsSync(reqPath)) {
try {
const content = readFileSync(reqPath, 'utf8');
for (const line of content.split('\n')) {
const trimmed = line.trim();
if (trimmed && !trimmed.startsWith('#') && !trimmed.startsWith('-')) {
const name = trimmed
.replace(/[>=<![\]~;@\s].*/g, '')
.trim()
.toLowerCase();
if (name) {
names.add(name);
}
}
}
}
catch {
/* skip */
}
}
return names;
}
#parseDependencies(projectRoot, pyproject) {
const names = this.#extractDependencyNames(projectRoot, pyproject);
const rootTarget = this.#targets[0]?.name;
if (!rootTarget) {
return;
}
for (const dep of names) {
this.#depGraph.edges.push({ from: rootTarget, to: dep, type: 'depends_on' });
}
}
#collectPyFiles(dir, rootDir, files, depth = 0) {
if (depth > 15) {
return;
}
try {
const entries = readdirSync(dir, { withFileTypes: true });
for (const entry of entries) {
if (entry.name.startsWith('.')) {
continue;
}
if (EXCLUDE_DIRS.has(entry.name)) {
continue;
}
const fullPath = join(dir, entry.name);
if (entry.isDirectory()) {
this.#collectPyFiles(fullPath, rootDir, files, depth + 1);
}
else if (entry.isFile() && entry.name.endsWith('.py')) {
files.push({
name: entry.name,
path: fullPath,
relativePath: relative(rootDir, fullPath),
language: 'python',
});
}
}
}
catch {
/* skip */
}
}
/**
* 简易 TOML 解析器 — 仅提取本项目需要的字段
* 不做完整 TOML 解析, 只用正则提取关键信息
*/
#parsePyprojectToml(content) {
const result = { project: {} };
// [project] name
const nameMatch = content.match(/\[project\][\s\S]*?name\s*=\s*["']([^"']+)["']/);
if (nameMatch) {
result.project.name = nameMatch[1];
}
// [project] dependencies — 简化提取数组
const depsMatch = content.match(/\[project\][\s\S]*?dependencies\s*=\s*\[([\s\S]*?)\]/);
if (depsMatch) {
result.project.dependencies = [];
const items = depsMatch[1].matchAll(/["']([^"']+)["']/g);
for (const m of items) {
result.project.dependencies.push(m[1]);
}
}
return result;
}
}