UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.

214 lines (213 loc) 9.97 kB
import { parse } from 'java-parser';
import path from 'path';
import { log } from '../../utils/log.js';
import { getDbForRepo } from '../client.js';
import {
  markFileAsSkippedTemplate,
  markFileAsExtractedTemplate,
  markFileAsFailedTemplate,
  insertFunctionTemplate,
  insertGraphClassTemplate,
  insertEdgeTemplate,
  insertGraphEntityTagTemplate,
  insertGraphTagTemplate,
  selectGraphTagIdTemplate,
} from '../sqlTemplates.js';
import { kgModule } from '../../pipeline/modules/kgModule.js';
import { BUILTINS } from '../../fileRules/builtins.js';
import { getUniqueId } from '../../utils/sharedUtils.js';

/**
 * Returns the grammar-rule name of a CST node.
 * java-parser (Chevrotain) stores it in `name`; `node` is still accepted so
 * any caller-supplied tree using the previous key keeps working.
 */
function cstRuleName(node) {
  return node?.name ?? node?.node;
}

/**
 * Follows a chain of child rule names, always taking the first occurrence.
 * Returns `undefined` as soon as a step is missing.
 */
function cstDescend(node, ...ruleNames) {
  let current = node;
  for (const ruleName of ruleNames) {
    current = current?.children?.[ruleName]?.[0];
    if (!current) return undefined;
  }
  return current;
}

/** Returns the text of the first `Identifier` token directly under `node`, if any. */
function cstIdentifier(node) {
  return node?.children?.Identifier?.[0]?.image;
}

/**
 * Walks the CST and collects class and method/constructor declarations.
 *
 * Several lookup paths are tried for each name because the exact nesting
 * differs between java-parser grammar versions (NOTE(review): paths follow the
 * published java-parser grammar — confirm against the installed version).
 *
 * @param {object} cst - Root CST node returned by `parse`.
 * @param {string} content - Full source text of the Java file.
 * @param {string} filePath - Path used for unique ids and anonymous-class names.
 * @returns {{functions: object[], classes: object[]}} Collected declarations.
 */
function collectJavaDeclarations(cst, content, filePath) {
  const functions = [];
  const classes = [];
  // Split once; every declaration slices its own line range out of this array.
  const sourceLines = content.split('\n');

  const describe = (node, name) => {
    const startLine = node.location?.startLine ?? 0;
    const endLine = node.location?.endLine ?? 0;
    const code = sourceLines.slice(startLine - 1, endLine).join('\n');
    const unique_id = getUniqueId(name, filePath, startLine, node.location?.startOffset ?? 0, code);
    return { name, start_line: startLine, end_line: endLine, content: code, unique_id };
  };

  function visit(node, parentClassName) {
    if (!node) return;
    const rule = cstRuleName(node);

    if (rule === 'normalClassDeclaration') {
      const name =
        cstIdentifier(cstDescend(node, 'typeIdentifier')) ??
        cstIdentifier(node) ??
        `${path.basename(filePath)}:<anon-class>`;

      const extendsNode = cstDescend(node, 'superclass') ?? cstDescend(node, 'classExtends');
      const superTokens =
        cstDescend(extendsNode, 'classType')?.children?.Identifier ??
        extendsNode?.children?.Identifier;
      // A qualified superclass (a.b.Base) yields several Identifier tokens.
      const superClass = superTokens?.length ? superTokens.map((t) => t.image).join('.') : null;

      classes.push({ ...describe(node, name), superClass });

      // `children` is a dictionary, not an array, so recurse through the
      // classBody node itself instead of calling forEach on its children.
      visit(cstDescend(node, 'classBody'), name);
      return;
    }

    if (rule === 'methodDeclaration' || rule === 'constructorDeclaration') {
      const name =
        cstIdentifier(cstDescend(node, 'methodHeader', 'methodDeclarator')) ??
        cstIdentifier(cstDescend(node, 'constructorDeclarator', 'simpleTypeName')) ??
        cstIdentifier(cstDescend(node, 'constructorDeclarator', 'simpleTypeName', 'typeIdentifier')) ??
        cstIdentifier(node) ??
        '<anon>';
      functions.push(describe(node, name));
      // Method bodies are not descended into.
      return;
    }

    if (node.children) {
      for (const child of Object.values(node.children)) {
        if (Array.isArray(child)) child.forEach((c) => visit(c, parentClassName));
        else visit(child, parentClassName);
      }
    }
  }

  visit(cst);
  return { functions, classes };
}

/**
 * Stores knowledge-graph tags for the given entities and links each tag to the
 * matching function/class unique id (or `name@filePath` when nothing matches).
 * Individual insert failures are ignored so one bad row does not abort the rest.
 */
function persistKgEntityTags(db, entities, functions, classes, filePath) {
  const insertTagStmt = db.prepare(insertGraphTagTemplate);
  const getTagIdStmt = db.prepare(selectGraphTagIdTemplate);
  const insertEntityTagStmt = db.prepare(insertGraphEntityTagTemplate);

  const persistTag = (tag) => {
    try {
      insertTagStmt.run({ name: tag });
      return getTagIdStmt.get({ name: tag })?.id;
    } catch {
      return undefined;
    }
  };

  for (const entity of entities) {
    if (!entity.type || !Array.isArray(entity.tags) || !entity.tags.length) continue;
    for (const tag of entity.tags) {
      if (!tag) continue;
      const tagId = persistTag(tag);
      if (!tagId) continue;
      const matchedUniqueId =
        functions.find((f) => f.name === entity.name)?.unique_id ||
        classes.find((c) => c.name === entity.name)?.unique_id ||
        `${entity.name}@${filePath}`;
      try {
        insertEntityTagStmt.run({
          entity_type: entity.type,
          entity_unique_id: matchedUniqueId,
          tag_id: tagId,
        });
      } catch {
        // Best-effort: a failed link is skipped.
      }
    }
  }
}

/**
 * Extracts classes, methods/constructors, call edges, inheritance edges and
 * import edges from a Java source file and stores them through the repo DB.
 *
 * @param {string} filePath - Path of the Java file (used in ids and log output).
 * @param {string} content - Full source text of the file.
 * @param {*} fileId - Identifier of the file row passed to the SQL templates.
 * @returns {Promise<boolean>} `true` when extraction completed, `false` when
 *   nothing was found or an error occurred (the file is then marked as
 *   skipped or failed respectively).
 */
export async function extractFromJava(filePath, content, fileId) {
  const db = getDbForRepo();
  const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');

  try {
    const cst = parse(content);
    log(`🧩 First-level CST node types:`, cst.children ? Object.keys(cst.children) : []);

    const { functions, classes } = collectJavaDeclarations(cst, content, filePath);

    if (functions.length === 0 && classes.length === 0) {
      log(`⚠️ No functions/classes found in Java file: ${filePath}`);
      try {
        db.prepare(markFileAsSkippedTemplate).run({ id: fileId });
      } catch {
        // Best-effort status update.
      }
      return false;
    }

    log(`🔍 Found ${functions.length} methods and ${classes.length} classes in ${filePath}`);

    // --- Knowledge Graph tagging ---
    try {
      const kgInput = { fileId, filepath: filePath, summary: undefined };
      const kgResult = await kgModule.run(kgInput, content);
      if (kgResult.entities?.length) {
        persistKgEntityTags(db, kgResult.entities, functions, classes, filePath);
      }
    } catch (kgErr) {
      console.warn(`⚠️ KG tagging failed for ${filePath}:`, kgErr instanceof Error ? kgErr.message : kgErr);
    }

    // Prepare each statement once instead of once per inserted row.
    const insertFunctionStmt = db.prepare(insertFunctionTemplate);
    const insertClassStmt = db.prepare(insertGraphClassTemplate);
    const insertEdgeStmt = db.prepare(insertEdgeTemplate);

    const seenEdges = new Set();
    const javaBuiltins = BUILTINS.java;

    const insertContainsEdge = (targetType, targetUniqueId) => {
      const edgeKey = `file->${targetUniqueId}`;
      if (seenEdges.has(edgeKey)) return;
      insertEdgeStmt.run({
        source_type: 'file',
        source_unique_id: normalizedPath,
        target_type: targetType,
        target_unique_id: targetUniqueId,
        relation: 'contains',
      });
      seenEdges.add(edgeKey);
    };

    // --- Insert functions with call edges ---
    for (const fn of functions) {
      insertFunctionStmt.run({
        file_id: fileId,
        name: fn.name ?? '<anon>',
        start_line: fn.start_line ?? -1,
        end_line: fn.end_line ?? -1,
        content: fn.content ?? '',
        embedding: null,
        lang: 'java',
        unique_id: fn.unique_id,
      });
      insertContainsEdge('function', fn.unique_id);

      // --- Function call edges ---
      // Any identifier followed by "(" counts as a call; names in the Java
      // builtin set are skipped and each callee is recorded once per function.
      const callRegex = /([A-Za-z_$][A-Za-z0-9_$]*)\s*\(/g;
      const callSet = new Set();
      let match;
      while ((match = callRegex.exec(fn.content)) !== null) {
        const callee = match[1];
        if (!callee || javaBuiltins.has(callee) || callSet.has(callee)) continue;
        callSet.add(callee);
        insertEdgeStmt.run({
          source_type: 'function',
          source_unique_id: fn.unique_id,
          target_type: 'function',
          target_unique_id: `${callee}@${normalizedPath}`,
          relation: 'calls',
        });
      }
    }

    // --- Insert classes with inheritance edges ---
    for (const cls of classes) {
      insertClassStmt.run({
        file_id: fileId,
        name: cls.name,
        start_line: cls.start_line ?? -1,
        end_line: cls.end_line ?? -1,
        content: cls.content ?? '',
        embedding: null,
        lang: 'java',
        unique_id: cls.unique_id,
      });
      insertContainsEdge('class', cls.unique_id);

      if (cls.superClass) {
        insertEdgeStmt.run({
          source_type: 'class',
          source_unique_id: cls.unique_id,
          target_type: 'class',
          target_unique_id: `unresolved:${cls.superClass}`,
          relation: 'inherits',
        });
      }
    }

    // --- Java imports as edges ---
    // Also accepts `import static ...` and wildcard (`pkg.*`) imports, which
    // the plain dotted-name pattern would silently drop.
    const importRegex = /^\s*import\s+(?:static\s+)?([A-Za-z0-9_$.]+\*?)\s*;/gm;
    let impMatch;
    while ((impMatch = importRegex.exec(content)) !== null) {
      const importedClass = impMatch[1];
      if (!importedClass) continue;
      insertEdgeStmt.run({
        source_type: 'file',
        source_unique_id: normalizedPath,
        target_type: 'class',
        target_unique_id: `imported:${importedClass}`,
        relation: 'imports',
      });
    }

    db.prepare(markFileAsExtractedTemplate).run({ id: fileId });
    log(`📊 Extraction summary for ${filePath}: ${functions.length} methods, ${classes.length} classes`);
    return true;
  } catch (err) {
    log(`❌ Failed to extract from Java file: ${filePath}`);
    // Thrown values are not guaranteed to be Error instances.
    log(` ↳ ${err instanceof Error ? err.message : String(err)}`);
    try {
      db.prepare(markFileAsFailedTemplate).run({ id: fileId });
    } catch {
      // Best-effort status update.
    }
    return false;
  }
}