scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
214 lines (213 loc) • 9.97 kB
JavaScript
import { parse } from 'java-parser';
import path from 'path';
import { log } from '../../utils/log.js';
import { getDbForRepo } from '../client.js';
import { markFileAsSkippedTemplate, markFileAsExtractedTemplate, markFileAsFailedTemplate, insertFunctionTemplate, insertGraphClassTemplate, insertEdgeTemplate, insertGraphEntityTagTemplate, insertGraphTagTemplate, selectGraphTagIdTemplate, } from '../sqlTemplates.js';
import { kgModule } from '../../pipeline/modules/kgModule.js';
import { BUILTINS } from '../../fileRules/builtins.js';
import { getUniqueId } from '../../utils/sharedUtils.js';
// Rule paths (relative to a declaration node) under which the declaring
// `Identifier` token may sit. The empty path is the direct lookup this module
// has always done; the longer paths follow java-parser's grammar rule names.
// NOTE(review): confirm the nested paths against the installed java-parser version.
const JAVA_CLASS_NAME_PATHS = [[], ['typeIdentifier']];
const JAVA_SUPERCLASS_NAME_PATHS = [['superclass'], ['superclass', 'classType']];
const JAVA_METHOD_NAME_PATHS = [[], ['methodHeader', 'methodDeclarator']];
const JAVA_CONSTRUCTOR_NAME_PATHS = [
    [],
    ['constructorDeclarator', 'simpleTypeName'],
    ['constructorDeclarator', 'simpleTypeName', 'typeIdentifier'],
];

/**
 * Returns the grammar rule name of a CST node.
 *
 * The legacy `node` property is checked first so trees that already worked keep
 * working; `name` is the property Chevrotain-style CST nodes use.
 */
function cstRuleName(node) {
    return node?.node ?? node?.name;
}

/**
 * Lists the direct children of a CST node as one flat array.
 *
 * `children` is normally a dictionary mapping a rule/token name to an array of
 * elements; a plain array (or single-valued entries) is tolerated as well.
 */
function cstChildren(node) {
    const children = node?.children;
    if (!children) {
        return [];
    }
    return Object.values(children).flat();
}

/**
 * Follows each rule path from `node` (taking the first element at every step)
 * and returns the image of the first `Identifier` token found, or `undefined`.
 */
function firstIdentifierImage(node, rulePaths) {
    for (const rulePath of rulePaths) {
        let current = node;
        for (const rule of rulePath) {
            current = current?.children?.[rule]?.[0];
        }
        const image = current?.children?.Identifier?.[0]?.image;
        if (image != null) {
            return image;
        }
    }
    return undefined;
}

/**
 * Walks the CST and collects class, method and constructor declarations.
 *
 * Methods and constructors are leaves of the walk (their bodies are not
 * descended into); for classes only the class body is descended into.
 *
 * @param {object} cst - Root CST node returned by the parser.
 * @param {string} content - Full source text, used to slice out each declaration.
 * @param {string} filePath - Path used for anonymous-class names and unique ids.
 * @returns {{functions: object[], classes: object[]}} Collected declarations.
 */
function collectJavaDeclarations(cst, content, filePath) {
    // Split once; every declaration slices its own line range out of this array.
    const lines = content.split('\n');
    const functions = [];
    const classes = [];
    const describeSpan = (node, name) => {
        const startLine = node.location?.startLine ?? 0;
        const endLine = node.location?.endLine ?? 0;
        const code = lines.slice(startLine - 1, endLine).join('\n');
        const unique_id = getUniqueId(name, filePath, startLine, node.location?.startOffset ?? 0, code);
        return { name, start_line: startLine, end_line: endLine, content: code, unique_id };
    };
    const visit = (node) => {
        if (!node) {
            return;
        }
        const rule = cstRuleName(node);
        if (rule === 'normalClassDeclaration') {
            const name = firstIdentifierImage(node, JAVA_CLASS_NAME_PATHS)
                ?? `${path.basename(filePath)}:<anon-class>`;
            const superClass = firstIdentifierImage(node, JAVA_SUPERCLASS_NAME_PATHS) ?? null;
            classes.push({ ...describeSpan(node, name), superClass });
            // The class body's children are a dictionary, so they go through the
            // same child listing as every other node instead of `.forEach`.
            for (const member of cstChildren(node.children?.classBody?.[0])) {
                visit(member);
            }
            return;
        }
        if (rule === 'methodDeclaration' || rule === 'constructorDeclaration') {
            const namePaths = rule === 'methodDeclaration'
                ? JAVA_METHOD_NAME_PATHS
                : JAVA_CONSTRUCTOR_NAME_PATHS;
            const name = firstIdentifierImage(node, namePaths) ?? '<anon>';
            functions.push(describeSpan(node, name));
            return;
        }
        for (const child of cstChildren(node)) {
            visit(child);
        }
    };
    visit(cst);
    return { functions, classes };
}

/**
 * Extracts classes, methods/constructors and graph edges from a Java source
 * file and stores them through the repository database.
 *
 * Steps: parse the source, collect declarations, run KG tagging (a failure
 * there is only warned about), insert functions with `contains`/`calls` edges,
 * insert classes with `contains`/`inherits` edges, insert one `imports` edge
 * per import statement, then mark the file as extracted.
 *
 * @param {string} filePath - Path of the Java file being processed.
 * @param {string} content - Full text of the file.
 * @param {*} fileId - Identifier of the file row, passed to the status templates.
 * @returns {Promise<boolean>} `true` when the file was extracted; `false` when
 *   no declarations were found (file marked skipped) or when extraction failed
 *   (file marked failed). Errors raised after the database handle is obtained
 *   are caught and reported through `log`.
 */
export async function extractFromJava(filePath, content, fileId) {
    const db = getDbForRepo();
    const normalizedPath = path.normalize(filePath).replace(/\\/g, '/');
    // Prepare each SQL template at most once per call, and only when first
    // needed, instead of re-preparing it on every loop iteration.
    const statements = new Map();
    const prepared = (template) => {
        let statement = statements.get(template);
        if (!statement) {
            statement = db.prepare(template);
            statements.set(template, statement);
        }
        return statement;
    };
    const describeError = (err) => (err instanceof Error ? err.message : String(err));
    try {
        const cst = parse(content);
        log(`🧩 First-level CST node types:`, cst.children ? Object.keys(cst.children) : []);
        const { functions, classes } = collectJavaDeclarations(cst, content, filePath);
        if (functions.length === 0 && classes.length === 0) {
            log(`⚠️ No functions/classes found in Java file: ${filePath}`);
            try {
                prepared(markFileAsSkippedTemplate).run({ id: fileId });
            }
            catch (markErr) {
                log(`⚠️ Could not mark ${filePath} as skipped: ${describeError(markErr)}`);
            }
            return false;
        }
        log(`🔍 Found ${functions.length} methods and ${classes.length} classes in ${filePath}`);
        // --- Knowledge Graph tagging ---
        try {
            const kgInput = { fileId, filepath: filePath, summary: undefined };
            const kgResult = await kgModule.run(kgInput, content);
            if (kgResult.entities?.length) {
                const insertTagStmt = prepared(insertGraphTagTemplate);
                const getTagIdStmt = prepared(selectGraphTagIdTemplate);
                const insertEntityTagStmt = prepared(insertGraphEntityTagTemplate);
                const persistTag = (tag) => {
                    try {
                        insertTagStmt.run({ name: tag });
                        return getTagIdStmt.get({ name: tag })?.id;
                    }
                    catch {
                        // Best-effort: a tag that cannot be stored is skipped
                        // rather than failing the whole file.
                        return undefined;
                    }
                };
                for (const entity of kgResult.entities) {
                    if (!entity.type || !Array.isArray(entity.tags) || !entity.tags.length) {
                        continue;
                    }
                    for (const tag of entity.tags) {
                        if (!tag) {
                            continue;
                        }
                        const tagId = persistTag(tag);
                        if (!tagId) {
                            continue;
                        }
                        // Prefer the id of a collected declaration with the same
                        // name; otherwise fall back to a synthetic `name@file` id.
                        const matchedUniqueId = functions.find((f) => f.name === entity.name)?.unique_id ||
                            classes.find((c) => c.name === entity.name)?.unique_id ||
                            `${entity.name}@${filePath}`;
                        try {
                            insertEntityTagStmt.run({ entity_type: entity.type, entity_unique_id: matchedUniqueId, tag_id: tagId });
                        }
                        catch {
                            // Best-effort: one failed link must not stop the remaining tags.
                        }
                    }
                }
            }
        }
        catch (kgErr) {
            console.warn(`⚠️ KG tagging failed for ${filePath}:`, kgErr instanceof Error ? kgErr.message : kgErr);
        }
        const seenEdges = new Set();
        const javaBuiltins = BUILTINS.java;
        const insertEdge = (edge) => prepared(insertEdgeTemplate).run(edge);
        // Inserts the file -> declaration `contains` edge once per unique id.
        const insertContainsEdge = (targetType, uniqueId) => {
            const containsEdgeKey = `file->${uniqueId}`;
            if (seenEdges.has(containsEdgeKey)) {
                return;
            }
            insertEdge({
                source_type: 'file',
                source_unique_id: normalizedPath,
                target_type: targetType,
                target_unique_id: uniqueId,
                relation: 'contains',
            });
            seenEdges.add(containsEdgeKey);
        };
        // --- Insert functions with call edges ---
        for (const fn of functions) {
            prepared(insertFunctionTemplate).run({
                file_id: fileId,
                name: fn.name ?? '<anon>',
                start_line: fn.start_line ?? -1,
                end_line: fn.end_line ?? -1,
                content: fn.content ?? '',
                embedding: null,
                lang: 'java',
                unique_id: fn.unique_id,
            });
            insertContainsEdge('function', fn.unique_id);
            // --- Function call edges ---
            // Textual heuristic: any identifier directly followed by `(` counts as
            // a callee unless it is a known builtin. Each callee is recorded once
            // per function.
            const callSet = new Set();
            for (const match of fn.content.matchAll(/([A-Za-z_$][A-Za-z0-9_$]*)\s*\(/g)) {
                const callee = match[1];
                if (!callee || javaBuiltins.has(callee) || callSet.has(callee)) {
                    continue;
                }
                callSet.add(callee);
                insertEdge({
                    source_type: 'function',
                    source_unique_id: fn.unique_id,
                    target_type: 'function',
                    target_unique_id: `${callee}@${normalizedPath}`,
                    relation: 'calls',
                });
            }
        }
        // --- Insert classes with inheritance edges ---
        for (const cls of classes) {
            prepared(insertGraphClassTemplate).run({
                file_id: fileId,
                name: cls.name,
                start_line: cls.start_line ?? -1,
                end_line: cls.end_line ?? -1,
                content: cls.content ?? '',
                embedding: null,
                lang: 'java',
                unique_id: cls.unique_id,
            });
            insertContainsEdge('class', cls.unique_id);
            if (cls.superClass) {
                insertEdge({
                    source_type: 'class',
                    source_unique_id: cls.unique_id,
                    target_type: 'class',
                    target_unique_id: `unresolved:${cls.superClass}`,
                    relation: 'inherits',
                });
            }
        }
        // --- Java imports as edges ---
        for (const impMatch of content.matchAll(/^\s*import\s+([A-Za-z0-9_.]+);/gm)) {
            const importedClass = impMatch[1];
            if (!importedClass) {
                continue;
            }
            insertEdge({
                source_type: 'file',
                source_unique_id: normalizedPath,
                target_type: 'class',
                target_unique_id: `imported:${importedClass}`,
                relation: 'imports',
            });
        }
        prepared(markFileAsExtractedTemplate).run({ id: fileId });
        log(`📊 Extraction summary for ${filePath}: ${functions.length} methods, ${classes.length} classes`);
        return true;
    }
    catch (err) {
        log(`❌ Failed to extract from Java file: ${filePath}`);
        // A thrown value is not guaranteed to be an Error instance.
        log(`   ↳ ${describeError(err)}`);
        try {
            prepared(markFileAsFailedTemplate).run({ id: fileId });
        }
        catch (markErr) {
            log(`⚠️ Could not mark ${filePath} as failed: ${describeError(markErr)}`);
        }
        return false;
    }
}