// codevault
// Version:
// AI-powered semantic code search via Model Context Protocol
// 181 lines • 6.02 kB
// JavaScript
import { batchAnalyzeCodeSize } from './token-counter.js';
/**
 * Picks the chunk-size limits to apply for a profile.
 *
 * Token-based limits are returned only when `profile.useTokens` and
 * `profile.tokenCounter` are both truthy; otherwise the character-based
 * limits are returned.
 *
 * @param {object} profile - Chunking profile carrying the `*Tokens` / `*Chars` fields.
 * @returns {{optimal: *, min: *, max: *, overlap: *, unit: string}} Limits,
 *   with `unit` set to `'tokens'` or `'characters'`.
 */
function getSizeLimits(profile) {
    const countsTokens = profile.useTokens && profile.tokenCounter;
    if (!countsTokens) {
        const {
            optimalChars: optimal,
            minChunkChars: min,
            maxChunkChars: max,
            overlapChars: overlap
        } = profile;
        return { optimal, min, max, overlap, unit: 'characters' };
    }
    const {
        optimalTokens: optimal,
        minChunkTokens: min,
        maxChunkTokens: max,
        overlapTokens: overlap
    } = profile;
    return { optimal, min, max, overlap, unit: 'tokens' };
}
/**
 * Extracts the source text of every node and measures its size.
 *
 * When `profile.useTokens` and `profile.tokenCounter` are both truthy the
 * sizes come from `batchAnalyzeCodeSize`; otherwise the size of a node is the
 * length of its extracted text.
 *
 * @param {Array<{startIndex: number, endIndex: number}>} nodes - Nodes to measure.
 * @param {string} source - Text the node indices refer to.
 * @param {object} profile - Chunking profile (`useTokens`, `tokenCounter`, limits).
 * @returns {Promise<Array<{node: object, size: number, code: string}>>} One
 *   entry per node, in input order.
 */
async function batchAnalyzeNodesInternal(nodes, source, profile) {
    const codes = nodes.map(({ startIndex, endIndex }) => source.slice(startIndex, endIndex));
    // Default measure: plain text length.
    let sizeAt = (index) => codes[index].length;
    if (profile.useTokens && profile.tokenCounter) {
        // Limits are only needed by the token analyzer, so resolve them here
        // rather than on every call.
        const limits = getSizeLimits(profile);
        const analyses = await batchAnalyzeCodeSize(codes, limits, profile.tokenCounter, false);
        sizeAt = (index) => analyses[index].size;
    }
    return nodes.map((node, index) => ({
        node,
        size: sizeAt(index),
        code: codes[index]
    }));
}
/**
 * Reports whether a node's `type` is one of the fixed container type names
 * (class, interface, module, namespace, trait or enum declarations).
 *
 * @param {{type: string}} node - Node to classify.
 * @param {*} rule - Accepted for signature compatibility; not consulted here.
 * @returns {boolean} `true` when `node.type` is a container type.
 */
function isContainerNode(node, rule) {
    switch (node.type) {
        case 'class_declaration':
        case 'class_definition':
        case 'interface_declaration':
        case 'module_declaration':
        case 'namespace_declaration':
        case 'trait_declaration':
        case 'enum_declaration':
            return true;
        default:
            return false;
    }
}
/**
 * Splits a node list into semantic groups, preserving input order.
 *
 * Each node for which `isContainerNode` returns true becomes its own
 * `'container'` group. Consecutive non-container nodes are collected into a
 * `'file_section'` group, which is closed whenever a container is reached and
 * at the end of the list. Empty file sections are never emitted.
 *
 * @param {Array<object>} nodes - Nodes to group.
 * @param {string} source - Not used by this function.
 * @param {Array<object>} nodeAnalyses - Analysis entries, index-aligned with `nodes`.
 * @param {*} rule - Forwarded to `isContainerNode`.
 * @returns {Array<object>} Groups with `type`, `nodes`, `analyses` and
 *   `parentNode` (plus `containerType` on container groups).
 */
function identifySemanticGroups(nodes, source, nodeAnalyses, rule) {
    const newFileSection = () => ({
        type: 'file_section',
        nodes: [],
        analyses: [],
        parentNode: null
    });
    const result = [];
    let pending = newFileSection();
    for (const [index, node] of nodes.entries()) {
        const analysis = nodeAnalyses[index];
        if (!isContainerNode(node, rule)) {
            pending.nodes.push(node);
            pending.analyses.push(analysis);
            continue;
        }
        // A container closes the section collected so far...
        if (pending.nodes.length > 0) {
            result.push(pending);
        }
        // ...and is emitted as a group of its own.
        result.push({
            type: 'container',
            containerType: node.type,
            nodes: [node],
            analyses: [analysis],
            parentNode: node
        });
        pending = newFileSection();
    }
    if (pending.nodes.length > 0) {
        result.push(pending);
    }
    return result;
}
/**
 * Packs semantic groups, in order, into combined groups sized around
 * `limits.optimal`.
 *
 * Rules applied to each semantic group (its size is the sum of its
 * `analyses[].size`):
 *  - larger than `limits.optimal`: the accumulator is flushed and the group is
 *    emitted on its own;
 *  - adding it would exceed `limits.max`: the accumulator is flushed and a new
 *    one is started from this group;
 *  - otherwise it is appended, and the accumulator is flushed once it reaches
 *    90% of `limits.optimal`.
 *
 * The input groups are never mutated: every emitted `nodes` array is a fresh
 * array, while `groupInfo` keeps references to the original group objects.
 *
 * @param {Array<{nodes: Array, analyses: Array<{size: number}>}>} semanticGroups
 * @param {string} source - Not used by this function.
 * @param {object} profile - Not used by this function.
 * @param {{optimal: number, max: number}} limits - Size limits.
 * @returns {Promise<Array<{nodes: Array, totalSize: number, groupInfo: Array}>>}
 */
async function combineGroupsToOptimalSize(semanticGroups, source, profile, limits) {
    // Fraction of the optimal size at which an accumulated group is considered full.
    const FULL_ENOUGH_RATIO = 0.9;
    const emptyBucket = () => ({ nodes: [], totalSize: 0, groupInfo: [] });
    const optimalGroups = [];
    let bucket = emptyBucket();
    const flushIfNonEmpty = () => {
        if (bucket.nodes.length > 0) {
            optimalGroups.push(bucket);
        }
    };
    for (const group of semanticGroups) {
        const groupTotalSize = group.analyses.reduce((sum, a) => sum + a.size, 0);
        if (groupTotalSize > limits.optimal) {
            flushIfNonEmpty();
            optimalGroups.push({
                // Copy so the emitted group never shares its array with the input.
                nodes: [...group.nodes],
                totalSize: groupTotalSize,
                groupInfo: [group]
            });
            bucket = emptyBucket();
            continue;
        }
        if (bucket.totalSize + groupTotalSize > limits.max) {
            flushIfNonEmpty();
            bucket = {
                // Copy: this array is appended to below when further groups
                // join the bucket; sharing it would mutate `group.nodes`.
                nodes: [...group.nodes],
                totalSize: groupTotalSize,
                groupInfo: [group]
            };
            continue;
        }
        bucket.nodes.push(...group.nodes);
        bucket.totalSize += groupTotalSize;
        bucket.groupInfo.push(group);
        if (bucket.totalSize >= limits.optimal * FULL_ENOUGH_RATIO) {
            optimalGroups.push(bucket);
            bucket = emptyBucket();
        }
    }
    flushIfNonEmpty();
    return optimalGroups;
}
/**
 * Groups nodes into chunk candidates.
 *
 * - No nodes (`null`, `undefined` or empty): resolves to `[]`.
 * - Up to 10 nodes: every node becomes its own group with `totalSize: 0` and
 *   an empty `groupInfo`; no size analysis is run and `profile` is not read.
 * - Otherwise: nodes are measured, split into semantic groups and packed
 *   towards the profile's size limits.
 *
 * @param {Array<object>|null|undefined} nodes - Nodes to group.
 * @param {string} source - Text the node indices refer to.
 * @param {object} profile - Chunking profile; only read on the analysis path.
 * @param {*} rule - Forwarded to `identifySemanticGroups`.
 * @returns {Promise<Array<{nodes: Array, totalSize: number, groupInfo: Array}>>}
 */
export async function groupNodesForChunking(nodes, source, profile, rule) {
    // At or below this many nodes, grouping is skipped entirely.
    const SMALL_FILE_NODE_LIMIT = 10;
    if (!nodes || nodes.length === 0) {
        return [];
    }
    if (nodes.length <= SMALL_FILE_NODE_LIMIT) {
        return nodes.map(node => ({
            nodes: [node],
            totalSize: 0,
            groupInfo: []
        }));
    }
    // Limits are only needed for packing, so resolve them after the fast path.
    const limits = getSizeLimits(profile);
    const nodeAnalyses = await batchAnalyzeNodesInternal(nodes, source, profile);
    const semanticGroups = identifySemanticGroups(nodes, source, nodeAnalyses, rule);
    return combineGroupsToOptimalSize(semanticGroups, source, profile, limits);
}
/**
 * Builds a single chunk descriptor out of a group of nodes.
 *
 * The chunk's code is the text of each node joined by a blank line. The
 * chunk's node is a shallow copy of the first node whose `type` becomes
 * `<firstType>_group_<count>` and whose `endIndex` is taken from the last node.
 *
 * @param {{nodes: Array<object>, totalSize: *, groupInfo: (Array<{type: *}>|undefined)}} nodeGroup
 *   Group to combine.
 * @param {string} source - Text the node indices refer to.
 * @param {*} filerel - Not used by this function.
 * @returns {{code: string, node: object, metadata: object}|null} The combined
 *   chunk, or `null` when the group has no nodes.
 */
export function createCombinedChunk(nodeGroup, source, filerel) {
    const members = nodeGroup.nodes;
    if (!members || members.length === 0) {
        return null;
    }
    const memberCount = members.length;
    const head = members[0];
    const tail = members[memberCount - 1];
    const combinedCode = members
        .map(member => source.slice(member.startIndex, member.endIndex))
        .join('\n\n');
    // The fallback applies only when `groupInfo` is missing; an empty
    // `groupInfo` array produces an empty `groupTypes` list.
    const groupTypes = nodeGroup.groupInfo?.map(info => info.type) || ['combined'];
    const node = {
        ...head,
        type: `${head.type}_group_${memberCount}`,
        endIndex: tail.endIndex
    };
    const metadata = {
        isGroup: true,
        nodeCount: memberCount,
        totalSize: nodeGroup.totalSize,
        groupTypes
    };
    return { code: combinedCode, node, metadata };
}
//# sourceMappingURL=file-grouper.js.map