scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** 100% local, private, GDPR-friendly, made in Denmark/EU with ❤️.
89 lines (88 loc) • 3.25 kB
JavaScript
import { encode } from 'gpt-3-encoder';
/** Start of a function/class declaration, optionally prefixed by `export`, `export default` and/or `async`. */
const CHUNK_DECLARATION_START = /^(?:export\s+(?:default\s+)?)?(?:async\s+)?(?:function\b|class\s)/;

/** `name = (...) => {` arrow function, optionally prefixed by `export` and `const` / `let` / `var`. */
const CHUNK_ARROW_START = /^(?:export\s+)?(?:(?:const|let|var)\s+)?[\w$]+\s*=\s*(?:async\s*)?(?:\(.*\)|[\w$]+)\s*=>\s*\{/;

/** A line that opens a `try` statement (`try {`, `try{`, or a bare `try`). */
const CHUNK_TRY_START = /^try\s*(?:\{|$)/;

/** A `catch` / `finally` clause, either leading the line or directly following a closing brace. */
const CHUNK_TRY_END = /(?:^|\})\s*(?:catch|finally)\b/;

/**
 * Splits source text into chunks of roughly `maxTokens` tokens, preferring to
 * cut only at structurally "safe" line boundaries.
 *
 * The text is processed line by line. A chunk is closed after a line when either:
 *  - the soft limit (`maxTokens`) is reached AND the line is a safe boundary
 *    (not inside a block comment, a `try` body, a tracked function/class body,
 *    or an open `(`/`[` group), or
 *  - the hard limit (`maxTokens * hardLimitMultiplier`) is reached, regardless
 *    of safety.
 *
 * Structure detection is a line-based heuristic, not a parser: braces, parens
 * and brackets inside strings or comments are counted like any other.
 * Joining the returned chunks with `'\n'` reproduces `text` exactly.
 *
 * @param {string} text - Source code to split.
 * @param {number} [maxTokens=1500] - Soft token budget per chunk.
 * @param {number} [hardLimitMultiplier=1.8] - Factor applied to `maxTokens` to get the hard limit.
 * @returns {string[]} The chunks, in source order.
 */
export function splitCodeIntoChunks(text, maxTokens = 1500, hardLimitMultiplier = 1.8) {
    const lines = text.split('\n');
    const chunks = [];
    let currentChunkLines = [];
    let currentTokens = 0;
    let inMultiComment = false;
    let inFunction = false;
    // True once the tracked function/class has seen its first `{`. Without this,
    // a declaration whose brace is on a later line would be considered finished
    // at the end of its first line.
    let functionBodyOpened = false;
    let inTryBlock = false;
    let functionBraceDepth = 0;
    let parenDepth = 0;
    let bracketDepth = 0;

    for (const line of lines) {
        const trimmed = line.trim();

        // ---------- comments ----------
        if (trimmed.includes('/*') && !trimmed.includes('*/')) {
            inMultiComment = true;
        }
        if (trimmed.includes('*/')) {
            inMultiComment = false;
        }

        // ---------- function start ----------
        const isFunctionStart = CHUNK_DECLARATION_START.test(trimmed) || CHUNK_ARROW_START.test(trimmed);
        if (!inFunction && isFunctionStart) {
            inFunction = true;
            functionBodyOpened = false;
            functionBraceDepth = 0;
        }

        // ---------- try/catch ----------
        // Only the `try` body is protected; reaching `catch` / `finally`
        // (including the `} catch (e) {` form) lifts the protection.
        if (CHUNK_TRY_START.test(trimmed)) {
            inTryBlock = true;
        }
        if (CHUNK_TRY_END.test(trimmed)) {
            inTryBlock = false;
        }

        // ---------- depth tracking ----------
        for (const char of line) {
            switch (char) {
                case '{':
                    if (inFunction) {
                        functionBraceDepth++;
                        functionBodyOpened = true;
                    }
                    break;
                case '}':
                    if (inFunction) {
                        functionBraceDepth = Math.max(0, functionBraceDepth - 1);
                    }
                    break;
                case '(':
                    parenDepth++;
                    break;
                case ')':
                    parenDepth = Math.max(0, parenDepth - 1);
                    break;
                case '[':
                    bracketDepth++;
                    break;
                case ']':
                    bracketDepth = Math.max(0, bracketDepth - 1);
                    break;
                default:
                    break;
            }
        }

        // ---------- add line ----------
        currentChunkLines.push(line);
        currentTokens += encode(`${line}\n`).length;

        // A declaration that is terminated by `;` before any body was opened
        // (e.g. an overload signature) has no body to wait for.
        if (inFunction && !functionBodyOpened && parenDepth === 0 && trimmed.endsWith(';')) {
            inFunction = false;
        }

        // ---------- split decision ----------
        const softLimitReached = currentTokens >= maxTokens;
        const hardLimitReached = currentTokens >= maxTokens * hardLimitMultiplier;
        // Between a declaration line and its opening brace we must not cut.
        const awaitingFunctionBody = inFunction && !functionBodyOpened;
        const safeToSplit = !inMultiComment &&
            !inTryBlock &&
            !awaitingFunctionBody &&
            functionBraceDepth === 0 &&
            parenDepth === 0 &&
            bracketDepth === 0;
        if ((softLimitReached && safeToSplit) || hardLimitReached) {
            chunks.push(currentChunkLines.join('\n'));
            currentChunkLines = [];
            currentTokens = 0;
        }

        // ---------- function end ----------
        // Reset only after the split decision so the closing line itself is a
        // valid boundary, and only once a body has actually been opened.
        if (inFunction && functionBodyOpened && functionBraceDepth === 0) {
            inFunction = false;
        }
    }

    if (currentChunkLines.length > 0) {
        chunks.push(currentChunkLines.join('\n'));
    }
    return chunks;
}
/**
 * Returns how many tokens `encode` produces for the given text.
 *
 * @param {string} text - Text to tokenize.
 * @returns {number} Length of the token sequence returned by `encode(text)`.
 */
export function countTokens(text) {
    const { length: tokenCount } = encode(text);
    return tokenCount;
}