scai
Version:
> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.**
> **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.
142 lines (141 loc) • 5.21 kB
JavaScript
import { encode } from "gpt-3-encoder";
/**
 * Split source text into chunks that try not to cut through code structures.
 *
 * Strategy:
 * - Accumulate whole lines until the chunk holds at least `softLimit` tokens
 *   (as counted by `encode`), then cut at the first line that is "safe".
 * - If no safe line appears, cut unconditionally once the chunk holds at least
 *   `floor(softLimit * hardLimitMultiplier)` tokens.
 * - A line is safe when no block comment, tracked function/class body,
 *   try/catch/finally chain, brace, parenthesis or bracket is open, and the
 *   line did not itself close a function or a try/handler block.
 *
 * The scan is a line/character heuristic, not a parser: delimiters inside
 * strings, regex literals and comments are counted like any others.
 *
 * @param {string} text - Source text to split.
 * @param {number} [softLimit=2200] - Preferred token count per chunk.
 * @param {number} [hardLimitMultiplier=1.35] - Factor applied to `softLimit`
 *   to obtain the token count at which a split is forced.
 * @returns {string[]} Chunks in order; each is its lines joined with "\n".
 */
export function splitCodeIntoChunks(text, softLimit = 2200, hardLimitMultiplier = 1.35) {
  const hardLimit = Math.floor(softLimit * hardLimitMultiplier);

  const arrowFunctionStart = /^\w+\s*=\s*\(.*\)\s*=>\s*{/;
  // Word boundaries keep identifiers such as `tryAgain` or `catchAll` from
  // being mistaken for keywords.
  const tryStart = /^try\b/;
  const handlerStart = /^(?:catch|finally)\b/;

  const chunks = [];
  let currentChunkLines = [];
  let currentTokens = 0;

  // ───────────── State tracking ─────────────
  let inMultiComment = false;
  let inFunction = false;
  let functionBraceDepth = 0;
  let inTryChain = false;
  let tryChainHasHandler = false;
  let tryBraceDepth = 0; // only changes while inTryChain, so it is 0 outside a chain
  let globalBraceDepth = 0;
  let parenDepth = 0;
  let bracketDepth = 0;

  for (const line of text.split("\n")) {
    const trimmed = line.trim();

    // ---------- multi-line comments ----------
    if (trimmed.includes("/*") && !trimmed.includes("*/")) {
      inMultiComment = true;
    }
    if (trimmed.includes("*/")) {
      inMultiComment = false;
    }

    // ---------- function / class start ----------
    const isFunctionStart =
      trimmed.startsWith("function ") ||
      trimmed.startsWith("async function ") ||
      trimmed.startsWith("class ") ||
      arrowFunctionStart.test(trimmed);
    if (!inFunction && isFunctionStart) {
      inFunction = true;
      functionBraceDepth = 0;
    }

    // ---------- try / catch / finally ----------
    if (!inTryChain && tryStart.test(trimmed)) {
      inTryChain = true;
      tryChainHasHandler = false;
      tryBraceDepth = 0;
    } else if (tryBraceDepth === 0 && handlerStart.test(trimmed)) {
      // Handler on its own line (`}` newline `catch`). Depth 0 means it belongs
      // to the chain itself, not to a try nested inside it. This also resumes
      // a chain for a `finally` that follows an already closed `catch` block.
      inTryChain = true;
      tryChainHasHandler = true;
    }

    // ---------- depth tracking ----------
    let sawClosingBrace = false;
    for (let i = 0; i < line.length; i++) {
      switch (line[i]) {
        case "{":
          globalBraceDepth++;
          if (inFunction) {
            functionBraceDepth++;
          }
          if (inTryChain) {
            tryBraceDepth++;
          }
          break;
        case "}":
          sawClosingBrace = true;
          globalBraceDepth = Math.max(0, globalBraceDepth - 1);
          if (inFunction) {
            functionBraceDepth = Math.max(0, functionBraceDepth - 1);
          }
          if (inTryChain) {
            tryBraceDepth = Math.max(0, tryBraceDepth - 1);
            // Handler on the same line as the closing brace: `} catch (e) {`.
            if (
              tryBraceDepth === 0 &&
              handlerStart.test(line.slice(i + 1).trimStart())
            ) {
              tryChainHasHandler = true;
            }
          }
          break;
        case "(":
          parenDepth++;
          break;
        case ")":
          parenDepth = Math.max(0, parenDepth - 1);
          break;
        case "[":
          bracketDepth++;
          break;
        case "]":
          bracketDepth = Math.max(0, bracketDepth - 1);
          break;
      }
    }

    // A structure counts as closed only if its depth is back to zero at the
    // END of the line, so `function f(o = {}) {` and `} catch (e) {` (which
    // touch zero mid-line and reopen) are still treated as open.
    const justClosedFunction =
      inFunction && sawClosingBrace && functionBraceDepth === 0;
    const justClosedTryBlock =
      inTryChain && sawClosingBrace && tryBraceDepth === 0;

    if (justClosedFunction) {
      inFunction = false;
    }
    // The chain ends only once a handler has been seen; a closed `try` block
    // without one is still waiting for its `catch` / `finally`.
    if (justClosedTryBlock && tryChainHasHandler) {
      inTryChain = false;
      tryChainHasHandler = false;
    }

    // ---------- add line ----------
    currentChunkLines.push(line);
    currentTokens += encode(line + "\n").length;

    // ---------- split decision ----------
    const softLimitReached = currentTokens >= softLimit;
    const hardLimitReached = currentTokens >= hardLimit;

    // Safe split means: no open comment, not inside a function or try-chain,
    // no dangling delimiters, and not on the line that closed a structure
    // (the next line may still belong to it, e.g. a `catch`).
    const safeToSplit =
      !inMultiComment &&
      !inFunction &&
      !inTryChain &&
      !justClosedFunction &&
      !justClosedTryBlock &&
      globalBraceDepth === 0 &&
      parenDepth === 0 &&
      bracketDepth === 0;

    // Prefer soft limit + safety, but enforce the hard limit regardless.
    if ((softLimitReached && safeToSplit) || hardLimitReached) {
      chunks.push(currentChunkLines.join("\n"));
      currentChunkLines = [];
      currentTokens = 0;
    }
  }

  if (currentChunkLines.length > 0) {
    chunks.push(currentChunkLines.join("\n"));
  }
  return chunks;
}
/**
 * Count the tokens the encoder produces for a piece of text.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Length of the token array returned by `encode(text)`.
 */
export function countTokens(text) {
  const { length: tokenCount } = encode(text);
  return tokenCount;
}