UNPKG

scai

Version:

> **AI-powered CLI for local code analysis, commit message suggestions, and natural-language queries.** > **100% local • No token cost • Private by design • GDPR-friendly** — made in Denmark/EU with ❤️.

142 lines (141 loc) 5.21 kB
import { encode } from "gpt-3-encoder";

/**
 * Matches the `try` keyword at the start of a trimmed line.
 * The word boundary keeps identifiers such as `tryParse(...)` from opening a try-chain.
 */
const TRY_START = /^try\b/;

/**
 * Matches `catch` / `finally` either at the start of a trimmed line or directly
 * after a closing brace on the same line (the common `} catch (e) {` style).
 */
const TRY_HANDLER = /(?:^|\})\s*(?:catch|finally)\b/;

/**
 * Patterns (tested against a trimmed line) that mark the start of a
 * function-like or class definition:
 *   - `function` / `async function` / `function*`, optionally exported
 *   - `class`, optionally exported
 *   - `name = (...) => {`, optionally preceded by `const` / `let` / `var` / `export`
 */
const DEFINITION_START_PATTERNS = [
  /^(?:export\s+(?:default\s+)?)?(?:async\s+)?function[\s*]/,
  /^(?:export\s+(?:default\s+)?)?class\s/,
  /^(?:(?:export\s+)?(?:const|let|var)\s+)?\w+\s*=\s*(?:async\s*)?\(.*\)\s*=>\s*\{/,
];

/**
 * Split source code into chunks at structurally safe line boundaries.
 *
 * Strategy:
 * - Lines are accumulated until the running token count reaches `softLimit`.
 * - Once the soft limit is reached, the chunk is closed at the first line that
 *   is a safe split point (see `safeToSplit` below).
 * - If no safe point is found before the token count reaches the hard limit
 *   (`Math.floor(softLimit * hardLimitMultiplier)`), the chunk is closed anyway.
 *   The line that crossed the limit stays in the chunk, so a chunk may slightly
 *   exceed the hard limit.
 *
 * Structure is tracked with line-based heuristics (keyword prefixes and
 * delimiter counting); string and comment contents are not parsed.
 *
 * @param {string} text - Source code to split.
 * @param {number} [softLimit=2200] - Target token count per chunk.
 * @param {number} [hardLimitMultiplier=1.35] - Factor applied to `softLimit`
 *   to obtain the hard limit (2970 tokens with the defaults).
 * @returns {string[]} Chunks in original order; each chunk is its lines joined with "\n".
 */
export function splitCodeIntoChunks(text, softLimit = 2200, hardLimitMultiplier = 1.35) {
  const hardLimit = Math.floor(softLimit * hardLimitMultiplier);
  const lines = text.split("\n");
  const chunks = [];

  let currentChunkLines = [];
  let currentTokens = 0;

  // ───────────── State tracking ─────────────
  let inMultiComment = false;
  let inFunction = false;
  let functionBraceDepth = 0;
  let inTryChain = false;
  let tryChainHasHandler = false;
  let tryBraceDepth = 0;
  let parenDepth = 0;
  let bracketDepth = 0;

  for (const line of lines) {
    const trimmed = line.trim();

    // Per-line flags: true only if the structure is closed at the END of this line.
    let justClosedFunction = false;
    let justClosedTryBlock = false;

    // ---------- multi-line comments ----------
    if (trimmed.includes("/*") && !trimmed.includes("*/")) {
      inMultiComment = true;
    }
    if (trimmed.includes("*/")) {
      inMultiComment = false;
    }

    // ---------- function / class start ----------
    if (!inFunction && DEFINITION_START_PATTERNS.some((pattern) => pattern.test(trimmed))) {
      inFunction = true;
      functionBraceDepth = 0;
    }

    // ---------- try / catch / finally ----------
    if (!inTryChain && TRY_START.test(trimmed)) {
      inTryChain = true;
      tryChainHasHandler = false;
      tryBraceDepth = 0;
    }
    if (inTryChain && TRY_HANDLER.test(trimmed)) {
      tryChainHasHandler = true;
    }

    // ---------- depth tracking ----------
    for (const char of line) {
      switch (char) {
        case "{":
          // Re-opening on the same line (e.g. `} catch (e) {` or
          // `function f(opts = {}) {`) cancels a close seen earlier on this line.
          if (inFunction) {
            functionBraceDepth++;
            justClosedFunction = false;
          }
          if (inTryChain) {
            tryBraceDepth++;
            justClosedTryBlock = false;
          }
          break;
        case "}":
          if (inFunction) {
            functionBraceDepth = Math.max(0, functionBraceDepth - 1);
            if (functionBraceDepth === 0) {
              justClosedFunction = true;
            }
          }
          if (inTryChain) {
            tryBraceDepth = Math.max(0, tryBraceDepth - 1);
            if (tryBraceDepth === 0) {
              justClosedTryBlock = true;
            }
          }
          break;
        case "(":
          parenDepth++;
          break;
        case ")":
          parenDepth = Math.max(0, parenDepth - 1);
          break;
        case "[":
          bracketDepth++;
          break;
        case "]":
          bracketDepth = Math.max(0, bracketDepth - 1);
          break;
      }
    }

    if (justClosedFunction) {
      inFunction = false;
    }
    // A try-chain ends only once a block closes AFTER a catch/finally was seen;
    // the bare `try { ... }` block closing keeps the chain open for its handler.
    // Limitation: a `finally` placed on its own line after a closed `catch`
    // block is not treated as part of the chain.
    if (justClosedTryBlock && tryChainHasHandler) {
      inTryChain = false;
      tryChainHasHandler = false;
    }

    // ---------- add line ----------
    currentChunkLines.push(line);
    currentTokens += encode(line + "\n").length;

    // ---------- split decision ----------
    const softLimitReached = currentTokens >= softLimit;
    const hardLimitReached = currentTokens >= hardLimit;

    /**
     * Safe split means:
     * - no open multi-line comment
     * - not inside a function/class or try-chain
     * - no dangling parentheses or brackets
     * - not on the very line that closed a function or try block
     */
    const safeToSplit =
      !inMultiComment &&
      !inFunction &&
      !inTryChain &&
      !justClosedFunction &&
      !justClosedTryBlock &&
      parenDepth === 0 &&
      bracketDepth === 0;

    // Prefer soft limit + safety, but enforce the hard limit regardless.
    if ((softLimitReached && safeToSplit) || hardLimitReached) {
      chunks.push(currentChunkLines.join("\n"));
      currentChunkLines = [];
      currentTokens = 0;
    }
  }

  if (currentChunkLines.length > 0) {
    chunks.push(currentChunkLines.join("\n"));
  }

  return chunks;
}

/**
 * Count the tokens in `text` using the same encoder as `splitCodeIntoChunks`.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Length of the token array returned by `encode(text)`.
 */
export function countTokens(text) {
  return encode(text).length;
}