scai
Version:
> AI-powered CLI tool for commit messages **and** pull request reviews — using local models.
56 lines (55 loc) • 2.15 kB
JavaScript
import { encode } from 'gpt-3-encoder';
/**
 * Tells whether a chunk may end right after a line with this trimmed text.
 *
 * @param {string} trimmedLine - Line content with surrounding whitespace removed.
 * @returns {boolean} True for blank lines and lines ending in `}` or `;`.
 */
function endsAtNaturalBoundary(trimmedLine) {
  return trimmedLine === '' || trimmedLine.endsWith('}') || trimmedLine.endsWith(';');
}

/**
 * Tells whether a line opens a declaration that should begin a chunk.
 *
 * @param {string} trimmedLine - Line content with surrounding whitespace removed.
 * @returns {boolean} True when the line starts with `function ` or `class `.
 */
function startsDeclaration(trimmedLine) {
  return trimmedLine.startsWith('function ') || trimmedLine.startsWith('class ');
}

/**
 * Chooses how many buffered lines to emit as the next chunk.
 *
 * Preference order:
 *   1. the latest natural boundary that does not fall inside a block comment,
 *   2. the latest position that does not fall inside a block comment,
 *   3. the whole buffer (the token limit wins over comment integrity).
 *
 * @param {string[]} lines - Buffered lines; must not be empty.
 * @param {boolean[]} commentOpenAfter - For each buffered line, whether a
 *   block comment is still open once that line has been read.
 * @returns {number} Count of leading lines to emit, always between 1 and `lines.length`.
 */
function findChunkSplitIndex(lines, commentOpenAfter) {
  // Emitting the first `count` lines is safe when no comment is open after the last of them.
  const isCommentSafe = (count) => !commentOpenAfter[count - 1];

  for (let i = lines.length - 1; i >= 0; i--) {
    const trimmed = lines[i].trim();
    if (endsAtNaturalBoundary(trimmed) && isCommentSafe(i + 1)) {
      return i + 1;
    }
    // Split *before* a declaration header so it stays attached to its body.
    // i > 0 guarantees the emitted chunk is not empty.
    if (i > 0 && startsDeclaration(trimmed) && isCommentSafe(i)) {
      return i;
    }
  }

  for (let count = lines.length; count >= 1; count--) {
    if (isCommentSafe(count)) {
      return count;
    }
  }

  return lines.length;
}

/**
 * Splits source text into line-aligned chunks whose estimated token count
 * stays within `maxTokens`.
 *
 * The estimate for a chunk is the sum of `encode(line + '\n').length` over its
 * lines. Chunks are cut at natural boundaries where possible (blank lines,
 * lines ending in `}` or `;`, or just before a `function `/`class ` header) and
 * are not cut inside a multi-line block comment unless there is no other way
 * to respect the limit. A single line that alone exceeds `maxTokens` is emitted
 * as its own over-sized chunk. Joining the returned chunks with '\n'
 * reproduces `text`.
 *
 * Block-comment tracking is a per-line heuristic: a line containing the
 * opening marker without the closing one opens a comment, a line containing
 * only the closing marker ends it, and a line containing both leaves the
 * state unchanged.
 *
 * @param {string} text - Source text to split.
 * @param {number} maxTokens - Maximum estimated tokens per chunk.
 * @returns {string[]} The chunks, in source order.
 */
export function splitCodeIntoChunks(text, maxTokens) {
  const COMMENT_OPEN = '/*';
  const COMMENT_CLOSE = '*/';

  const chunks = [];
  // Parallel arrays describing the lines buffered for the chunk being built.
  let pendingLines = [];
  let pendingCosts = [];
  let pendingCommentOpen = [];
  let pendingTokens = 0;
  let inMultiComment = false;

  for (const line of text.split('\n')) {
    const lineTokens = encode(`${line}\n`).length;

    // Flush until the incoming line fits. The non-empty guard prevents an
    // empty chunk when a single line is larger than the limit, and looping
    // re-checks the limit after lines are carried over.
    while (pendingLines.length > 0 && pendingTokens + lineTokens > maxTokens) {
      const splitIndex = findChunkSplitIndex(pendingLines, pendingCommentOpen);
      chunks.push(pendingLines.slice(0, splitIndex).join('\n'));
      pendingLines = pendingLines.slice(splitIndex);
      pendingCosts = pendingCosts.slice(splitIndex);
      pendingCommentOpen = pendingCommentOpen.slice(splitIndex);
      // Sum cached per-line costs so carried-over lines are counted the same
      // way as newly added ones (and are not re-encoded).
      pendingTokens = pendingCosts.reduce((sum, cost) => sum + cost, 0);
    }

    const opensComment = line.includes(COMMENT_OPEN);
    const closesComment = line.includes(COMMENT_CLOSE);
    if (opensComment && !closesComment) {
      inMultiComment = true;
    } else if (closesComment && !opensComment) {
      inMultiComment = false;
    }

    pendingLines.push(line);
    pendingCosts.push(lineTokens);
    // Record the state *after* this line so split decisions use the state
    // that applied at each candidate position, not at the overflowing line.
    pendingCommentOpen.push(inMultiComment);
    pendingTokens += lineTokens;
  }

  if (pendingLines.length > 0) {
    chunks.push(pendingLines.join('\n'));
  }
  return chunks;
}
/**
 * Counts the tokens that `encode` produces for the given text.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Length of the token sequence returned by `encode(text)`.
 */
export function countTokens(text) {
  const { length: tokenCount } = encode(text);
  return tokenCount;
}