UNPKG

scai

Version:

> AI-powered CLI tool for commit messages **and** pull request reviews — using local models.

56 lines (55 loc) 2.15 kB
import { encode } from 'gpt-3-encoder';

/**
 * Reports whether a block comment is still open at the end of `line`.
 *
 * The line is scanned left to right, alternately looking for the opening and
 * the closing delimiter, so lines holding several delimiters (for example a
 * comment that closes and another that opens) are resolved correctly.
 *
 * Limitation: this is a textual scan, not a parser. Delimiters that appear
 * inside string literals or after `//` are still treated as comment markers.
 *
 * @param {string} line - One source line (without its trailing newline).
 * @param {boolean} startsInside - Whether a block comment was open before this line.
 * @returns {boolean} `true` when a block comment is open after this line.
 */
function endsInsideBlockComment(line, startsInside) {
  let inside = startsInside;
  let position = 0;

  while (position < line.length) {
    const marker = inside ? '*/' : '/*';
    const found = line.indexOf(marker, position);
    if (found === -1) {
      break;
    }
    inside = !inside;
    // Skip past the whole delimiter so "/*/" is not read as open-then-close.
    position = found + marker.length;
  }

  return inside;
}

/**
 * Tells whether a chunk may reasonably end right after `line`.
 *
 * @param {string} line - One source line.
 * @returns {boolean} `true` for blank lines, lines starting with `function ` or
 *   `class `, and lines ending with `}` or `;` (all checked after trimming).
 */
function isNaturalBreak(line) {
  const trimmed = line.trim();
  return (
    trimmed === '' ||
    trimmed.startsWith('function ') ||
    trimmed.startsWith('class ') ||
    trimmed.endsWith('}') ||
    trimmed.endsWith(';')
  );
}

/**
 * Picks how many buffered lines to emit as the next chunk.
 *
 * Preference order, always searching from the end of the buffer backwards:
 *   1. after the last natural break that lies outside a block comment;
 *   2. after the last line that lies outside a block comment;
 *   3. after the last buffered line (forced split; the whole buffer is
 *      inside one block comment, so no comment-safe boundary exists).
 *
 * @param {{ text: string, endsInComment: boolean }[]} entries - Non-empty buffer.
 * @returns {number} Count of leading entries to emit; always at least 1.
 */
function findSplitIndex(entries) {
  let fallback = -1;

  for (let i = entries.length - 1; i >= 0; i--) {
    const { text, endsInComment } = entries[i];
    if (endsInComment) {
      // Cutting after this line would separate a comment from its closing "*/".
      continue;
    }
    if (isNaturalBreak(text)) {
      return i + 1;
    }
    if (fallback === -1) {
      fallback = i + 1;
    }
  }

  return fallback === -1 ? entries.length : fallback;
}

/**
 * Splits source text into chunks of whole lines, aiming to keep each chunk
 * within `maxTokens` tokens as counted by `encode` from `gpt-3-encoder`.
 *
 * Lines are accumulated until adding the next one would exceed the budget.
 * The buffer is then cut at the latest natural boundary that is not inside a
 * block comment (see `findSplitIndex`), the leading part is emitted as a chunk
 * and the remainder is carried over into the next chunk.
 *
 * Notes:
 *   - A single line is never split, so a line that alone exceeds `maxTokens`
 *     still ends up in a chunk that is over budget.
 *   - No empty chunk is emitted when the first line is already over budget.
 *   - Lines inside a chunk are joined with "\n"; the newline separating two
 *     chunks is not part of either chunk.
 *
 * @param {string} text - Source text to split.
 * @param {number} maxTokens - Token budget per chunk.
 * @returns {string[]} Chunks in original order.
 */
export function splitCodeIntoChunks(text, maxTokens) {
  const chunks = [];
  let pending = [];
  let pendingTokens = 0;
  let insideComment = false;

  const joinText = (entries) => entries.map((entry) => entry.text).join('\n');

  for (const line of text.split('\n')) {
    const lineTokens = encode(`${line}\n`).length;

    // Only split when there is something buffered; otherwise an oversized
    // first line would produce an empty chunk.
    if (pending.length > 0 && pendingTokens + lineTokens > maxTokens) {
      const splitIndex = findSplitIndex(pending);
      chunks.push(joinText(pending.slice(0, splitIndex)));
      pending = pending.slice(splitIndex);
      pendingTokens = encode(joinText(pending)).length;
    }

    // Record the comment state *after* this line so later split decisions
    // judge each candidate boundary by its own state, not the current line's.
    insideComment = endsInsideBlockComment(line, insideComment);
    pending.push({ text: line, endsInComment: insideComment });
    pendingTokens += lineTokens;
  }

  if (pending.length > 0) {
    chunks.push(joinText(pending));
  }

  return chunks;
}

/**
 * Counts the tokens `encode` from `gpt-3-encoder` produces for `text`.
 *
 * @param {string} text - Text to tokenize.
 * @returns {number} Length of the encoded token sequence.
 */
export function countTokens(text) {
  return encode(text).length;
}