/*
 * Package: @yogesh0333/yogiway-prompt
 * Version: (not specified)
 * Description: Free & open-source prompt optimization library. Saves 30-50% on
 * AI API costs. Multi-language, multi-platform support.
 * File: 174 lines (173 loc) • 5.86 kB, JavaScript
 */
;
/**
* Context Window Manager
* Intelligently manages context windows for large prompts
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.manageContextWindow = manageContextWindow;
const tokenizer_1 = require("./tokenizer");
/**
 * Fit a prompt into a token budget, chunking it only when necessary.
 *
 * The usable budget is `maxTokens - reserveTokens`. A prompt whose token count
 * is within that budget comes back untouched as a single chunk; anything
 * larger is handed to `smartChunk` and each resulting chunk is measured.
 *
 * @param {string} prompt - Text to manage.
 * @param {object} options - Settings for the budget and chunking.
 * @param {number} options.maxTokens - Token limit before the reserve is subtracted.
 * @param {number} [options.reserveTokens=1000] - Tokens subtracted from `maxTokens`.
 * @param {Function} [options.priorityScorer] - Forwarded unchanged to `smartChunk`.
 * @param {number} [options.overlap=0] - Forwarded unchanged to `smartChunk`.
 * @returns {{chunks: string[], totalTokens: number, chunkTokens: number[],
 *   metadata: {originalSize: number, optimizedSize: number, reduction: number}}}
 *   `totalTokens` is always the token count of the whole prompt; `chunkTokens`
 *   holds one count per entry of `chunks`.
 */
function manageContextWindow(prompt, options) {
  const {
    maxTokens: limit,
    reserveTokens: reserved = 1000,
    priorityScorer: scorer,
    overlap: overlapTokens = 0,
  } = options;
  const { countTokens } = tokenizer_1;

  const budget = limit - reserved;
  const promptTokens = countTokens(prompt);
  const fitsInBudget = promptTokens <= budget;

  // A prompt that fits is passed through verbatim and is not re-measured.
  const chunks = fitsInBudget
    ? [prompt]
    : smartChunk(prompt, budget, overlapTokens, scorer);
  const chunkTokens = fitsInBudget
    ? [promptTokens]
    : chunks.map((piece) => countTokens(piece));

  return {
    chunks,
    totalTokens: promptTokens,
    chunkTokens,
    metadata: {
      originalSize: prompt.length,
      // Chunking does not shrink the text, so both sizes report the prompt length.
      optimizedSize: prompt.length,
      reduction: 0,
    },
  };
}
/**
 * Split `text` into chunks that each aim to stay within `maxTokens`.
 *
 * Strategy, from coarse to fine:
 *  1. Fenced code blocks (``` ... ```) are swapped for whitespace-free
 *     placeholders so they are never cut in the middle.
 *  2. Text is split on blank lines into paragraphs, which are packed greedily.
 *  3. A paragraph that alone exceeds the budget is split into sentences
 *     (terminating punctuation is kept), and a sentence that alone exceeds the
 *     budget is split by words via `splitByWords`.
 *  4. Placeholders are expanded back into the original code blocks.
 *  5. If a `priorityScorer` is given, chunks are ordered by descending score.
 *
 * NOTE: token budgeting is performed on the placeholder text, not on the code
 * it stands for, so a chunk may exceed `maxTokens` once its code blocks are
 * restored.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Token budget per chunk.
 * @param {number} overlap - When > 0, token budget of trailing text copied
 *   from a flushed chunk to the start of the next paragraph-level chunk.
 * @param {(chunk: string) => number} [priorityScorer] - Optional scorer;
 *   higher-scoring chunks come first. Called once per chunk.
 * @returns {string[]} The chunks.
 */
function smartChunk(text, maxTokens, overlap, priorityScorer) {
  const countTokens = (value) => (0, tokenizer_1.countTokens)(value);

  // Protect code blocks: a placeholder has no whitespace or sentence
  // punctuation, so none of the splitting steps below can break it apart.
  const codeBlocks = [];
  const processedText = text.replace(/```[\s\S]*?```/g, (match) => {
    codeBlocks.push(match);
    return `__CODE_${codeBlocks.length - 1}__`;
  });

  const chunks = [];
  let currentChunk = '';
  const flush = () => {
    if (currentChunk) {
      chunks.push(currentChunk);
      currentChunk = '';
    }
  };

  for (const paragraph of processedText.split(/\n\s*\n/)) {
    const paraTokens = countTokens(paragraph);

    if (paraTokens > maxTokens) {
      // The paragraph cannot fit in any chunk on its own: go sentence by sentence.
      flush();
      for (const sentence of splitIntoSentences(paragraph)) {
        const sentTokens = countTokens(sentence);
        if (countTokens(currentChunk) + sentTokens <= maxTokens) {
          currentChunk += (currentChunk ? ' ' : '') + sentence;
        } else {
          flush();
          if (sentTokens > maxTokens) {
            // Even a lone sentence is over budget; never park it in
            // currentChunk, or it would be emitted as an oversized chunk.
            chunks.push(...splitByWords(sentence, maxTokens));
          } else {
            currentChunk = sentence;
          }
        }
      }
    } else if (countTokens(currentChunk) + paraTokens > maxTokens) {
      // Start a new chunk, optionally seeded with the tail of the previous one.
      const overlapText =
        currentChunk && overlap > 0 ? getOverlapText(currentChunk, overlap) : '';
      flush();
      // Skip the separator when no overlap text fit, to avoid leading blank lines.
      currentChunk = overlapText ? `${overlapText}\n\n${paragraph}` : paragraph;
    } else {
      currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
    }
  }
  flush();

  // A function replacer inserts the code verbatim; a string replacement would
  // interpret `$&`, `$$`, etc. inside the code as replacement patterns.
  const restored = chunks.map((chunk) =>
    chunk.replace(/__CODE_(\d+)__/g, (placeholder, index) => {
      const block = codeBlocks[Number(index)];
      return block === undefined ? placeholder : block;
    }),
  );

  if (!priorityScorer) {
    return restored;
  }
  // Score each chunk once, then sort by descending score.
  const scored = restored.map((chunk) => ({ chunk, score: priorityScorer(chunk) }));
  scored.sort((a, b) => b.score - a.score);
  return scored.map((entry) => entry.chunk);
}
/**
 * Split a paragraph into sentences at runs of `.`, `!` or `?` followed by
 * whitespace. The punctuation stays attached to its sentence; the whitespace
 * between sentences is dropped.
 */
function splitIntoSentences(paragraph) {
  const sentences = [];
  const boundary = /([.!?]+)\s+/g;
  let start = 0;
  let match = boundary.exec(paragraph);
  while (match !== null) {
    sentences.push(paragraph.slice(start, match.index + match[1].length));
    start = boundary.lastIndex;
    match = boundary.exec(paragraph);
  }
  if (start < paragraph.length) {
    sentences.push(paragraph.slice(start));
  }
  return sentences;
}
/**
 * Greedily pack whitespace-separated words into pieces whose token count does
 * not exceed `maxTokens`.
 *
 * Words are joined with a single space. A word that is over budget on its own
 * cannot be divided further and is emitted as its own piece.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Token budget per piece.
 * @returns {string[]} The pieces, in order.
 */
function splitByWords(text, maxTokens) {
  const { countTokens } = tokenizer_1;
  const pieces = [];
  let pending = '';

  text.split(/\s+/).forEach((word) => {
    const candidate = pending ? `${pending} ${word}` : word;
    const overflows = countTokens(candidate) > maxTokens;
    if (!overflows) {
      pending = candidate;
      return;
    }
    // Emit what has been gathered so far, or the lone over-budget word when
    // nothing is pending, and restart from this word only in the former case.
    pieces.push(pending || word);
    pending = pending ? word : '';
  });

  if (pending) {
    pieces.push(pending);
  }
  return pieces;
}
/**
 * Collect the trailing words of `text` whose summed token counts stay within
 * `overlapTokens`.
 *
 * Words are taken from the end backwards and measured one at a time;
 * collection stops at the first word that would push the sum over the budget.
 * The selected words are returned in their original order, joined by single
 * spaces.
 *
 * @param {string} text - Text to take the tail from.
 * @param {number} overlapTokens - Token budget for the overlap.
 * @returns {string} The overlap text, or '' when no word fits.
 */
function getOverlapText(text, overlapTokens) {
  const { countTokens } = tokenizer_1;
  const pieces = text.split(/\s+/);
  let tail = '';
  let used = 0;
  let idx = pieces.length;

  // Walk backwards from the last word while budget remains.
  while (idx-- > 0 && used < overlapTokens) {
    const piece = pieces[idx];
    const cost = countTokens(piece);
    const fits = used + cost <= overlapTokens;
    if (!fits) {
      break;
    }
    tail = tail ? `${piece} ${tail}` : piece;
    used += cost;
  }
  return tail;
}