UNPKG

@yogesh0333/yogiway-prompt

Version:

Free & Open Source Prompt Optimization Library - Save 30-50% on AI API costs. Multi-language, multi-platform support.

174 lines (173 loc) 5.86 kB
"use strict";
/**
 * Context Window Manager
 * Intelligently manages context windows for large prompts
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.manageContextWindow = manageContextWindow;
const tokenizer_1 = require("./tokenizer");
/**
 * Smart context window management.
 *
 * Returns the prompt untouched when its token count (as reported by the
 * tokenizer module's `countTokens`) fits into `maxTokens - reserveTokens`;
 * otherwise splits it into chunks via `smartChunk`.
 *
 * @param {string} prompt - Text to fit into the context window.
 * @param {object} options
 * @param {number} options.maxTokens - Total token budget of the window.
 * @param {number} [options.reserveTokens=1000] - Tokens subtracted from
 *   `maxTokens` before chunking.
 * @param {(chunk: string) => number} [options.priorityScorer] - When given,
 *   chunks are returned sorted by descending score instead of document order.
 * @param {number} [options.overlap=0] - Token budget of trailing text carried
 *   from one chunk into the next at paragraph boundaries.
 * @returns {{chunks: string[], totalTokens: number, chunkTokens: number[],
 *   metadata: {originalSize: number, optimizedSize: number, reduction: number}}}
 *   `totalTokens` is the token count of the original prompt; `chunkTokens[i]`
 *   is the token count of `chunks[i]`.
 */
function manageContextWindow(prompt, options) {
    const { maxTokens, reserveTokens = 1000, priorityScorer, overlap = 0, } = options;
    const availableTokens = maxTokens - reserveTokens;
    const totalTokens = (0, tokenizer_1.countTokens)(prompt);
    // If prompt fits, return as-is
    if (totalTokens <= availableTokens) {
        return {
            chunks: [prompt],
            totalTokens,
            chunkTokens: [totalTokens],
            metadata: {
                originalSize: prompt.length,
                optimizedSize: prompt.length,
                reduction: 0,
            },
        };
    }
    // Need to chunk
    const chunks = smartChunk(prompt, availableTokens, overlap, priorityScorer);
    const chunkTokens = chunks.map((chunk) => (0, tokenizer_1.countTokens)(chunk));
    return {
        chunks,
        totalTokens,
        chunkTokens,
        metadata: {
            originalSize: prompt.length,
            optimizedSize: prompt.length, // Same size, just chunked
            reduction: 0,
        },
    };
}
/**
 * Smart chunking with priority scoring.
 *
 * Fenced code blocks are swapped for short `__CODE_n__` placeholders before
 * splitting so they are never cut apart, then restored at the end. Text is
 * split by blank-line-separated paragraphs; a paragraph that is too large on
 * its own is split by sentences, and a sentence that is too large is split by
 * words.
 *
 * NOTE: budgeting is done on the placeholder text, so a chunk that contains a
 * restored code block can end up larger than `maxTokens`.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Token budget per chunk.
 * @param {number} overlap - Token budget of trailing text repeated at the
 *   start of the next chunk (applied at paragraph boundaries only).
 * @param {(chunk: string) => number} [priorityScorer] - Optional scorer; when
 *   given, the result is sorted by descending score.
 * @returns {string[]} The chunks.
 */
function smartChunk(text, maxTokens, overlap, priorityScorer) {
    // First, preserve code blocks
    const codeBlocks = [];
    const processedText = text.replace(/```[\s\S]*?```/g, (match) => {
        codeBlocks.push(match);
        return `__CODE_${codeBlocks.length - 1}__`;
    });
    // Split by paragraphs first (preserve structure)
    const paragraphs = processedText.split(/\n\s*\n/);
    const chunks = [];
    let currentChunk = '';
    for (const paragraph of paragraphs) {
        const paraTokens = (0, tokenizer_1.countTokens)(paragraph);
        const currentTokens = (0, tokenizer_1.countTokens)(currentChunk);
        if (paraTokens > maxTokens) {
            // Paragraph alone exceeds the limit: flush what we have, then
            // split the paragraph by sentences.
            if (currentChunk) {
                chunks.push(currentChunk);
                currentChunk = '';
            }
            for (const sentence of splitSentences(paragraph)) {
                const sentTokens = (0, tokenizer_1.countTokens)(sentence);
                if ((0, tokenizer_1.countTokens)(currentChunk) + sentTokens <= maxTokens) {
                    currentChunk += (currentChunk ? ' ' : '') + sentence;
                    continue;
                }
                if (currentChunk) {
                    chunks.push(currentChunk);
                    currentChunk = '';
                    if (sentTokens <= maxTokens) {
                        currentChunk = sentence;
                        continue;
                    }
                    // Fall through: the sentence alone is over budget and
                    // must be word-split even though a chunk was pending.
                }
                // Even a single sentence is too large, split by words
                chunks.push(...splitByWords(sentence, maxTokens));
            }
        }
        else if (currentTokens + paraTokens > maxTokens) {
            // Current chunk + paragraph exceeds limit
            if (currentChunk) {
                chunks.push(currentChunk);
                // Add overlap if specified
                if (overlap > 0) {
                    const overlapText = getOverlapText(currentChunk, overlap);
                    currentChunk = overlapText + '\n\n' + paragraph;
                }
                else {
                    currentChunk = paragraph;
                }
            }
            else {
                currentChunk = paragraph;
            }
        }
        else {
            // Add to current chunk
            currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
        }
    }
    // Add remaining chunk
    if (currentChunk) {
        chunks.push(currentChunk);
    }
    // Restore code blocks
    const restored = chunks.map((chunk) => restoreCodeBlocks(chunk, codeBlocks));
    // Sort by priority if scorer provided
    if (priorityScorer) {
        restored.sort((a, b) => priorityScorer(b) - priorityScorer(a));
    }
    return restored;
}
/**
 * Put the original code blocks back in place of their `__CODE_n__` placeholders.
 *
 * A function replacer is used on purpose: with a string replacement,
 * `String.prototype.replace` would interpret `$&`, `$$`, `$1`, ... inside the
 * code block as replacement patterns and corrupt the restored code.
 *
 * @param {string} chunk - Chunk text possibly containing placeholders.
 * @param {string[]} codeBlocks - Extracted code blocks, indexed by placeholder number.
 * @returns {string} The chunk with every known placeholder replaced.
 */
function restoreCodeBlocks(chunk, codeBlocks) {
    return chunk.replace(/__CODE_(\d+)__/g, (placeholder, index) => {
        const block = codeBlocks[Number(index)];
        // Leave look-alike text that does not map to an extracted block untouched
        return block === undefined ? placeholder : block;
    });
}
/**
 * Split a paragraph into sentences at runs of `.`, `!` or `?` followed by
 * whitespace. The terminating punctuation stays attached to its sentence;
 * only the separating whitespace is dropped. Empty pieces are omitted.
 *
 * @param {string} paragraph - Text to split.
 * @returns {string[]} The sentences in order.
 */
function splitSentences(paragraph) {
    const sentences = [];
    const boundary = /([.!?]+)\s+/g;
    let start = 0;
    let match = boundary.exec(paragraph);
    while (match !== null) {
        const end = match.index + match[1].length;
        if (end > start) {
            sentences.push(paragraph.slice(start, end));
        }
        start = boundary.lastIndex;
        match = boundary.exec(paragraph);
    }
    if (start < paragraph.length) {
        sentences.push(paragraph.slice(start));
    }
    return sentences;
}
/**
 * Split text by words when sentences are too large.
 *
 * Words are whitespace-separated and re-joined with single spaces. A single
 * word that exceeds the limit on its own is emitted as its own chunk.
 *
 * @param {string} text - Text to split.
 * @param {number} maxTokens - Token budget per chunk.
 * @returns {string[]} The chunks.
 */
function splitByWords(text, maxTokens) {
    const words = text.split(/\s+/);
    const chunks = [];
    let currentChunk = '';
    for (const word of words) {
        const testChunk = currentChunk + (currentChunk ? ' ' : '') + word;
        if ((0, tokenizer_1.countTokens)(testChunk) > maxTokens) {
            if (currentChunk) {
                chunks.push(currentChunk);
                currentChunk = word;
            }
            else {
                // Single word exceeds limit (rare), just add it
                chunks.push(word);
            }
        }
        else {
            currentChunk = testChunk;
        }
    }
    if (currentChunk) {
        chunks.push(currentChunk);
    }
    return chunks;
}
/**
 * Get overlap text from end of chunk.
 *
 * Walks the whitespace-separated words from the end and collects them (joined
 * with single spaces, in original order) until adding the next word would
 * exceed `overlapTokens`.
 *
 * @param {string} text - Chunk to take the tail from.
 * @param {number} overlapTokens - Token budget for the overlap.
 * @returns {string} The trailing words that fit; may be empty.
 */
function getOverlapText(text, overlapTokens) {
    const words = text.split(/\s+/);
    let overlapText = '';
    let tokens = 0;
    // Get last N words that fit in overlap
    for (let i = words.length - 1; i >= 0 && tokens < overlapTokens; i--) {
        const word = words[i];
        const wordTokens = (0, tokenizer_1.countTokens)(word);
        if (tokens + wordTokens <= overlapTokens) {
            overlapText = word + (overlapText ? ' ' : '') + overlapText;
            tokens += wordTokens;
        }
        else {
            break;
        }
    }
    return overlapText;
}