UNPKG

@jackhua/mini-langchain

Version:

A lightweight TypeScript implementation of LangChain with cost optimization features

402 lines (400 loc) 15.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PromptOptimizer = exports.OptimizationStrategy = void 0;
/**
 * Optimization strategies
 */
var OptimizationStrategy;
(function (OptimizationStrategy) {
    OptimizationStrategy["REMOVE_REDUNDANCY"] = "remove_redundancy";
    OptimizationStrategy["COMPRESS_INSTRUCTIONS"] = "compress_instructions";
    OptimizationStrategy["SIMPLIFY_LANGUAGE"] = "simplify_language";
    OptimizationStrategy["REMOVE_FILLER"] = "remove_filler";
    OptimizationStrategy["STRUCTURE_OPTIMIZATION"] = "structure_optimization";
    OptimizationStrategy["SEMANTIC_COMPRESSION"] = "semantic_compression";
})(OptimizationStrategy || (exports.OptimizationStrategy = OptimizationStrategy = {}));
/**
 * Percentage by which `after` is smaller than `before`.
 * Returns 0 when `before` is 0 so that empty inputs do not produce NaN.
 */
function percentReduction(before, after) {
    return before === 0 ? 0 : ((before - after) / before) * 100;
}
/**
 * Number of whitespace-separated words in `text` (0 for empty or blank text).
 */
function countWords(text) {
    const trimmed = text.trim();
    return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}
/**
 * Whitespace to emit between two kept tokens when rebuilding a prompt.
 *
 * - If the gap lies inside a preserved section it is copied verbatim, so the
 *   layout of code blocks, quotes and examples is untouched.
 * - Otherwise the gap is normalized: a single space, or one/two newlines when
 *   the original text between the tokens contained a line/paragraph break.
 */
function separatorBetween(prompt, gapStart, gapEnd, sections) {
    const between = prompt.slice(gapStart, gapEnd);
    const insidePreserved = sections.some((section) => gapStart >= section.start && gapEnd <= section.end);
    if (insidePreserved) {
        return between;
    }
    const newlineCount = (between.match(/\n/g) || []).length;
    if (newlineCount === 0) {
        return ' ';
    }
    return newlineCount === 1 ? '\n' : '\n\n';
}
/**
 * Built-in Prompt Optimizer
 * Optimizes prompts to reduce tokens while maintaining effectiveness
 */
class PromptOptimizer {
    /**
     * @param {object} [config] Overrides merged over the defaults below.
     *   Fields read by this class: `strategies`, `preserveExamples`,
     *   `customStopWords` and `llm` (an object exposing an async `call(text)`).
     *   `targetReduction` and `language` are stored but not read in this file.
     */
    constructor(config) {
        this.config = {
            strategies: [
                OptimizationStrategy.REMOVE_REDUNDANCY,
                OptimizationStrategy.REMOVE_FILLER,
                OptimizationStrategy.COMPRESS_INSTRUCTIONS
            ],
            targetReduction: 30,
            preserveExamples: true,
            language: 'en',
            ...config
        };
        this.stopWords = this.initializeStopWords();
    }
    /**
     * Build the set of words that removeFillerWords() may drop.
     *
     * Custom stop words are lower-cased because lookups are performed on
     * lower-cased words; a mixed-case entry would otherwise never match.
     *
     * @returns {Set<string>} Lower-case stop words and filler words.
     */
    initializeStopWords() {
        const baseStopWords = [
            'the', 'is', 'at', 'which', 'on', 'a', 'an', 'as', 'are',
            'been', 'be', 'have', 'has', 'had', 'were', 'was', 'will',
            'would', 'could', 'should', 'may', 'might', 'must', 'shall',
            'to', 'of', 'in', 'for', 'with', 'by', 'from', 'about',
            'into', 'that', 'this', 'these', 'those'
        ];
        const fillerWords = [
            'really', 'very', 'quite', 'rather', 'somewhat', 'basically',
            'actually', 'literally', 'simply', 'just', 'maybe', 'perhaps',
            'probably', 'possibly'
        ];
        const customStopWords = (this.config.customStopWords || [])
            .map((word) => String(word).toLowerCase());
        return new Set([...baseStopWords, ...fillerWords, ...customStopWords]);
    }
    /**
     * Optimize a prompt.
     *
     * Runs every configured rule-based strategy in order, then (when an LLM is
     * configured and the original prompt is long enough) an LLM compression
     * pass, and reports token estimates before and after.
     *
     * @param {string} prompt Prompt text to optimize.
     * @returns {Promise<object>} `{ optimizedPrompt, originalTokenCount,
     *   optimizedTokenCount, reductionPercentage, appliedStrategies, warnings }`.
     *   `warnings` is `undefined` when no strategy produced one.
     */
    async optimize(prompt) {
        let optimizedPrompt = prompt;
        const appliedStrategies = [];
        const warnings = [];
        const originalTokenCount = this.estimateTokens(prompt);
        for (const strategy of this.config.strategies || []) {
            const result = await this.applyStrategy(optimizedPrompt, strategy);
            if (!result.modified) {
                continue;
            }
            optimizedPrompt = result.prompt;
            appliedStrategies.push(strategy);
            if (result.warning) {
                warnings.push(result.warning);
            }
        }
        // The LLM pass is gated on the size of the ORIGINAL prompt.
        if (this.config.llm && this.shouldUseLLMOptimization(prompt)) {
            const llmResult = await this.optimizeWithLLM(optimizedPrompt);
            if (llmResult.modified) {
                optimizedPrompt = llmResult.prompt;
                appliedStrategies.push(OptimizationStrategy.SEMANTIC_COMPRESSION);
            }
        }
        const optimizedTokenCount = this.estimateTokens(optimizedPrompt);
        return {
            optimizedPrompt,
            originalTokenCount,
            optimizedTokenCount,
            // Guarded: an empty prompt has 0 tokens and must not yield NaN.
            reductionPercentage: percentReduction(originalTokenCount, optimizedTokenCount),
            appliedStrategies,
            warnings: warnings.length > 0 ? warnings : undefined
        };
    }
    /**
     * Apply a specific optimization strategy.
     *
     * @param {string} prompt Current prompt text.
     * @param {string} strategy One of the OptimizationStrategy values.
     * @returns {Promise<{prompt: string, modified: boolean, warning?: string}>}
     *   Unknown strategies (including SEMANTIC_COMPRESSION, which is handled
     *   by optimize()) return the prompt unmodified.
     */
    async applyStrategy(prompt, strategy) {
        switch (strategy) {
            case OptimizationStrategy.REMOVE_REDUNDANCY:
                return this.removeRedundancy(prompt);
            case OptimizationStrategy.REMOVE_FILLER:
                return this.removeFillerWords(prompt);
            case OptimizationStrategy.COMPRESS_INSTRUCTIONS:
                return this.compressInstructions(prompt);
            case OptimizationStrategy.SIMPLIFY_LANGUAGE:
                return this.simplifyLanguage(prompt);
            case OptimizationStrategy.STRUCTURE_OPTIMIZATION:
                return this.optimizeStructure(prompt);
            default:
                return { prompt, modified: false };
        }
    }
    /**
     * Remove immediately repeated words and replace wordy phrases with
     * shorter equivalents.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    removeRedundancy(prompt) {
        const redundantPhrases = new Map([
            ['in order to', 'to'],
            ['at this point in time', 'now'],
            ['due to the fact that', 'because'],
            ['in the event that', 'if'],
            ['for the purpose of', 'for'],
            ['with regard to', 'about'],
            ['in terms of', 'regarding'],
            ['as a matter of fact', 'actually'],
            ['at the end of the day', 'ultimately'],
            ['in light of the fact that', 'since']
        ]);
        // "the the" -> "the"
        let result = prompt.replace(/\b(\w+)\s+\1\b/gi, '$1');
        for (const [verbose, concise] of redundantPhrases) {
            // Word boundaries keep the phrase from matching inside other words
            // (e.g. "certain terms of" must not become "certaregarding").
            result = result.replace(new RegExp(`\\b${verbose}\\b`, 'gi'), concise);
        }
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Remove stop/filler words outside preserved sections.
     *
     * Words are located by character offset so they can be compared with the
     * character ranges from extractPreservedSections(). Tokens that overlap a
     * preserved section are always kept and the whitespace inside such a
     * section is copied verbatim. Elsewhere whitespace is normalized to a
     * single space, while line and paragraph breaks are retained.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean, warning: (string|undefined)}}
     *   When nothing is removed the original prompt is returned unchanged.
     */
    removeFillerWords(prompt) {
        const preservedSections = this.extractPreservedSections(prompt);
        const articles = ['a', 'an', 'the'];
        const structuralPrepositions = ['in', 'on', 'at', 'by', 'for', 'with'];
        const normalize = (word) => word.toLowerCase().replace(/[.,!?;:]$/, '');
        const tokens = Array.from(prompt.matchAll(/\S+/g), (match) => ({
            text: match[0],
            start: match.index,
            end: match.index + match[0].length
        }));
        const overlapsPreserved = (token) => preservedSections.some((section) => token.start < section.end && token.end > section.start);
        const shouldKeep = (token, index) => {
            if (overlapsPreserved(token)) {
                return true;
            }
            const word = normalize(token.text);
            if (!this.stopWords.has(word)) {
                return true;
            }
            // Keep prepositions that provide important context.
            if (structuralPrepositions.includes(word)) {
                return true;
            }
            // Keep an article only when it is followed by a content word.
            if (articles.includes(word)) {
                const next = tokens[index + 1];
                return next !== undefined && !this.stopWords.has(normalize(next.text));
            }
            return false;
        };
        const keptTokens = tokens.filter(shouldKeep);
        const modified = keptTokens.length !== tokens.length;
        if (!modified) {
            return { prompt, modified: false, warning: undefined };
        }
        let result = '';
        let previous = null;
        for (const token of keptTokens) {
            if (previous !== null) {
                result += separatorBetween(prompt, previous.end, token.start, preservedSections);
            }
            result += token.text;
            previous = token;
        }
        return {
            prompt: result,
            modified,
            warning: 'Some filler words removed. Review for clarity.'
        };
    }
    /**
     * Compress instructions to be more concise.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    compressInstructions(prompt) {
        // [pattern, replacement] pairs, applied in order.
        const compressionRules = [
            [/please make sure to/gi, 'ensure'],
            [/I would like you to/gi, 'Please'],
            [/Can you please/gi, 'Please'],
            [/It is important that/gi, 'Important:'],
            [/You should always/gi, 'Always'],
            [/Make sure that/gi, 'Ensure'],
            [/In addition to/gi, 'Also'],
            [/Do not forget to/gi, 'Remember to']
        ];
        let result = prompt;
        for (const [pattern, replacement] of compressionRules) {
            result = result.replace(pattern, replacement);
        }
        result = this.compressLists(result);
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Replace complex words and phrases with simpler alternatives
     * (whole-word, case-insensitive matches).
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    simplifyLanguage(prompt) {
        const simplifications = new Map([
            ['utilize', 'use'],
            ['implement', 'do'],
            ['facilitate', 'help'],
            ['endeavor', 'try'],
            ['commence', 'start'],
            ['terminate', 'end'],
            ['subsequent', 'next'],
            ['prior to', 'before'],
            ['in lieu of', 'instead of'],
            ['notwithstanding', 'despite']
        ]);
        let result = prompt;
        for (const [complex, simple] of simplifications) {
            result = result.replace(new RegExp(`\\b${complex}\\b`, 'gi'), simple);
        }
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Optimize prompt structure: normalize bullet markers, trim every line and
     * collapse runs of blank lines to a single blank line.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    optimizeStructure(prompt) {
        const result = prompt
            // A bullet is a marker followed by at least one space or tab on the
            // same line. Restricting the match to horizontal whitespace keeps
            // blank lines before a list intact and leaves "**bold**", "---"
            // and "-5" alone.
            .replace(/^[ \t]*[-•*][ \t]+/gm, '• ')
            .split('\n')
            .map((line) => line.trim())
            .join('\n')
            // Collapse after trimming so whitespace-only lines count as blank.
            .replace(/\n{3,}/g, '\n\n');
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Use the configured LLM for semantic compression.
     *
     * The reply is accepted only if it is a non-empty string whose estimated
     * token count is lower than the input's. Errors thrown by the LLM call are
     * logged with console.warn and the prompt is returned unmodified.
     *
     * @param {string} prompt
     * @returns {Promise<{prompt: string, modified: boolean}>}
     */
    async optimizeWithLLM(prompt) {
        const unchanged = { prompt, modified: false };
        if (!this.config.llm) {
            return unchanged;
        }
        try {
            const optimizationPrompt = [
                'Compress the following prompt while preserving its meaning and all important details. ',
                `Make it more concise without losing critical information: Original prompt: """ ${prompt} """ Compressed prompt:`
            ].join('\n');
            const response = await this.config.llm.call(optimizationPrompt);
            const compressed = typeof response === 'string' ? response.trim() : '';
            // An empty reply is "shorter" but useless; never accept it.
            if (compressed !== '' && this.estimateTokens(compressed) < this.estimateTokens(prompt)) {
                return { prompt: compressed, modified: true };
            }
        }
        catch (error) {
            console.warn('LLM optimization failed:', error);
        }
        return unchanged;
    }
    /**
     * Determine if LLM optimization should be used: only for prompts whose
     * estimated size exceeds 200 tokens.
     *
     * @param {string} prompt
     * @returns {boolean}
     */
    shouldUseLLMOptimization(prompt) {
        return this.estimateTokens(prompt) > 200;
    }
    /**
     * Extract sections that should be preserved: fenced code blocks,
     * double-quoted text and, when `preserveExamples` is set, text from
     * "example:" / "examples:" up to the next blank line or end of input.
     *
     * @param {string} prompt
     * @returns {Array<{start: number, end: number, type: string}>} Character
     *   ranges; `start` is inclusive and `end` is exclusive. Ranges may overlap.
     */
    extractPreservedSections(prompt) {
        const sections = [];
        const collect = (regex, type) => {
            for (const match of prompt.matchAll(regex)) {
                sections.push({
                    start: match.index,
                    end: match.index + match[0].length,
                    type
                });
            }
        };
        collect(/```[\s\S]*?```/g, 'code');
        collect(/"[^"]+"/g, 'quote');
        if (this.config.preserveExamples) {
            collect(/example[s]?:[\s\S]*?(?=\n\n|$)/gi, 'example');
        }
        return sections;
    }
    /**
     * Check if a character offset is within a preserved section.
     *
     * @param {number} position Character offset into the prompt.
     * @param {Array<{start: number, end: number}>} sections Ranges from
     *   extractPreservedSections(); `end` is exclusive.
     * @returns {boolean}
     */
    isInPreservedSection(position, sections) {
        return sections.some((section) => position >= section.start && position < section.end);
    }
    /**
     * Compress lists in the prompt:
     * "First, ... Second, ... Third, ..." becomes "1) ... 2) ... 3) ...".
     *
     * @param {string} prompt
     * @returns {string}
     */
    compressLists(prompt) {
        const ordinals = ['First', 'Second', 'Third', 'Fourth', 'Fifth'];
        // The leading \b stops matches inside longer words ("Headfirst,").
        return ordinals.reduce((text, ordinal, index) => text.replace(new RegExp(`\\b${ordinal},`, 'gi'), `${index + 1})`), prompt);
    }
    /**
     * Estimate token count (rough approximation: 1 token per 4 characters).
     * This is a simplified version - in production, use a proper tokenizer.
     *
     * @param {string} text
     * @returns {number}
     */
    estimateTokens(text) {
        return Math.ceil(text.length / 4);
    }
    /**
     * Get optimization statistics comparing two versions of a prompt.
     *
     * @param {string} original
     * @param {string} optimized
     * @returns {{characterReduction: number, wordReduction: number,
     *   lineReduction: number, estimatedCostSaving: number}} Reductions are
     *   rounded percentages (0 when the original measure is 0). The cost
     *   saving assumes $0.01 per 1K estimated tokens.
     */
    getOptimizationStats(original, optimized) {
        const lineCount = (text) => text.split('\n').length;
        const tokenReduction = this.estimateTokens(original) - this.estimateTokens(optimized);
        const estimatedCostSaving = (tokenReduction / 1000) * 0.01;
        return {
            characterReduction: Math.round(percentReduction(original.length, optimized.length)),
            wordReduction: Math.round(percentReduction(countWords(original), countWords(optimized))),
            lineReduction: Math.round(percentReduction(lineCount(original), lineCount(optimized))),
            estimatedCostSaving: Number(estimatedCostSaving.toFixed(4))
        };
    }
}
exports.PromptOptimizer = PromptOptimizer;
//# sourceMappingURL=prompt-optimizer.js.map