@jackhua/mini-langchain
Version:
A lightweight TypeScript implementation of LangChain with cost optimization features
402 lines (400 loc) • 15.6 kB
JavaScript
;
// Conventional marker that this CommonJS module was compiled from ES-module syntax.
Object.defineProperty(exports, "__esModule", { value: true });
// Create both export slots as undefined up front (same assignment order as the
// chained form: OptimizationStrategy first, then PromptOptimizer).
exports.OptimizationStrategy = void 0;
exports.PromptOptimizer = void 0;
/**
 * Optimization strategies.
 *
 * String-valued enum: each key maps to the identifier used to select a
 * strategy at runtime.
 */
const OptimizationStrategy = {
    REMOVE_REDUNDANCY: "remove_redundancy",
    COMPRESS_INSTRUCTIONS: "compress_instructions",
    SIMPLIFY_LANGUAGE: "simplify_language",
    REMOVE_FILLER: "remove_filler",
    STRUCTURE_OPTIMIZATION: "structure_optimization",
    SEMANTIC_COMPRESSION: "semantic_compression"
};
exports.OptimizationStrategy = OptimizationStrategy;
/**
 * Built-in Prompt Optimizer
 * Optimizes prompts to reduce tokens while maintaining effectiveness.
 *
 * Rule-based strategies run in the order listed in `config.strategies`. When
 * `config.llm` is provided, prompts whose estimated size exceeds 200 tokens
 * additionally go through an LLM compression pass.
 */
class PromptOptimizer {
    /**
     * @param {object} [config] - Overrides merged on top of the defaults.
     * @param {string[]} [config.strategies] - OptimizationStrategy values to apply, in order.
     *   Defaults to REMOVE_REDUNDANCY, REMOVE_FILLER, COMPRESS_INSTRUCTIONS.
     * @param {number} [config.targetReduction=30] - Stored on the config; no method of this class reads it.
     * @param {boolean} [config.preserveExamples=true] - Protect "example:" sections from filler removal.
     * @param {string} [config.language='en'] - Stored on the config; the stop-word list is English regardless.
     * @param {string[]} [config.customStopWords] - Extra (lower-case) words to treat as removable.
     * @param {{ call: function(string): (string|Promise<string>) }} [config.llm] - Optional model used for
     *   semantic compression.
     */
    constructor(config) {
        this.config = {
            strategies: [
                OptimizationStrategy.REMOVE_REDUNDANCY,
                OptimizationStrategy.REMOVE_FILLER,
                OptimizationStrategy.COMPRESS_INSTRUCTIONS
            ],
            targetReduction: 30,
            preserveExamples: true,
            language: 'en',
            ...config
        };
        this.stopWords = this.initializeStopWords();
    }
    /**
     * Build the set of removable words: English stop words, filler words and
     * any `config.customStopWords`.
     *
     * @returns {Set<string>} Lower-case words eligible for removal.
     */
    initializeStopWords() {
        const baseStopWords = [
            'the', 'is', 'at', 'which', 'on', 'a', 'an',
            'as', 'are', 'been', 'be', 'have', 'has', 'had',
            'were', 'was', 'will', 'would', 'could', 'should',
            'may', 'might', 'must', 'shall', 'to', 'of', 'in',
            'for', 'with', 'by', 'from', 'about', 'into',
            'that', 'this', 'these', 'those'
        ];
        const fillerWords = [
            'really', 'very', 'quite', 'rather', 'somewhat',
            'basically', 'actually', 'literally', 'simply',
            'just', 'maybe', 'perhaps', 'probably', 'possibly'
        ];
        return new Set([
            ...baseStopWords,
            ...fillerWords,
            ...(this.config.customStopWords || [])
        ]);
    }
    /**
     * Optimize a prompt.
     *
     * @param {string} prompt - Text to optimize.
     * @returns {Promise<{optimizedPrompt: string, originalTokenCount: number,
     *   optimizedTokenCount: number, reductionPercentage: number,
     *   appliedStrategies: string[], warnings: (string[]|undefined)}>}
     *   `warnings` is undefined when no strategy produced one.
     */
    async optimize(prompt) {
        let optimizedPrompt = prompt;
        const appliedStrategies = [];
        const warnings = [];
        const originalTokenCount = this.estimateTokens(prompt);
        // Each strategy sees the output of the previous one.
        for (const strategy of this.config.strategies || []) {
            const result = await this.applyStrategy(optimizedPrompt, strategy);
            if (result.modified) {
                optimizedPrompt = result.prompt;
                appliedStrategies.push(strategy);
                if (result.warning) {
                    warnings.push(result.warning);
                }
            }
        }
        // The LLM gate looks at the ORIGINAL prompt's size, not the reduced one.
        if (this.config.llm && this.shouldUseLLMOptimization(prompt)) {
            const llmResult = await this.optimizeWithLLM(optimizedPrompt);
            if (llmResult.modified) {
                optimizedPrompt = llmResult.prompt;
                appliedStrategies.push(OptimizationStrategy.SEMANTIC_COMPRESSION);
            }
        }
        const optimizedTokenCount = this.estimateTokens(optimizedPrompt);
        // An empty prompt has nothing to reduce; avoid 0/0 -> NaN.
        const reductionPercentage = originalTokenCount > 0
            ? ((originalTokenCount - optimizedTokenCount) / originalTokenCount) * 100
            : 0;
        return {
            optimizedPrompt,
            originalTokenCount,
            optimizedTokenCount,
            reductionPercentage,
            appliedStrategies,
            warnings: warnings.length > 0 ? warnings : undefined
        };
    }
    /**
     * Apply a specific optimization strategy.
     *
     * @param {string} prompt - Current prompt text.
     * @param {string} strategy - One of the OptimizationStrategy values.
     * @returns {Promise<{prompt: string, modified: boolean, warning?: string}>}
     *   Unknown strategies (including SEMANTIC_COMPRESSION, which is handled
     *   in `optimize`) return the prompt untouched.
     */
    async applyStrategy(prompt, strategy) {
        switch (strategy) {
            case OptimizationStrategy.REMOVE_REDUNDANCY:
                return this.removeRedundancy(prompt);
            case OptimizationStrategy.REMOVE_FILLER:
                return this.removeFillerWords(prompt);
            case OptimizationStrategy.COMPRESS_INSTRUCTIONS:
                return this.compressInstructions(prompt);
            case OptimizationStrategy.SIMPLIFY_LANGUAGE:
                return this.simplifyLanguage(prompt);
            case OptimizationStrategy.STRUCTURE_OPTIMIZATION:
                return this.optimizeStructure(prompt);
            default:
                return { prompt, modified: false };
        }
    }
    /**
     * Remove redundant words and phrases.
     *
     * Collapses runs of an immediately repeated word ("the the the" -> "the")
     * and swaps wordy phrases for short equivalents. Replacements are inserted
     * in lower case regardless of the matched text's casing.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    removeRedundancy(prompt) {
        // `(?:...)+` collapses a whole run, not just the first pair.
        let result = prompt.replace(/\b(\w+)(?:\s+\1\b)+/gi, '$1');
        const redundantPhrases = new Map([
            ['in order to', 'to'],
            ['at this point in time', 'now'],
            ['due to the fact that', 'because'],
            ['in the event that', 'if'],
            ['for the purpose of', 'for'],
            ['with regard to', 'about'],
            ['in terms of', 'regarding'],
            ['as a matter of fact', 'actually'],
            ['at the end of the day', 'ultimately'],
            ['in light of the fact that', 'since']
        ]);
        for (const [verbose, concise] of redundantPhrases) {
            // Word boundaries keep "Maintain order to" from matching "in order to".
            result = result.replace(new RegExp(`\\b${verbose}\\b`, 'gi'), concise);
        }
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Remove filler and stop words outside preserved sections.
     *
     * Words overlapping a preserved section (code block, quoted text, example)
     * are never dropped, and whitespace inside such a section is copied
     * verbatim. Elsewhere each whitespace run collapses to a single space, or
     * to its line breaks when it contains any. Leading and trailing whitespace
     * is dropped.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean, warning: (string|undefined)}}
     *   `modified` is true only when at least one word was removed.
     */
    removeFillerWords(prompt) {
        const articles = ['a', 'an', 'the'];
        const keptPrepositions = ['in', 'on', 'at', 'by', 'for', 'with'];
        const sections = this.extractPreservedSections(prompt);
        // Tokens carry character offsets so they can be compared with the
        // character-based preserved sections.
        const tokens = Array.from(prompt.matchAll(/\S+/g), (match) => ({
            text: match[0],
            start: match.index,
            end: match.index + match[0].length
        }));
        const normalize = (text) => text.toLowerCase().replace(/[.,!?;:]$/, '');
        const overlapsPreserved = (start, end) => sections.some((section) => start < section.end && end > section.start);
        const insidePreserved = (start, end) => sections.some((section) => section.start <= start && end <= section.end);
        const isRemovable = (token, index) => {
            if (overlapsPreserved(token.start, token.end)) {
                return false;
            }
            const word = normalize(token.text);
            if (!this.stopWords.has(word)) {
                return false;
            }
            // Keep articles that directly precede a content word.
            if (articles.includes(word)) {
                const next = tokens[index + 1];
                if (next && !this.stopWords.has(normalize(next.text))) {
                    return false;
                }
            }
            // Keep prepositions that provide important context.
            return !keptPrepositions.includes(word);
        };
        // Unprotected gap -> its line breaks if it has any, otherwise one space.
        const squeezeGap = (gap) => {
            if (gap === '') {
                return '';
            }
            return gap.replace(/[^\n]/g, '') || ' ';
        };
        // When a word is dropped, the gaps on either side merge into one;
        // a gap with line breaks wins over a plain space.
        const widerGap = (a, b) => {
            const aBreaks = a.includes('\n');
            const bBreaks = b.includes('\n');
            if (aBreaks !== bBreaks) {
                return aBreaks ? a : b;
            }
            return a.length >= b.length ? a : b;
        };
        let output = '';
        let pendingGap = '';
        let cursor = 0;
        let modified = false;
        tokens.forEach((token, index) => {
            const rawGap = prompt.slice(cursor, token.start);
            const gap = insidePreserved(cursor, token.start) ? rawGap : squeezeGap(rawGap);
            cursor = token.end;
            pendingGap = widerGap(pendingGap, gap);
            if (isRemovable(token, index)) {
                modified = true;
                return;
            }
            // No separator before the first kept word (leading whitespace is trimmed).
            if (output !== '') {
                output += pendingGap;
            }
            output += token.text;
            pendingGap = '';
        });
        return {
            prompt: output,
            modified,
            warning: modified ? 'Some filler words removed. Review for clarity.' : undefined
        };
    }
    /**
     * Compress instructions to be more concise.
     *
     * Applies fixed phrase rewrites, then converts ordinal list markers via
     * `compressLists`.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    compressInstructions(prompt) {
        // Patterns are word-bounded so they cannot fire inside a longer word.
        const compressionRules = [
            // "Please make sure to..." -> "ensure..."
            { pattern: /\bplease make sure to\b/gi, replacement: 'ensure' },
            // "I would like you to..." -> "Please..."
            { pattern: /\bI would like you to\b/gi, replacement: 'Please' },
            // "Can you please..." -> "Please..."
            { pattern: /\bCan you please\b/gi, replacement: 'Please' },
            // "It is important that..." -> "Important:"
            { pattern: /\bIt is important that\b/gi, replacement: 'Important:' },
            // "You should always..." -> "Always..."
            { pattern: /\bYou should always\b/gi, replacement: 'Always' },
            // "Make sure that..." -> "Ensure..."
            { pattern: /\bMake sure that\b/gi, replacement: 'Ensure' },
            // "In addition to..." -> "Also..."
            { pattern: /\bIn addition to\b/gi, replacement: 'Also' },
            // "Do not forget to..." -> "Remember to..."
            { pattern: /\bDo not forget to\b/gi, replacement: 'Remember to' }
        ];
        let result = prompt;
        for (const { pattern, replacement } of compressionRules) {
            result = result.replace(pattern, replacement);
        }
        result = this.compressLists(result);
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Simplify complex language by swapping a fixed list of words and phrases
     * for shorter alternatives (whole-word, case-insensitive matches).
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    simplifyLanguage(prompt) {
        const simplifications = new Map([
            ['utilize', 'use'],
            ['implement', 'do'],
            ['facilitate', 'help'],
            ['endeavor', 'try'],
            ['commence', 'start'],
            ['terminate', 'end'],
            ['subsequent', 'next'],
            ['prior to', 'before'],
            ['in lieu of', 'instead of'],
            ['notwithstanding', 'despite']
        ]);
        let result = prompt;
        for (const [complex, simple] of simplifications) {
            result = result.replace(new RegExp(`\\b${complex}\\b`, 'gi'), simple);
        }
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Optimize prompt structure: normalise bullet markers to "• ", trim every
     * line, and collapse three or more consecutive line breaks to two.
     *
     * @param {string} prompt
     * @returns {{prompt: string, modified: boolean}}
     */
    optimizeStructure(prompt) {
        // Only spaces/tabs may surround the marker: `\s` would also swallow the
        // blank line before a bullet, and requiring whitespace after the marker
        // leaves "**bold**" and "-5" line starts alone.
        let result = prompt.replace(/^[ \t]*[-•*][ \t]+/gm, '• ');
        // Trim before collapsing so whitespace-only lines count as blank.
        result = result.split('\n').map((line) => line.trim()).join('\n');
        result = result.replace(/\n{3,}/g, '\n\n');
        return { prompt: result, modified: result !== prompt };
    }
    /**
     * Use LLM for semantic compression.
     *
     * The model's reply is accepted only if it is a non-empty string whose
     * estimated token count is below the input's. Errors from the model are
     * logged with `console.warn` and the input is returned unchanged.
     *
     * @param {string} prompt
     * @returns {Promise<{prompt: string, modified: boolean}>}
     */
    async optimizeWithLLM(prompt) {
        if (!this.config.llm) {
            return { prompt, modified: false };
        }
        try {
            const optimizationPrompt = `Compress the following prompt while preserving its meaning and all important details. Make it more concise without losing critical information:
Original prompt:
"""
${prompt}
"""
Compressed prompt:`;
            const response = await this.config.llm.call(optimizationPrompt);
            const compressed = typeof response === 'string' ? response.trim() : '';
            // An empty reply is "shorter" too, but would wipe out the prompt.
            if (compressed !== '' && this.estimateTokens(compressed) < this.estimateTokens(prompt)) {
                return { prompt: compressed, modified: true };
            }
        }
        catch (error) {
            console.warn('LLM optimization failed:', error);
        }
        return { prompt, modified: false };
    }
    /**
     * Determine if LLM optimization should be used.
     *
     * @param {string} prompt
     * @returns {boolean} True when the prompt is estimated above 200 tokens.
     */
    shouldUseLLMOptimization(prompt) {
        return this.estimateTokens(prompt) > 200;
    }
    /**
     * Extract sections that should be preserved (examples, code, etc.).
     *
     * @param {string} prompt
     * @returns {Array<{start: number, end: number, type: string}>} Character
     *   ranges (`end` exclusive) for fenced code blocks, double-quoted text
     *   and, when `config.preserveExamples` is set, text from "example:" /
     *   "examples:" up to the next blank line or the end of the prompt.
     *   Ranges may overlap.
     */
    extractPreservedSections(prompt) {
        const patterns = [
            { type: 'code', regex: /```[\s\S]*?```/g },
            { type: 'quote', regex: /"[^"]+"/g }
        ];
        if (this.config.preserveExamples) {
            patterns.push({ type: 'example', regex: /example[s]?:[\s\S]*?(?=\n\n|$)/gi });
        }
        const sections = [];
        for (const { type, regex } of patterns) {
            for (const match of prompt.matchAll(regex)) {
                sections.push({
                    start: match.index,
                    end: match.index + match[0].length,
                    type
                });
            }
        }
        return sections;
    }
    /**
     * Check if a character position is within a preserved section.
     *
     * @param {number} position - Character offset into the prompt.
     * @param {Array<{start: number, end: number}>} sections - Ranges with exclusive `end`.
     * @returns {boolean}
     */
    isInPreservedSection(position, sections) {
        return sections.some((section) => position >= section.start && position < section.end);
    }
    /**
     * Compress lists in the prompt: "First, ... Second, ..." -> "1) ... 2) ...".
     *
     * @param {string} prompt
     * @returns {string}
     */
    compressLists(prompt) {
        const ordinals = ['First', 'Second', 'Third', 'Fourth', 'Fifth'];
        // The leading \b keeps words such as "Headfirst," intact.
        return ordinals.reduce((text, ordinal, index) => text.replace(new RegExp(`\\b${ordinal},`, 'gi'), `${index + 1})`), prompt);
    }
    /**
     * Estimate token count (rough approximation: 1 token per 4 characters).
     * This is a simplified version - in production, use a proper tokenizer.
     *
     * @param {string} text
     * @returns {number} Estimated tokens; 0 for empty, null or undefined input.
     */
    estimateTokens(text) {
        if (!text) {
            return 0;
        }
        return Math.ceil(text.length / 4);
    }
    /**
     * Get optimization statistics.
     *
     * @param {string} original
     * @param {string} optimized
     * @returns {{characterReduction: number, wordReduction: number,
     *   lineReduction: number, estimatedCostSaving: number}} Reductions are
     *   rounded percentages (0 when the original count is 0); the cost saving
     *   assumes $0.01 per 1K estimated tokens, rounded to 4 decimals.
     */
    getOptimizationStats(original, optimized) {
        const countWords = (text) => (text.match(/\S+/g) ?? []).length;
        const countLines = (text) => text.split('\n').length;
        const percentReduction = (before, after) => (before > 0 ? Math.round(((before - after) / before) * 100) : 0);
        const tokenReduction = this.estimateTokens(original) - this.estimateTokens(optimized);
        const estimatedCostSaving = (tokenReduction / 1000) * 0.01;
        return {
            characterReduction: percentReduction(original.length, optimized.length),
            wordReduction: percentReduction(countWords(original), countWords(optimized)),
            lineReduction: percentReduction(countLines(original), countLines(optimized)),
            estimatedCostSaving: Number(estimatedCostSaving.toFixed(4))
        };
    }
}
// Publish the class on the CommonJS exports object.
exports.PromptOptimizer = PromptOptimizer;
//# sourceMappingURL=prompt-optimizer.js.map