UNPKG

@ooples/token-optimizer-mcp

Version:

Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters

443 lines 17.6 kB
/** * Smart Grep Tool - 80% Token Reduction * * Achieves token reduction through: * 1. Match-only output (line numbers + matched text, not full files) * 2. Context line control (configurable before/after lines) * 3. Pattern caching (reuse search results) * 4. Result pagination (limit matches returned) * 5. Smart file filtering (skip binary, node_modules, etc.) * * Target: 80% reduction vs returning full file contents */ import { readFileSync, statSync } from 'fs'; import { globSync } from 'glob'; import { relative, join } from 'path'; import { homedir } from 'os'; import { CacheEngine } from '../../core/cache-engine.js'; import { TokenCounter } from '../../core/token-counter.js'; import { MetricsCollector } from '../../core/metrics.js'; import { generateCacheKey } from '../shared/hash-utils.js'; import { detectFileType } from '../shared/syntax-utils.js'; export class SmartGrepTool { cache; tokenCounter; metrics; constructor(cache, tokenCounter, metrics) { this.cache = cache; this.tokenCounter = tokenCounter; this.metrics = metrics; } /** * Smart grep with match-only output and context control */ async grep(pattern, options = {}) { const startTime = Date.now(); // Default options const opts = { cwd: options.cwd ?? process.cwd(), files: options.files ?? ['**/*'], caseSensitive: options.caseSensitive ?? false, wholeWord: options.wholeWord ?? false, regex: options.regex ?? false, extensions: options.extensions ?? [], excludeExtensions: options.excludeExtensions ?? [ '.min.js', '.map', '.lock', ], skipBinary: options.skipBinary ?? true, ignore: options.ignore ?? [ '**/node_modules/**', '**/.git/**', '**/dist/**', '**/build/**', ], includeContext: options.includeContext ?? false, contextBefore: options.contextBefore ?? 0, contextAfter: options.contextAfter ?? 0, includeColumn: options.includeColumn ?? false, maxMatchesPerFile: options.maxMatchesPerFile ?? Infinity, limit: options.limit ?? Infinity, offset: options.offset ?? 0, filesWithMatches: options.filesWithMatches ?? false, count: options.count ?? false, useCache: options.useCache ?? true, ttl: options.ttl ?? 300, maxFileSize: options.maxFileSize ?? 10 * 1024 * 1024, // 10MB default encoding: options.encoding ?? 'utf-8', }; try { // Check cache first const cacheKey = generateCacheKey('grep', { pattern, options: opts }); if (opts.useCache) { const cached = this.cache.get(cacheKey); if (cached) { const result = JSON.parse(cached.toString()); result.metadata.cacheHit = true; const duration = Date.now() - startTime; this.metrics.record({ operation: 'smart_grep', duration, inputTokens: result.metadata.tokenCount, outputTokens: 0, cachedTokens: result.metadata.originalTokenCount, savedTokens: result.metadata.tokensSaved, success: true, cacheHit: true, }); return result; } } // Build search pattern const searchPattern = this.buildPattern(pattern, opts); // Find files to search let filesToSearch = []; for (const filePattern of opts.files) { const matches = globSync(filePattern, { cwd: opts.cwd, absolute: true, ignore: opts.ignore, nodir: true, }); filesToSearch.push(...matches); } // Filter files by extension and size filesToSearch = filesToSearch.filter((file) => { try { // Extension filter if (opts.extensions.length > 0) { const hasAllowedExt = opts.extensions.some((ext) => file.endsWith(ext)); if (!hasAllowedExt) return false; } const hasExcludedExt = opts.excludeExtensions.some((ext) => file.endsWith(ext)); if (hasExcludedExt) return false; // Size filter const stats = statSync(file); if (stats.size > opts.maxFileSize) return false; // Binary file filter if (opts.skipBinary && this.isBinaryFile(file)) return false; return true; } catch { return false; } }); const filesSearched = filesToSearch.length; // Search files const allMatches = []; const filesWithMatches = new Set(); const matchCounts = new Map(); for (const file of filesToSearch) { try { const content = readFileSync(file, opts.encoding); const lines = content.split('\n'); const fileMatches = []; for (let i = 0; i < lines.length; i++) { const line = lines[i]; const matches = [...line.matchAll(searchPattern)]; for (const match of matches) { if (fileMatches.length >= opts.maxMatchesPerFile) break; const grepMatch = { file: relative(opts.cwd, file), lineNumber: i + 1, // 1-based line: line, match: match[0], }; // Add column if requested if (opts.includeColumn && match.index !== undefined) { grepMatch.column = match.index; } // Add context if requested if (opts.includeContext) { if (opts.contextBefore > 0) { const start = Math.max(0, i - opts.contextBefore); grepMatch.before = lines.slice(start, i); } if (opts.contextAfter > 0) { const end = Math.min(lines.length, i + opts.contextAfter + 1); grepMatch.after = lines.slice(i + 1, end); } } fileMatches.push(grepMatch); } } if (fileMatches.length > 0) { filesWithMatches.add(relative(opts.cwd, file)); matchCounts.set(relative(opts.cwd, file), fileMatches.length); allMatches.push(...fileMatches); } } catch { // Skip files we can't read continue; } } // Apply pagination const totalMatches = allMatches.length; const paginatedMatches = allMatches.slice(opts.offset, opts.offset + opts.limit); const truncated = totalMatches > paginatedMatches.length + opts.offset; // Build result based on mode let resultData; let resultTokens; if (opts.count) { // Count mode: return counts only resultData = { counts: Object.fromEntries(matchCounts) }; resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens; } else if (opts.filesWithMatches) { // Files-with-matches mode: return filenames only resultData = { files: Array.from(filesWithMatches) }; resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens; } else { // Normal mode: return matches resultData = { matches: paginatedMatches }; resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens; } // Estimate original tokens (if we had returned all file contents) let originalTokens = resultTokens; if (opts.count || opts.filesWithMatches) { // Count/files mode: estimate content would be 100x more tokens originalTokens = resultTokens * 100; } else if (!opts.includeContext) { // Match-only mode: estimate content would be 20x more tokens originalTokens = resultTokens * 20; } else { // Context mode: estimate content would be 5x more tokens originalTokens = resultTokens * 5; } const tokensSaved = originalTokens - resultTokens; const compressionRatio = resultTokens / originalTokens; // Build result const result = { success: true, pattern, metadata: { totalMatches, filesSearched, filesWithMatches: filesWithMatches.size, returnedMatches: opts.count || opts.filesWithMatches ? 0 : paginatedMatches.length, truncated, tokensSaved, tokenCount: resultTokens, originalTokenCount: originalTokens, compressionRatio, duration: 0, // Will be set below cacheHit: false, }, ...(opts.count ? { counts: matchCounts } : {}), ...(opts.filesWithMatches ? { files: Array.from(filesWithMatches) } : {}), ...(!opts.count && !opts.filesWithMatches ? { matches: paginatedMatches } : {}), }; // Cache result if (opts.useCache) { const resultString = JSON.stringify(result); const resultSize = Buffer.from(resultString, 'utf-8').length; this.cache.set(cacheKey, resultString, resultSize, resultSize); } // Record metrics const duration = Date.now() - startTime; result.metadata.duration = duration; this.metrics.record({ operation: 'smart_grep', duration, inputTokens: resultTokens, outputTokens: 0, cachedTokens: 0, savedTokens: tokensSaved, success: true, cacheHit: false, }); return result; } catch (error) { const duration = Date.now() - startTime; this.metrics.record({ operation: 'smart_grep', duration, inputTokens: 0, outputTokens: 0, cachedTokens: 0, savedTokens: 0, success: false, cacheHit: false, }); return { success: false, pattern, metadata: { totalMatches: 0, filesSearched: 0, filesWithMatches: 0, returnedMatches: 0, truncated: false, tokensSaved: 0, tokenCount: 0, originalTokenCount: 0, compressionRatio: 0, duration, cacheHit: false, }, error: error instanceof Error ? error.message : String(error), }; } } /** * Build search pattern from string */ buildPattern(pattern, opts) { let regexPattern = pattern; // Escape regex special characters if not in regex mode if (!opts.regex) { regexPattern = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } // Add word boundary if whole word mode if (opts.wholeWord) { regexPattern = `\\b${regexPattern}\\b`; } // Build flags const flags = opts.caseSensitive ? 'g' : 'gi'; return new RegExp(regexPattern, flags); } /** * Check if a file is binary */ isBinaryFile(filePath) { try { // Read first 8KB to check for binary content const buffer = readFileSync(filePath, { encoding: null }).slice(0, 8192); // Check for null bytes (common in binary files) for (let i = 0; i < buffer.length; i++) { if (buffer[i] === 0) { return true; } } // Check file type const fileType = detectFileType(filePath); const binaryTypes = ['image', 'video', 'audio', 'binary', 'archive']; return binaryTypes.includes(fileType || ''); } catch { return false; } } /** * Get grep statistics */ getStats() { const grepMetrics = this.metrics.getOperations(0, 'smart_grep'); const totalSearches = grepMetrics.length; const cacheHits = grepMetrics.filter((m) => m.cacheHit).length; const totalTokensSaved = grepMetrics.reduce((sum, m) => sum + (m.savedTokens || 0), 0); const totalInputTokens = grepMetrics.reduce((sum, m) => sum + (m.inputTokens || 0), 0); const totalOriginalTokens = totalInputTokens + totalTokensSaved; const averageReduction = totalOriginalTokens > 0 ? (totalTokensSaved / totalOriginalTokens) * 100 : 0; return { totalSearches, cacheHits, totalTokensSaved, averageReduction, }; } } /** * Get smart grep tool instance */ export function getSmartGrepTool(cache, tokenCounter, metrics) { return new SmartGrepTool(cache, tokenCounter, metrics); } /** * CLI function - Creates resources and uses factory */ export async function runSmartGrep(pattern, options = {}) { const cache = new CacheEngine(join(homedir(), '.hypercontext', 'cache'), 100); const tokenCounter = new TokenCounter(); const metrics = new MetricsCollector(); const tool = getSmartGrepTool(cache, tokenCounter, metrics); return tool.grep(pattern, options); } /** * MCP Tool Definition */ export const SMART_GREP_TOOL_DEFINITION = { name: 'smart_grep', description: 'Search file contents with 80% token reduction through match-only output and smart filtering', inputSchema: { type: 'object', properties: { pattern: { type: 'string', description: 'Search pattern (string or regex)', }, cwd: { type: 'string', description: 'Working directory for search', }, files: { type: 'array', items: { type: 'string' }, description: 'File patterns to search (glob patterns)', }, caseSensitive: { type: 'boolean', description: 'Case-sensitive search', default: false, }, regex: { type: 'boolean', description: 'Treat pattern as regex', default: false, }, extensions: { type: 'array', items: { type: 'string' }, description: 'Search only these file extensions', }, includeContext: { type: 'boolean', description: 'Include context lines around matches', default: false, }, contextBefore: { type: 'number', description: 'Lines of context before match', default: 0, }, contextAfter: { type: 'number', description: 'Lines of context after match', default: 0, }, limit: { type: 'number', description: 'Maximum matches to return', }, filesWithMatches: { type: 'boolean', description: 'Only return filenames, not matches', default: false, }, count: { type: 'boolean', description: 'Only return match counts per file', default: false, }, }, required: ['pattern'], }, }; //# sourceMappingURL=smart-grep.js.map