@ooples/token-optimizer-mcp
Version:
Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters
443 lines • 17.6 kB
JavaScript
/**
* Smart Grep Tool - 80% Token Reduction
*
* Achieves token reduction through:
* 1. Match-only output (line numbers + matched text, not full files)
* 2. Context line control (configurable before/after lines)
* 3. Pattern caching (reuse search results)
* 4. Result pagination (limit matches returned)
* 5. Smart file filtering (skip binary, node_modules, etc.)
*
* Target: 80% reduction vs returning full file contents
*/
import { readFileSync, statSync } from 'fs';
import { globSync } from 'glob';
import { relative, join } from 'path';
import { homedir } from 'os';
import { CacheEngine } from '../../core/cache-engine.js';
import { TokenCounter } from '../../core/token-counter.js';
import { MetricsCollector } from '../../core/metrics.js';
import { generateCacheKey } from '../shared/hash-utils.js';
import { detectFileType } from '../shared/syntax-utils.js';
export class SmartGrepTool {
cache;
tokenCounter;
metrics;
constructor(cache, tokenCounter, metrics) {
this.cache = cache;
this.tokenCounter = tokenCounter;
this.metrics = metrics;
}
/**
* Smart grep with match-only output and context control
*/
async grep(pattern, options = {}) {
const startTime = Date.now();
// Default options
const opts = {
cwd: options.cwd ?? process.cwd(),
files: options.files ?? ['**/*'],
caseSensitive: options.caseSensitive ?? false,
wholeWord: options.wholeWord ?? false,
regex: options.regex ?? false,
extensions: options.extensions ?? [],
excludeExtensions: options.excludeExtensions ?? [
'.min.js',
'.map',
'.lock',
],
skipBinary: options.skipBinary ?? true,
ignore: options.ignore ?? [
'**/node_modules/**',
'**/.git/**',
'**/dist/**',
'**/build/**',
],
includeContext: options.includeContext ?? false,
contextBefore: options.contextBefore ?? 0,
contextAfter: options.contextAfter ?? 0,
includeColumn: options.includeColumn ?? false,
maxMatchesPerFile: options.maxMatchesPerFile ?? Infinity,
limit: options.limit ?? Infinity,
offset: options.offset ?? 0,
filesWithMatches: options.filesWithMatches ?? false,
count: options.count ?? false,
useCache: options.useCache ?? true,
ttl: options.ttl ?? 300,
maxFileSize: options.maxFileSize ?? 10 * 1024 * 1024, // 10MB default
encoding: options.encoding ?? 'utf-8',
};
try {
// Check cache first
const cacheKey = generateCacheKey('grep', { pattern, options: opts });
if (opts.useCache) {
const cached = this.cache.get(cacheKey);
if (cached) {
const result = JSON.parse(cached.toString());
result.metadata.cacheHit = true;
const duration = Date.now() - startTime;
this.metrics.record({
operation: 'smart_grep',
duration,
inputTokens: result.metadata.tokenCount,
outputTokens: 0,
cachedTokens: result.metadata.originalTokenCount,
savedTokens: result.metadata.tokensSaved,
success: true,
cacheHit: true,
});
return result;
}
}
// Build search pattern
const searchPattern = this.buildPattern(pattern, opts);
// Find files to search
let filesToSearch = [];
for (const filePattern of opts.files) {
const matches = globSync(filePattern, {
cwd: opts.cwd,
absolute: true,
ignore: opts.ignore,
nodir: true,
});
filesToSearch.push(...matches);
}
// Filter files by extension and size
filesToSearch = filesToSearch.filter((file) => {
try {
// Extension filter
if (opts.extensions.length > 0) {
const hasAllowedExt = opts.extensions.some((ext) => file.endsWith(ext));
if (!hasAllowedExt)
return false;
}
const hasExcludedExt = opts.excludeExtensions.some((ext) => file.endsWith(ext));
if (hasExcludedExt)
return false;
// Size filter
const stats = statSync(file);
if (stats.size > opts.maxFileSize)
return false;
// Binary file filter
if (opts.skipBinary && this.isBinaryFile(file))
return false;
return true;
}
catch {
return false;
}
});
const filesSearched = filesToSearch.length;
// Search files
const allMatches = [];
const filesWithMatches = new Set();
const matchCounts = new Map();
for (const file of filesToSearch) {
try {
const content = readFileSync(file, opts.encoding);
const lines = content.split('\n');
const fileMatches = [];
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
const matches = [...line.matchAll(searchPattern)];
for (const match of matches) {
if (fileMatches.length >= opts.maxMatchesPerFile)
break;
const grepMatch = {
file: relative(opts.cwd, file),
lineNumber: i + 1, // 1-based
line: line,
match: match[0],
};
// Add column if requested
if (opts.includeColumn && match.index !== undefined) {
grepMatch.column = match.index;
}
// Add context if requested
if (opts.includeContext) {
if (opts.contextBefore > 0) {
const start = Math.max(0, i - opts.contextBefore);
grepMatch.before = lines.slice(start, i);
}
if (opts.contextAfter > 0) {
const end = Math.min(lines.length, i + opts.contextAfter + 1);
grepMatch.after = lines.slice(i + 1, end);
}
}
fileMatches.push(grepMatch);
}
}
if (fileMatches.length > 0) {
filesWithMatches.add(relative(opts.cwd, file));
matchCounts.set(relative(opts.cwd, file), fileMatches.length);
allMatches.push(...fileMatches);
}
}
catch {
// Skip files we can't read
continue;
}
}
// Apply pagination
const totalMatches = allMatches.length;
const paginatedMatches = allMatches.slice(opts.offset, opts.offset + opts.limit);
const truncated = totalMatches > paginatedMatches.length + opts.offset;
// Build result based on mode
let resultData;
let resultTokens;
if (opts.count) {
// Count mode: return counts only
resultData = { counts: Object.fromEntries(matchCounts) };
resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens;
}
else if (opts.filesWithMatches) {
// Files-with-matches mode: return filenames only
resultData = { files: Array.from(filesWithMatches) };
resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens;
}
else {
// Normal mode: return matches
resultData = { matches: paginatedMatches };
resultTokens = this.tokenCounter.count(JSON.stringify(resultData)).tokens;
}
// Estimate original tokens (if we had returned all file contents)
let originalTokens = resultTokens;
if (opts.count || opts.filesWithMatches) {
// Count/files mode: estimate content would be 100x more tokens
originalTokens = resultTokens * 100;
}
else if (!opts.includeContext) {
// Match-only mode: estimate content would be 20x more tokens
originalTokens = resultTokens * 20;
}
else {
// Context mode: estimate content would be 5x more tokens
originalTokens = resultTokens * 5;
}
const tokensSaved = originalTokens - resultTokens;
const compressionRatio = resultTokens / originalTokens;
// Build result
const result = {
success: true,
pattern,
metadata: {
totalMatches,
filesSearched,
filesWithMatches: filesWithMatches.size,
returnedMatches: opts.count || opts.filesWithMatches ? 0 : paginatedMatches.length,
truncated,
tokensSaved,
tokenCount: resultTokens,
originalTokenCount: originalTokens,
compressionRatio,
duration: 0, // Will be set below
cacheHit: false,
},
...(opts.count ? { counts: matchCounts } : {}),
...(opts.filesWithMatches
? { files: Array.from(filesWithMatches) }
: {}),
...(!opts.count && !opts.filesWithMatches
? { matches: paginatedMatches }
: {}),
};
// Cache result
if (opts.useCache) {
const resultString = JSON.stringify(result);
const resultSize = Buffer.from(resultString, 'utf-8').length;
this.cache.set(cacheKey, resultString, resultSize, resultSize);
}
// Record metrics
const duration = Date.now() - startTime;
result.metadata.duration = duration;
this.metrics.record({
operation: 'smart_grep',
duration,
inputTokens: resultTokens,
outputTokens: 0,
cachedTokens: 0,
savedTokens: tokensSaved,
success: true,
cacheHit: false,
});
return result;
}
catch (error) {
const duration = Date.now() - startTime;
this.metrics.record({
operation: 'smart_grep',
duration,
inputTokens: 0,
outputTokens: 0,
cachedTokens: 0,
savedTokens: 0,
success: false,
cacheHit: false,
});
return {
success: false,
pattern,
metadata: {
totalMatches: 0,
filesSearched: 0,
filesWithMatches: 0,
returnedMatches: 0,
truncated: false,
tokensSaved: 0,
tokenCount: 0,
originalTokenCount: 0,
compressionRatio: 0,
duration,
cacheHit: false,
},
error: error instanceof Error ? error.message : String(error),
};
}
}
/**
* Build search pattern from string
*/
buildPattern(pattern, opts) {
let regexPattern = pattern;
// Escape regex special characters if not in regex mode
if (!opts.regex) {
regexPattern = pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
// Add word boundary if whole word mode
if (opts.wholeWord) {
regexPattern = `\\b${regexPattern}\\b`;
}
// Build flags
const flags = opts.caseSensitive ? 'g' : 'gi';
return new RegExp(regexPattern, flags);
}
/**
* Check if a file is binary
*/
isBinaryFile(filePath) {
try {
// Read first 8KB to check for binary content
const buffer = readFileSync(filePath, { encoding: null }).slice(0, 8192);
// Check for null bytes (common in binary files)
for (let i = 0; i < buffer.length; i++) {
if (buffer[i] === 0) {
return true;
}
}
// Check file type
const fileType = detectFileType(filePath);
const binaryTypes = ['image', 'video', 'audio', 'binary', 'archive'];
return binaryTypes.includes(fileType || '');
}
catch {
return false;
}
}
/**
* Get grep statistics
*/
getStats() {
const grepMetrics = this.metrics.getOperations(0, 'smart_grep');
const totalSearches = grepMetrics.length;
const cacheHits = grepMetrics.filter((m) => m.cacheHit).length;
const totalTokensSaved = grepMetrics.reduce((sum, m) => sum + (m.savedTokens || 0), 0);
const totalInputTokens = grepMetrics.reduce((sum, m) => sum + (m.inputTokens || 0), 0);
const totalOriginalTokens = totalInputTokens + totalTokensSaved;
const averageReduction = totalOriginalTokens > 0
? (totalTokensSaved / totalOriginalTokens) * 100
: 0;
return {
totalSearches,
cacheHits,
totalTokensSaved,
averageReduction,
};
}
}
/**
* Get smart grep tool instance
*/
export function getSmartGrepTool(cache, tokenCounter, metrics) {
return new SmartGrepTool(cache, tokenCounter, metrics);
}
/**
* CLI function - Creates resources and uses factory
*/
export async function runSmartGrep(pattern, options = {}) {
const cache = new CacheEngine(join(homedir(), '.hypercontext', 'cache'), 100);
const tokenCounter = new TokenCounter();
const metrics = new MetricsCollector();
const tool = getSmartGrepTool(cache, tokenCounter, metrics);
return tool.grep(pattern, options);
}
/**
* MCP Tool Definition
*/
export const SMART_GREP_TOOL_DEFINITION = {
name: 'smart_grep',
description: 'Search file contents with 80% token reduction through match-only output and smart filtering',
inputSchema: {
type: 'object',
properties: {
pattern: {
type: 'string',
description: 'Search pattern (string or regex)',
},
cwd: {
type: 'string',
description: 'Working directory for search',
},
files: {
type: 'array',
items: { type: 'string' },
description: 'File patterns to search (glob patterns)',
},
caseSensitive: {
type: 'boolean',
description: 'Case-sensitive search',
default: false,
},
regex: {
type: 'boolean',
description: 'Treat pattern as regex',
default: false,
},
extensions: {
type: 'array',
items: { type: 'string' },
description: 'Search only these file extensions',
},
includeContext: {
type: 'boolean',
description: 'Include context lines around matches',
default: false,
},
contextBefore: {
type: 'number',
description: 'Lines of context before match',
default: 0,
},
contextAfter: {
type: 'number',
description: 'Lines of context after match',
default: 0,
},
limit: {
type: 'number',
description: 'Maximum matches to return',
},
filesWithMatches: {
type: 'boolean',
description: 'Only return filenames, not matches',
default: false,
},
count: {
type: 'boolean',
description: 'Only return match counts per file',
default: false,
},
},
required: ['pattern'],
},
};
//# sourceMappingURL=smart-grep.js.map