context-rag
Version:
Get relevant project context for AI agents to save 90% of tokens. Lightweight CLI tool for semantic search on project codebases.
357 lines (293 loc) • 10.6 kB
JavaScript
const { spawn, exec } = require('child_process');
const path = require('path');
const fs = require('fs');
const chalk = require('chalk');
const { promisify } = require('util');
const execAsync = promisify(exec);
/** Length of every vector produced by the built-in Node.js embedder. */
const EMBEDDING_DIMENSION = 384;

/** Display names for the engine identifiers this service dispatches on. */
const ENGINE_LABELS = {
  'rust': 'Rust embedder',
  'python-fast': 'fast Python embedder',
  'nodejs': 'Node.js embedder'
};

/** Words that count double when scoring keyword features. */
const PROGRAMMING_KEYWORDS = new Set([
  'function', 'class', 'method', 'variable', 'const', 'let', 'var',
  'import', 'export', 'require', 'module', 'component', 'service',
  'api', 'endpoint', 'route', 'middleware', 'auth', 'database',
  'query', 'model', 'controller', 'view', 'template', 'config'
]);

/**
 * Parse the JSON document an external embedder printed on stdout.
 *
 * @param {string} raw - Complete stdout of the embedder process.
 * @param {string} description - What was being parsed; used in the error message.
 * @returns {object} The parsed document.
 * @throws {Error} `Failed to parse <description>: <reason>` when `raw` is not valid JSON.
 */
function parseEmbedderJson(raw, description) {
  try {
    return JSON.parse(raw);
  } catch (error) {
    throw new Error(`Failed to parse ${description}: ${error.message}`);
  }
}

/**
 * Produces embeddings for text chunks using one of three engines: a compiled
 * Rust binary, a Python script, or a dependency-free Node.js fallback.
 */
class EmbeddingService {
  /**
   * @param {object} [config] - Project configuration. Only `config.embedder.type`
   *   (engine override) and `config.embedder.model` (passed to the Rust embedder)
   *   are read by this class. A missing config is treated as `{}`.
   */
  constructor(config) {
    this.config = config ?? {};
    this.pythonPath = 'python3';
    this.fastEmbedderScript = path.join(__dirname, '../../python/fast_embedder.py');
    this.rustEmbedderPath = path.join(__dirname, '../../target/release/context-rag-embedder');
    this.detectedEngine = null;
  }

  /**
   * Auto-detect available embedding engines in priority order:
   * 1. Rust
   * 2. Python
   * 3. Node.js
   *
   * A non-empty `config.embedder.type` short-circuits detection and is used
   * as-is. The result is cached on `this.detectedEngine`.
   *
   * @returns {Promise<string>} The engine identifier (`'rust'`, `'python-fast'`,
   *   `'nodejs'`, or whatever string was configured).
   */
  async detectEmbeddingEngine() {
    if (this.detectedEngine) {
      return this.detectedEngine;
    }

    // Manual override from config. The value is NOT validated here: an unknown
    // type is accepted and only rejected later by generateEmbeddings/embedText.
    const configuredType = this.config.embedder?.type;
    if (configuredType) {
      console.log(chalk.blue('🔧 Using configured embedding engine...'));
      this.detectedEngine = configuredType;
      console.log(chalk.green(`✅ Using ${ENGINE_LABELS[configuredType] || configuredType}`));
      return this.detectedEngine;
    }

    console.log(chalk.blue('🔍 Auto-detecting embedding engine...'));

    // Priority 1: Check for Rust embedder
    try {
      await execAsync('cargo --version');
      // Probe the compiled binary with spawn (no shell), exactly the way it is
      // launched later, so install paths containing spaces work.
      try {
        await this.runEmbedderProcess(this.rustEmbedderPath, ['--version'], { name: 'Rust embedder' });
        console.log(chalk.green('✅ Using Rust embedder'));
        this.detectedEngine = 'rust';
        return 'rust';
      } catch (error) {
        console.log(chalk.yellow('⚠️ Rust toolchain found but embedder not compiled'));
      }
    } catch (error) {
      console.log(chalk.gray(' Rust not available'));
    }

    // Priority 2: Check for fast Python embedder (same interpreter that is spawned later)
    try {
      await execAsync(`${this.pythonPath} --version`);
      console.log(chalk.yellow('✅ Using fast Python embedder'));
      this.detectedEngine = 'python-fast';
      return 'python-fast';
    } catch (error) {
      console.log(chalk.gray(' Python not available'));
    }

    // Priority 3: Fallback to Node.js
    console.log(chalk.yellow('⚠️ Using Node.js fallback'));
    console.log(chalk.gray(' For better results, install Python (fast embedder) or Rust'));
    this.detectedEngine = 'nodejs';
    return 'nodejs';
  }

  /**
   * Embed a batch of chunks with the detected engine.
   *
   * @param {Array<object>} chunks - Chunks to embed; the Node.js engine reads `chunk.content`.
   * @returns {Promise<Array<object>>} The chunks as returned by the engine.
   * @throws {Error} `No embedding engine available` for an unrecognised engine identifier.
   */
  async generateEmbeddings(chunks) {
    const engine = await this.detectEmbeddingEngine();
    switch (engine) {
      case 'rust':
        return await this.generateRustEmbeddings(chunks);
      case 'python-fast':
        return await this.generateFastPythonEmbeddings(chunks);
      case 'nodejs':
        return await this.generateNodeJsEmbeddings(chunks);
      default:
        throw new Error('No embedding engine available');
    }
  }

  /**
   * Run an external embedder to completion and collect its stdout.
   *
   * @private
   * @param {string} command - Executable to spawn.
   * @param {string[]} args - Command-line arguments.
   * @param {object} [options]
   * @param {string} [options.name] - Engine name used in error messages.
   * @param {string} [options.input] - Payload written to the child's stdin; stdin is
   *   closed immediately when omitted.
   * @returns {Promise<string>} Everything the child wrote to stdout.
   * @throws {Error} `Failed to start <name>: …` when the process cannot be spawned,
   *   or `<Name> failed: <stderr>` when it exits with a non-zero code.
   */
  runEmbedderProcess(command, args, { name = 'embedder', input } = {}) {
    return new Promise((resolve, reject) => {
      const child = spawn(command, args, { stdio: ['pipe', 'pipe', 'pipe'] });
      let stdout = '';
      let stderr = '';

      // Decode through the stream so multi-byte UTF-8 characters that straddle
      // two chunks are not corrupted.
      child.stdout.setEncoding('utf8');
      child.stderr.setEncoding('utf8');
      child.stdout.on('data', (data) => {
        stdout += data;
      });
      child.stderr.on('data', (data) => {
        stderr += data;
      });

      child.on('error', (error) => {
        reject(new Error(`Failed to start ${name}: ${error.message}`));
      });
      child.on('close', (code) => {
        if (code === 0) {
          resolve(stdout);
          return;
        }
        const title = name.charAt(0).toUpperCase() + name.slice(1);
        reject(new Error(`${title} failed: ${stderr}`));
      });

      // A child that fails to start or exits early makes the stdin pipe emit
      // EPIPE. Without a listener that would crash the whole CLI; the failure
      // itself is reported through the 'error'/'close' handlers above.
      child.stdin.on('error', () => {});
      child.stdin.end(input);
    });
  }

  /**
   * Build the `--model` arguments for the Rust embedder.
   *
   * @private
   * @returns {string[]} `['--model', model]`, or `[]` when no model is configured.
   */
  rustModelArgs() {
    const model = this.config?.embedder?.model;
    return model == null ? [] : ['--model', model];
  }

  /**
   * Embed a batch of chunks with the compiled Rust embedder. The chunks are
   * sent as JSON on stdin and the `chunks` field of the JSON reply is returned.
   *
   * @param {Array<object>} chunks
   * @returns {Promise<Array<object>>}
   */
  async generateRustEmbeddings(chunks) {
    const payload = JSON.stringify({ chunks, model: this.config?.embedder?.model });
    const stdout = await this.runEmbedderProcess(this.rustEmbedderPath, this.rustModelArgs(), {
      name: 'Rust embedder',
      input: payload
    });
    return parseEmbedderJson(stdout, 'Rust embedder output').chunks;
  }

  /**
   * Embed a batch of chunks with the Python script. The chunks are sent as
   * JSON on stdin and the `chunks` field of the JSON reply is returned.
   *
   * @param {Array<object>} chunks
   * @returns {Promise<Array<object>>}
   */
  async generateFastPythonEmbeddings(chunks) {
    console.log(chalk.gray('⚡ Generating fast Python embeddings for', chunks.length, 'chunks'));
    const stdout = await this.runEmbedderProcess(
      this.pythonPath,
      [this.fastEmbedderScript, '--model', 'fast-embedder'],
      { name: 'fast Python embedder', input: JSON.stringify({ chunks }) }
    );
    return parseEmbedderJson(stdout, 'fast embedding results').chunks;
  }

  /**
   * Embed a single text with the Python script (text passed via `--text`).
   *
   * @param {string} text
   * @returns {Promise<*>} The `embedding` field of the script's JSON reply.
   */
  async embedTextFastPython(text) {
    const stdout = await this.runEmbedderProcess(
      this.pythonPath,
      [this.fastEmbedderScript, '--text', text, '--model', 'fast-embedder'],
      { name: 'fast Python embedder' }
    );
    return parseEmbedderJson(stdout, 'fast embedding result').embedding;
  }

  /**
   * Embed a batch of chunks in-process.
   *
   * @param {Array<{content: string}>} chunks
   * @returns {Promise<Array<object>>} Copies of the chunks with an added `embedding` array.
   */
  async generateNodeJsEmbeddings(chunks) {
    console.log(chalk.gray('📝 Generating Node.js embeddings for', chunks.length, 'chunks'));
    return chunks.map((chunk) => ({
      ...chunk,
      embedding: this.createEnhancedEmbedding(chunk.content)
    }));
  }

  /**
   * Build a deterministic 384-dimensional vector from keyword counts and
   * character positions, scaled to unit length.
   *
   * @param {string} text - Text to embed; `null`/`undefined` is treated as empty.
   * @returns {number[]} Vector of length 384; all zeros when the text yields no features.
   */
  createEnhancedEmbedding(text) {
    const source = String(text ?? '');
    const words = source.toLowerCase().split(/\s+/);
    const embedding = new Array(EMBEDDING_DIMENSION).fill(0);

    // Keyword features: each distinct word is hashed into one bucket.
    this.extractKeywordFeatures(source).forEach((score, keyword) => {
      const bucket = this.simpleHash(keyword) % EMBEDDING_DIMENSION;
      embedding[bucket] += score * 0.1;
    });

    // Positional features: the first five characters of every word, mixed
    // with the word's position so word order influences the vector.
    words.forEach((word, wordIndex) => {
      const prefixLength = Math.min(word.length, 5);
      for (let i = 0; i < prefixLength; i++) {
        const charCode = word.charCodeAt(i);
        const bucket = (charCode + wordIndex * 7 + i * 13) % EMBEDDING_DIMENSION;
        embedding[bucket] += (charCode / 255.0) * 0.05;
      }
    });

    // Normalize to unit length; an all-zero vector is left untouched to avoid dividing by zero.
    const magnitude = Math.sqrt(embedding.reduce((sum, val) => sum + val * val, 0));
    if (magnitude > 0) {
      for (let i = 0; i < embedding.length; i++) {
        embedding[i] /= magnitude;
      }
    }
    return embedding;
  }

  /**
   * Score the words of a text: every occurrence of a word longer than two
   * characters (after stripping non-word characters) adds 1.0, or 2.0 when
   * the word is a known programming keyword.
   *
   * @param {string} text - Text to analyse; `null`/`undefined` is treated as empty.
   * @returns {Map<string, number>} Cleaned lower-case word → accumulated score, in first-seen order.
   */
  extractKeywordFeatures(text) {
    const features = new Map();
    const words = String(text ?? '').toLowerCase().split(/\s+/);
    for (const word of words) {
      const cleanWord = word.replace(/[^\w]/g, '');
      if (cleanWord.length <= 2) {
        continue;
      }
      const score = PROGRAMMING_KEYWORDS.has(cleanWord) ? 2.0 : 1.0;
      features.set(cleanWord, (features.get(cleanWord) || 0) + score);
    }
    return features;
  }

  /**
   * 31-multiplier rolling string hash truncated to 32 bits.
   *
   * @param {string} str
   * @returns {number} Non-negative integer hash.
   */
  simpleHash(str) {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      // (hash << 5) - hash === hash * 31; `| 0` wraps to a signed 32-bit integer.
      hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0;
    }
    return Math.abs(hash);
  }

  /**
   * @deprecated Use generateNodeJsEmbeddings instead.
   * @param {Array<object>} chunks
   * @returns {Promise<Array<object>>}
   */
  generateMockEmbeddings(chunks) {
    return this.generateNodeJsEmbeddings(chunks);
  }

  /**
   * Embed a single text with the detected engine.
   *
   * @param {string} text
   * @returns {Promise<*>} The embedding produced by the engine.
   * @throws {Error} `No embedding engine available` for an unrecognised engine identifier.
   */
  async embedText(text) {
    const engine = await this.detectEmbeddingEngine();
    switch (engine) {
      case 'rust':
        return await this.embedTextRust(text);
      case 'python-fast':
        return await this.embedTextFastPython(text);
      case 'nodejs':
        return this.createEnhancedEmbedding(text);
      default:
        throw new Error('No embedding engine available');
    }
  }

  /**
   * Embed a single text with the compiled Rust embedder (text passed via `--text`).
   *
   * @param {string} text
   * @returns {Promise<*>} The `embedding` field of the embedder's JSON reply.
   */
  async embedTextRust(text) {
    const stdout = await this.runEmbedderProcess(
      this.rustEmbedderPath,
      ['--text', text, ...this.rustModelArgs()],
      { name: 'Rust embedder' }
    );
    return parseEmbedderJson(stdout, 'Rust embedding result').embedding;
  }
}
// CommonJS export: the module exposes an object whose only property is `EmbeddingService`.
module.exports = { EmbeddingService };