@wildcard-ai/deepcontext
Version:
Advanced codebase indexing and semantic search MCP server
351 lines • 15.3 kB
JavaScript
/**
* Turbopuffer Vector Store Service
* Handles all Turbopuffer API operations including vector storage, querying, and hybrid search
*/
import { Logger } from '../utils/Logger.js';
import { fetchMirrored } from '../utils/wildcardFetch.js';
export class TurbopufferService {
apiKey;
configurationService;
baseUrl = 'https://gcp-us-central1.turbopuffer.com/v2';
logger;
constructor(apiKey, configurationService, loggerName = 'TurbopufferService') {
this.apiKey = apiKey;
this.configurationService = configurationService;
this.logger = new Logger(loggerName);
// Allow empty API key if Wildcard backend is available
const config = configurationService.getConfig();
const hasWildcardKey = !!(config.wildcardApiKey && config.wildcardApiKey !== 'test');
if (!apiKey && !hasWildcardKey) {
throw new Error('Turbopuffer API key is required when not using Wildcard backend');
}
}
/**
* Upsert vectors to Turbopuffer namespace
*/
async upsert(namespace, vectors) {
const response = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
upsert_rows: vectors,
distance_metric: 'cosine_distance',
schema: {
content: {
type: 'string',
full_text_search: true
}
}
})
}, `/vectordb/turbopuffer/namespaces/${namespace}`);
if (!response.ok) {
const error = await response.text();
throw new Error(`Turbopuffer upsert error (${response.status}): ${error}`);
}
}
/**
* Query Turbopuffer namespace with various options
*/
async query(namespace, options) {
// Get search configuration
const searchConfig = this.configurationService.getSearchConfig();
const requestBody = {
include_attributes: options.include_attributes || ['content', 'filePath', 'startLine', 'endLine', 'language'],
top_k: options.limit || searchConfig.defaultResultLimit
};
// Handle different search types based on options
if (options.rank_by) {
// Direct rank_by specification (for hybrid search)
requestBody.rank_by = options.rank_by;
}
else if (options.embedding) {
// Vector search
requestBody.rank_by = ['vector', 'ANN', options.embedding];
}
else if (options.query) {
// BM25 text search (use array format like hybrid search)
requestBody.rank_by = ['content', 'BM25', options.query];
}
// Add filters if provided
if (options.filters) {
requestBody.filters = options.filters;
}
const response = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}/query`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(requestBody)
}, `/vectordb/turbopuffer/namespaces/${namespace}/query`, { method: 'POST', body: JSON.stringify(requestBody) });
if (!response.ok) {
const error = await response.text();
throw new Error(`Turbopuffer query error: ${error}`);
}
const data = await response.json();
return (data.rows || []).map((row) => ({
id: row.id,
score: row.score || row._distance || row.$dist || 0,
metadata: row.attributes || row
}));
}
/**
* Basic search implementation for TurbopufferStore interface
*/
async search(namespace, options) {
return await this.query(namespace, options);
}
/**
* Advanced hybrid search combining vector similarity and BM25 with RRF fusion
*/
async hybridSearch(namespace, options) {
// Get search configuration
const searchConfig = this.configurationService.getSearchConfig();
const limit = options.limit || searchConfig.defaultResultLimit;
const vectorWeight = options.vectorWeight || searchConfig.defaultVectorWeight;
const bm25Weight = options.bm25Weight || searchConfig.defaultBm25Weight;
// Use Turbopuffer's queries array format (same as backend implementation)
const response = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}/query`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.apiKey}`
},
body: JSON.stringify({
queries: [
{
rank_by: ['vector', 'ANN', options.embedding],
top_k: Math.min(limit * 2, 50),
include_attributes: true
},
{
rank_by: ['content', 'BM25', options.query],
top_k: Math.min(limit * 2, 50),
include_attributes: true
}
]
})
}, `/vectordb/turbopuffer/namespaces/${namespace}/query`, {
method: 'POST',
body: JSON.stringify({
queries: [
{ rank_by: ['vector', 'ANN', options.embedding], top_k: Math.min(limit * 2, 50), include_attributes: true },
{ rank_by: ['content', 'BM25', options.query], top_k: Math.min(limit * 2, 50), include_attributes: true }
]
})
});
if (!response.ok) {
const error = await response.text();
throw new Error(`Turbopuffer hybrid search failed: ${response.status} ${error}`);
}
const data = await response.json();
// Use the same fusion logic as backend
return this.fuseHybridResults(data, limit, { vectorWeight, bm25Weight });
}
/**
* Check if a namespace exists
*/
async checkNamespaceExists(namespace) {
try {
const res = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}`, { method: 'GET', headers: { 'Authorization': `Bearer ${this.apiKey}` } }, `/vectordb/turbopuffer/namespaces/${namespace}`, { method: 'GET' });
return res.ok;
}
catch {
return false;
}
}
/**
* Clear/delete an entire namespace
*/
async clearNamespace(namespace) {
try {
const res = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}`, { method: 'DELETE', headers: { 'Authorization': `Bearer ${this.apiKey}` } }, `/vectordb/turbopuffer/namespaces/${namespace}`, { method: 'DELETE' });
if (res.ok) {
this.logger.info(`✅ Cleared namespace: ${namespace}`);
}
}
catch (error) {
this.logger.warn(`Failed to clear namespace ${namespace}:`, error);
}
}
/**
* Get chunk IDs for a specific file (for atomic updates)
*/
async getChunkIdsForFile(namespace, filePath) {
try {
const res = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}/query`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
filters: [['filePath', 'Eq', filePath]],
top_k: 1000,
include_attributes: false
})
}, `/vectordb/turbopuffer/namespaces/${namespace}/query`, {
method: 'POST',
body: JSON.stringify({
filters: [['filePath', 'Eq', filePath]],
top_k: 1000,
include_attributes: false
})
});
if (!res.ok) {
if (res.status === 422) {
return [];
}
throw new Error(`Query failed: ${res.status}`);
}
const queryData = await res.json();
return (queryData.rows || []).map((row) => row.id);
}
catch (error) {
this.logger.warn(`Failed to get existing chunk IDs for ${filePath}: ${error}`);
return [];
}
}
/**
* Delete chunks by their IDs (for atomic updates)
*/
async deleteChunksByIds(namespace, chunkIds) {
if (chunkIds.length === 0)
return 0;
try {
let totalDeleted = 0;
for (let i = 0; i < chunkIds.length; i += 1000) {
const batch = chunkIds.slice(i, i + 1000);
const res = await fetchMirrored(`${this.baseUrl}/namespaces/${namespace}`, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({ deletes: batch })
}, `/vectordb/turbopuffer/namespaces/${namespace}`, { method: 'POST', body: JSON.stringify({ deletes: batch }) });
if (!res.ok) {
const text = await res.text();
throw new Error(`Delete batch failed: ${res.status} ${text}`);
}
totalDeleted += batch.length;
}
return totalDeleted;
}
catch (error) {
throw new Error(`Failed to delete chunks: ${error}`);
}
}
/**
* Fuse hybrid search results using optimized RRF (Reciprocal Rank Fusion)
* Enhanced formula for better score distribution: score = weight * (base / (k + rank))
* where base is scaled for better differentiation
*/
fuseHybridResults(multiQueryResults, limit, weights) {
const scores = new Map();
const metadata = new Map();
// Optimized RRF parameters for better score distribution
const k = 10; // Reduced from 60 for less compression
const scoreBase = 100; // Scale up scores for better differentiation
const minScoreThreshold = 0.01; // Minimum meaningful score
// Extract results from Turbopuffer response: { results: [{ rows: [...] }, { rows: [...] }] }
const vectorResults = multiQueryResults.results?.[0]?.rows || [];
const bm25Results = multiQueryResults.results?.[1]?.rows || [];
this.logger.info(`Hybrid search - Vector: ${vectorResults.length}, BM25: ${bm25Results.length}`);
// Process vector search results (first query)
vectorResults.forEach((item, rank) => {
const reciprocalRank = weights.vectorWeight * scoreBase / (k + rank + 1);
scores.set(item.id, (scores.get(item.id) || 0) + reciprocalRank);
if (!metadata.has(item.id)) {
metadata.set(item.id, {
content: item.content || '',
symbols: item.symbols || '',
filePath: item.filePath || '',
startLine: item.startLine || 0,
endLine: item.endLine || 0,
language: item.language || ''
});
}
});
// Process BM25 search results (second query)
bm25Results.forEach((item, rank) => {
const reciprocalRank = weights.bm25Weight * scoreBase / (k + rank + 1);
scores.set(item.id, (scores.get(item.id) || 0) + reciprocalRank);
if (!metadata.has(item.id)) {
metadata.set(item.id, {
content: item.content || '',
symbols: item.symbols || '',
filePath: item.filePath || '',
startLine: item.startLine || 0,
endLine: item.endLine || 0,
language: item.language || ''
});
}
});
const finalResults = Array.from(scores.entries())
.sort(([, a], [, b]) => b - a)
.map(([id, score]) => [id, score])
.filter(([id, score]) => {
// Only filter out very low scores, keep all legitimate files
return score >= minScoreThreshold;
})
.map(([id, score], index, array) => {
const meta = metadata.get(id);
// Enhanced score normalization for better differentiation
const maxScore = array[0]?.[1] || 1;
const normalizedScore = Math.min(1.0, score / maxScore);
// Apply rank bonus to prevent score compression
const rankBonus = Math.max(0, (array.length - index) / array.length * 0.2);
const finalScore = Math.min(1.0, normalizedScore + rankBonus);
return {
id,
score: finalScore,
metadata: meta
};
})
// Remove duplicate chunks from the same file/function
.filter((result, index, array) => {
const current = result.metadata;
const key = `${current?.filePath}:${current?.startLine}-${current?.endLine}`;
// Keep first occurrence, remove subsequent duplicates
return array.findIndex(r => {
const m = r.metadata;
const compareKey = `${m?.filePath}:${m?.startLine}-${m?.endLine}`;
return compareKey === key;
}) === index;
})
// Remove overlapping chunks (same function with different line ranges)
.filter((result, index, array) => {
const current = result.metadata;
if (!current?.filePath)
return true;
// Check if this chunk overlaps significantly with a higher-scored chunk
return !array.slice(0, index).some(r => {
const other = r.metadata;
if (other?.filePath !== current.filePath)
return false;
// Calculate overlap
const overlapStart = Math.max(current.startLine || 0, other.startLine || 0);
const overlapEnd = Math.min(current.endLine || 0, other.endLine || 0);
const overlap = Math.max(0, overlapEnd - overlapStart);
// If >70% overlap, consider it duplicate
const currentSize = (current.endLine || 0) - (current.startLine || 0);
const overlapRatio = currentSize > 0 ? overlap / currentSize : 0;
return overlapRatio > 0.7;
});
})
.slice(0, limit);
this.logger.info(`Fusion completed - Final: ${finalResults.length} results`);
return finalResults;
}
/**
* Check if the service is available (API key provided)
*/
isAvailable() {
return !!this.apiKey && this.apiKey !== 'test';
}
}
//# sourceMappingURL=TurbopufferService.js.map