// git-contextor
// Version:
// A code context tool with vector search and real-time monitoring, with optional Git integration.
// 347 lines (317 loc) • 11.2 kB
// JavaScript
const { QdrantClient } = require('@qdrant/js-client-rest');
const { v4: uuidv4 } = require('uuid');
const crypto = require('crypto');
const logger = require('../cli/utils/logger');
const { getEmbedding } = require('../utils/embeddings');
/**
 * Manages interactions with the Qdrant vector database.
 * Handles collection creation, embedding generation, and vector operations
 * for a single repository-specific collection.
 */
class VectorStore {
  /**
   * @param {object} config - The application configuration object. Reads
   *   `vectorStore.qdrant` (or the legacy `services.qdrantHost` / `services.qdrantPort`),
   *   `repository.path`, `repository.name` and, later, `embedding`.
   */
  constructor(config) {
    this.config = config;

    // Prefer vectorStore.qdrant, then the legacy services config, then defaults.
    // Host and port fall back independently so a partial qdrant section
    // (e.g. only `host`) never produces an "http://host:undefined" URL.
    const qdrantConfig = config.vectorStore?.qdrant || {};
    const host = qdrantConfig.host || config.services?.qdrantHost || 'localhost';
    const port = qdrantConfig.port || config.services?.qdrantPort || 6333;

    this.client = new QdrantClient({
      url: `http://${host}:${port}`,
    });

    // Create a unique, stable ID for the repository based on its absolute path
    const repoId = crypto.createHash('sha256').update(this.config.repository.path).digest('hex').substring(0, 12);
    const repoName = this.config.repository.name.replace(/[^a-zA-Z0-9-]/g, '_');
    this.collectionName = `gctx-${repoName}-${repoId}`.toLowerCase();
  }

  /**
   * Validates if the existing Qdrant collection's vector dimension matches the current config.
   * @returns {Promise<boolean>} True if config matches, the collection doesn't exist, or the
   *   check itself failed; false only on a detected dimension mismatch.
   */
  async validateCollectionConfig() {
    try {
      const collectionInfo = await this.client.getCollection(this.collectionName);
      // Qdrant reports the vector size under config.params.vectors; the older
      // `vectors_config` lookup is kept only as a fallback.
      const collectionDimensions =
        collectionInfo.config?.params?.vectors?.size ??
        collectionInfo.vectors_config?.params?.size;
      const configDimensions = this.config.embedding.dimensions;

      if (collectionDimensions && collectionDimensions !== configDimensions) {
        logger.warn(`Configuration Mismatch: Qdrant collection '${this.collectionName}' has dimension ${collectionDimensions}, but config expects ${configDimensions}.`);
        return false;
      }
      return true;
    } catch (error) {
      if (error.status === 404) {
        // Collection doesn't exist, so there's no mismatch. It will be created with the correct config.
        return true;
      }
      logger.error('Failed to validate collection config:', error);
      // Be safe and assume it's valid if the check itself fails for other reasons.
      return true;
    }
  }

  /**
   * Ensures the Qdrant collection exists, creating it (Cosine distance, configured
   * dimension) together with a keyword payload index on `filePath` when missing.
   * @returns {Promise<void>}
   * @throws Re-throws any client error after logging it.
   */
  async ensureCollection() {
    try {
      const { collections } = await this.client.getCollections();
      const collectionExists = collections.some((c) => c.name === this.collectionName);
      if (collectionExists) {
        return;
      }

      logger.info(`Creating Qdrant collection: ${this.collectionName}`);
      await this.client.createCollection(this.collectionName, {
        vectors: {
          size: this.config.embedding.dimensions,
          distance: 'Cosine',
        },
      });
      await this.client.createPayloadIndex(this.collectionName, {
        field_name: 'filePath',
        field_schema: 'keyword',
        wait: true,
      });
    } catch (error) {
      logger.error('Failed to ensure Qdrant collection:', error);
      throw error;
    }
  }

  /**
   * Embeds the given chunks and writes them to the vector store.
   * Every point receives a freshly generated UUID, so points written earlier for the
   * same content are not overwritten by this call.
   * @param {Array<object>} chunks - An array of chunk objects like { content, metadata }.
   * @returns {Promise<void>}
   * @throws Re-throws Qdrant upsert failures; embedding failures only skip the chunk.
   */
  async upsertChunks(chunks) {
    if (!chunks || chunks.length === 0) return;
    await this.ensureCollection();

    const batchSize = 100; // Process in batches to avoid memory issues
    for (let i = 0; i < chunks.length; i += batchSize) {
      const batch = chunks.slice(i, i + batchSize);
      const points = [];

      for (const chunk of batch) {
        try {
          const embedding = await getEmbedding(chunk.content, this.config.embedding);
          if (embedding && embedding.length > 0) {
            points.push({
              id: uuidv4(),
              vector: embedding,
              payload: { ...chunk.metadata, content: chunk.content },
            });
          }
        } catch (error) {
          // Skip this chunk but continue with the others.
          logger.warn(`Failed to generate embedding for chunk: ${error.message}`);
        }
      }

      if (points.length === 0) {
        continue;
      }

      try {
        await this.client.upsert(this.collectionName, {
          points,
          wait: true,
        });
        logger.debug(`Upserted ${points.length} points to ${this.collectionName}.`);
      } catch (error) {
        logger.error('Failed to upsert points to Qdrant:', error);
        throw error;
      }
    }
  }

  /**
   * Removes all chunks associated with a specific file.
   * Delete failures are logged and not re-thrown.
   * @param {string} filePath - The path to the file to remove.
   * @returns {Promise<void>}
   */
  async removeFile(filePath) {
    await this.ensureCollection();
    try {
      await this.client.delete(this.collectionName, {
        filter: {
          must: [
            {
              key: 'filePath',
              match: {
                value: filePath,
              },
            },
          ],
        },
        wait: true,
      });
      logger.info(`Removed points for file: ${filePath}`);
    } catch (error) {
      logger.error(`Failed to remove points for ${filePath}:`, error);
    }
  }

  /**
   * Clears the entire collection by deleting and recreating it. Used for re-indexing.
   * @returns {Promise<void>}
   * @throws Re-throws any non-404 error raised while deleting, and any error from recreation.
   */
  async clearCollection() {
    try {
      await this.client.getCollection(this.collectionName);
      await this.client.deleteCollection(this.collectionName);
    } catch (e) {
      // A 404 just means there was nothing to delete.
      if (e.status !== 404) {
        logger.error(`Error clearing collection ${this.collectionName}:`, e);
        throw e;
      }
    }
    await this.ensureCollection();
    logger.info(`Cleared and recreated collection: ${this.collectionName}`);
  }

  /**
   * Deletes the collection from Qdrant. Never throws: failures are only logged.
   * @returns {Promise<void>}
   */
  async deleteCollection() {
    logger.info(`Attempting to delete collection: ${this.collectionName}`);
    try {
      const result = await this.client.deleteCollection(this.collectionName);
      if (result) {
        logger.info(`Collection '${this.collectionName}' deleted successfully.`);
      }
    } catch (error) {
      if (error.status === 404) {
        logger.info(`Collection '${this.collectionName}' did not exist, nothing to delete.`);
      } else {
        logger.error(`Failed to delete collection '${this.collectionName}':`, error);
        // Do not re-throw, as we want shutdown to continue.
      }
    }
  }

  /**
   * Performs a semantic search in the vector store.
   * @param {Array<number>} queryVector - The vector representation of the search query.
   * @param {number} limit - The maximum number of results to return.
   * @param {object} filter - Optional Qdrant filter object.
   * @returns {Promise<Array<object>>} Search results (payload included, vectors omitted).
   * @throws Re-throws client errors after logging them.
   */
  async search(queryVector, limit = 10, filter = null) {
    await this.ensureCollection();
    try {
      return await this.client.search(this.collectionName, {
        vector: queryVector,
        limit,
        filter,
        with_payload: true,
        with_vector: false,
      });
    } catch (error) {
      logger.error('Qdrant search failed:', error);
      // Re-throw to allow upstream handlers to catch and handle specific cases like dimension mismatch.
      throw error;
    }
  }

  /**
   * Gets status information from the vector store.
   * @returns {Promise<object>} `{ collectionName, vectorCount }` (count 0 when the collection
   *   is missing), or `{ status: 'error', message }` on any other failure.
   */
  async getStatus() {
    try {
      const collectionInfo = await this.client.getCollection(this.collectionName);
      return {
        collectionName: this.collectionName,
        vectorCount: collectionInfo.points_count,
      };
    } catch (error) {
      if (error.status === 404) {
        return { collectionName: this.collectionName, vectorCount: 0 };
      }
      logger.error('Failed to get VectorStore status:', error);
      return { status: 'error', message: error.message };
    }
  }

  /**
   * Pages through the collection with the scroll API, 1000 points at a time,
   * yielding the `points` array of each page until no next offset is returned.
   * @private
   * @param {object} options - Scroll options merged into every request
   *   (e.g. `filter`, `with_payload`, `with_vector`).
   * @returns {AsyncGenerator<Array<object>>}
   */
  async *_scrollPages(options) {
    let nextOffset = null;
    do {
      const page = await this.client.scroll(this.collectionName, {
        ...options,
        offset: nextOffset,
        limit: 1000,
      });
      yield page.points;
      nextOffset = page.next_page_offset;
      // Compare against null/undefined so a falsy-but-valid offset does not end paging early.
    } while (nextOffset != null);
  }

  /**
   * Gets the count of unique files in the vector store.
   * @returns {Promise<number>} Count of distinct `filePath` payload values (0 on a 404).
   * @throws Re-throws non-404 client errors after logging them.
   */
  async getUniqueFileCount() {
    await this.ensureCollection();
    try {
      const uniqueFiles = new Set();
      // Only `filePath` is needed, so avoid transferring full chunk payloads.
      const scrollOptions = { with_payload: ['filePath'], with_vector: false };
      for await (const points of this._scrollPages(scrollOptions)) {
        for (const point of points) {
          if (point.payload && point.payload.filePath) {
            uniqueFiles.add(point.payload.filePath);
          }
        }
      }
      return uniqueFiles.size;
    } catch (error) {
      if (error.status === 404) {
        return 0;
      }
      logger.error('Failed to get unique file count:', error);
      throw error;
    }
  }

  /**
   * Retrieves every point matching a filter, with payloads and without vectors.
   * @param {object} filter - Qdrant filter object passed to the scroll API.
   * @returns {Promise<Array<object>>} Matching points (empty array on a 404).
   * @throws Re-throws non-404 client errors after logging them.
   */
  async getPoints(filter) {
    await this.ensureCollection();
    try {
      logger.info(`Retrieving points from Qdrant collection ${this.collectionName} with filter.`);
      const allPoints = [];
      const scrollOptions = { filter, with_payload: true, with_vector: false };
      for await (const points of this._scrollPages(scrollOptions)) {
        allPoints.push(...points);
      }
      logger.info(`Retrieved ${allPoints.length} points with filter.`);
      return allPoints;
    } catch (error) {
      if (error.status === 404) {
        return [];
      }
      logger.error(`Error retrieving points from ${this.collectionName}:`, error);
      throw error;
    }
  }

  /**
   * Retrieves every point in the collection, including payloads and vectors.
   * @returns {Promise<Array<object>>} All points (empty array on a 404).
   * @throws Re-throws non-404 client errors after logging them.
   */
  async getAllPoints() {
    await this.ensureCollection();
    try {
      logger.info(`Retrieving all points from Qdrant collection: ${this.collectionName}`);
      const allPoints = [];
      const scrollOptions = { with_payload: true, with_vector: true };
      for await (const points of this._scrollPages(scrollOptions)) {
        allPoints.push(...points);
      }
      logger.info(`Retrieved ${allPoints.length} points.`);
      return allPoints;
    } catch (error) {
      if (error.status === 404) {
        return [];
      }
      logger.error(`Error retrieving all points from ${this.collectionName}:`, error);
      throw error;
    }
  }
}
module.exports = VectorStore;

/**
 * Tells whether an error from a Qdrant client call reports a vector dimension mismatch.
 * The decision is based on the message found at `error.data.status.error`.
 * @param {object} error - The error object from a Qdrant client call.
 * @returns {boolean} - True when that message mentions 'Vector dimension error'.
 */
module.exports.isDimensionMismatch = function(error) {
  const statusMessage = error?.data?.status?.error;
  // A missing or empty message can never describe a dimension mismatch.
  if (!statusMessage) {
    return false;
  }
  return statusMessage.includes('Vector dimension error');
};