UNPKG

@chainlink/mcp-server

Version:
336 lines (335 loc) 14.1 kB
"use strict";
/**
 * @fileoverview Vector database implementation using LanceDB for document storage and retrieval
 *
 * Provides high-performance vector similarity search capabilities for Chainlink documentation.
 * Handles embedding generation, vector indexing, and semantic search operations with support
 * for multiple embedding providers (OpenAI, Ollama).
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.VectorDatabase = void 0;
exports.getVectorDbDir = getVectorDbDir;
const lancedb_1 = require("@lancedb/lancedb");
const schema_1 = require("./schema");
const logger_1 = require("../utils/logger");
const embedding_provider_1 = require("./embedding-provider");
const config_1 = require("../config");
const path_1 = require("path");
const os_1 = require("os");
const fs_1 = require("fs");
/**
 * Resolve the directory that holds the vector database files.
 *
 * Resolution order:
 * 1. A bundled `data/` directory two levels above this module (shipped with
 *    the npm package), if it exists on disk.
 * 2. A `cll-mcp-vectordb` directory under `$XDG_DATA_HOME` when that variable
 *    is set, otherwise under `~/AppData/Local` on Windows or `~/.local/share`
 *    on every other platform.
 *
 * The fallback directory is only computed, never created, by this function.
 *
 * @returns The full path to the vector database directory
 */
function getVectorDbDir() {
    // __dirname is dist/vectordb at runtime, so go up two levels to reach data/
    const bundledDbPath = (0, path_1.join)(__dirname, "..", "..", "data");
    if ((0, fs_1.existsSync)(bundledDbPath)) {
        logger_1.Logger.debug(`Using bundled vector database at: ${bundledDbPath}`);
        return bundledDbPath;
    }
    // Fall back to the per-user data directory
    const dataDir = process.env.XDG_DATA_HOME ||
        (0, path_1.join)((0, os_1.homedir)(), process.platform === "win32" ? "AppData/Local" : ".local/share");
    const userDbPath = (0, path_1.join)(dataDir, "cll-mcp-vectordb");
    logger_1.Logger.debug(`Using user vector database at: ${userDbPath}`);
    return userDbPath;
}
/**
 * Vector database implementation for Chainlink documentation storage and retrieval
 *
 * Provides semantic search capabilities using embeddings and vector similarity.
 * Supports multiple embedding providers and HNSW indexing for fast similarity
 * search operations.
 *
 * Features:
 * - Multi-provider embedding support (OpenAI, Ollama)
 * - Vector similarity search with cosine distance
 * - Automatic schema management and indexing
 * - Cross-platform database storage
 *
 * @class VectorDatabase
 */
class VectorDatabase {
    // LanceDB connection; null until initialize() connects
    db = null;
    // Embedding manager used to embed queries; null until initialize() runs
    embedManager = null;
    // Base directory at construction time; replaced by the provider-specific
    // database path once initialize() has connected
    dbPath;
    tableName = "docs";
    initialized = false;
    allowWrite = false;
    /**
     * Initialize a new VectorDatabase instance
     *
     * Resolves the storage location but does not open a connection. Call
     * initialize() to set up the actual database connection and schema.
     *
     * By default the instance is read-only, for consuming pre-built databases.
     * Only use write mode when explicitly building or updating embeddings.
     *
     * @param options - Configuration options for the database
     * @param options.allowWrite - If true, allows writing/updating the database (default: false)
     */
    constructor(options = {}) {
        this.dbPath = getVectorDbDir();
        // Default to read-only unless explicitly allowing writes
        this.allowWrite = options.allowWrite || false;
        logger_1.Logger.log("info", `Vector database path: ${this.dbPath}`);
        if (this.allowWrite) {
            logger_1.Logger.log("info", "Vector database initialized in write mode");
        }
        else {
            logger_1.Logger.log("info", "Vector database initialized in read-only mode");
        }
    }
    /**
     * Initialize the vector database connection and schema
     *
     * Sets up the embedding provider, connects to the provider-specific
     * database, and (in write mode only) creates an empty table when none
     * exists. Returns immediately if a previous call already completed.
     *
     * @throws {Error} In read-only mode when no database or table exists
     * @throws {Error} When no embedding provider is configured, or when the
     *   embedding manager or database connection fails
     */
    async initialize() {
        if (this.isInitialized()) {
            logger_1.Logger.log("info", "Database already initialized");
            return;
        }
        const dbDir = getVectorDbDir();
        // Fail fast in read-only mode, before touching the embedding provider,
        // when no database file for any provider is present.
        const candidateFilenames = config_1.VECTOR_DB_ALL_FILENAMES;
        const anyDbExists = candidateFilenames.some((f) => (0, fs_1.existsSync)((0, path_1.join)(dbDir, f)));
        if (!this.allowWrite && !anyDbExists) {
            throw new Error("Read-only mode requires an existing database");
        }
        // The embedding manager is needed even in read-only mode: queries must
        // be embedded before they can be searched.
        this.embedManager = (0, embedding_provider_1.createEmbeddingManager)();
        await this.embedManager.initialize();
        const provider = this.embedManager.getProvider();
        if (!provider) {
            throw new Error("Embedding provider is required for vector database operations. " +
                "Please configure one of: OPENAI_API_KEY, EMBEDDINGS_PROVIDER=ollama, " +
                "or set EMBEDDINGS_PROVIDER=openai with OPENAI_API_KEY");
        }
        const dimensions = schema_1.EMBEDDING_DIMENSIONS[provider];
        logger_1.Logger.log("info", `Using embedding provider: ${provider} (${dimensions}d)`);
        // Use provider-specific database filename to avoid dimension mismatches
        const providerDbFilename = config_1.VECTOR_DB_FILENAMES[provider];
        const dbPath = (0, path_1.join)(dbDir, providerDbFilename);
        logger_1.Logger.log("info", `Initializing vector database at ${dbPath}`);
        // A database for some other provider may exist; read-only mode needs
        // the one matching the active provider.
        const dbExists = (0, fs_1.existsSync)(dbPath);
        if (!this.allowWrite && !dbExists) {
            throw new Error("Read-only mode requires an existing database");
        }
        this.db = await (0, lancedb_1.connect)(dbPath);
        this.dbPath = dbPath;
        const tableAlreadyExists = await this.tableExists();
        if (!tableAlreadyExists) {
            if (!this.allowWrite) {
                throw new Error("Read-only mode requires an existing database table");
            }
            await this.createTableWithDimensions(dimensions);
        }
        this.initialized = true;
        logger_1.Logger.log("info", tableAlreadyExists
            ? "✅ Vector database loaded successfully"
            : "✅ Vector database initialized successfully");
    }
    /**
     * Get the appropriate schema for the current embedding provider
     *
     * @private
     * @returns Schema object configured for the current provider's embedding dimensions
     * @throws {Error} When the embedding manager is not initialized or reports no provider
     */
    getSchemaForProvider() {
        if (!this.embedManager) {
            throw new Error("Embedding manager not initialized");
        }
        const provider = this.embedManager.getProvider();
        if (!provider) {
            throw new Error("Failed to get embedding provider");
        }
        const dimensions = schema_1.EMBEDDING_DIMENSIONS[provider];
        return (0, schema_1.createCCIPDocTableSchema)(dimensions);
    }
    /**
     * Create an empty table with the specified vector dimensions
     *
     * @private
     * @param dimensions - Number of dimensions for the vector embeddings
     * @throws {Error} When database is not initialized
     */
    async createTableWithDimensions(dimensions) {
        if (!this.db) {
            throw new Error("Database not initialized");
        }
        const schema = (0, schema_1.createCCIPDocTableSchema)(dimensions);
        // No rows are supplied, so the explicit schema defines the table layout
        await this.db.createTable(this.tableName, [], { schema });
        logger_1.Logger.log("info", `Empty table ${this.tableName} created with ${dimensions}d vectors`);
    }
    /**
     * Create or overwrite the documents table with provided entries
     *
     * Creates a new table with the given document entries and builds an
     * HNSW-SQ index with cosine distance on the "vector" column.
     *
     * @param entries - Array of document entries to insert into the table
     * @param overwrite - Whether to overwrite existing table (default: true)
     * @returns Promise resolving to the created table instance
     * @throws {Error} When database is not initialized or table/index creation fails
     */
    async createTable(entries, overwrite = true) {
        if (!this.db) {
            throw new Error("Database not initialized. Call initialize() first.");
        }
        try {
            const mode = overwrite ? "overwrite" : "create";
            const schema = this.getSchemaForProvider();
            const table = await this.db.createTable(this.tableName, entries, {
                mode,
                schema,
            });
            // Create vector index for efficient similarity search
            await table.createIndex("vector", {
                config: lancedb_1.Index.hnswSq({
                    distanceType: "cosine",
                }),
            });
            const rowCount = await table.countRows();
            logger_1.Logger.log("info", `Table ${this.tableName} created with ${rowCount} rows`);
            return table;
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to create table: ${error}`);
            throw error;
        }
    }
    /**
     * Get a reference to the documents table
     *
     * @returns Promise resolving to the table instance
     * @throws {Error} When database is not initialized or the table cannot be opened
     */
    async getTable() {
        if (!this.db) {
            throw new Error("Database not initialized. Call initialize() first.");
        }
        try {
            return await this.db.openTable(this.tableName);
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to open table ${this.tableName}: ${error}`);
            throw error;
        }
    }
    /**
     * Check if the documents table exists in the database
     *
     * Never throws: returns false when there is no connection, and logs and
     * returns false when listing the table names fails.
     *
     * @returns Promise resolving to true if table exists, false otherwise
     */
    async tableExists() {
        if (!this.db) {
            return false;
        }
        try {
            const tableNames = await this.db.tableNames();
            return tableNames.includes(this.tableName);
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to check if table exists: ${error}`);
            return false;
        }
    }
    /**
     * Perform vector similarity search for documents
     *
     * Opens the documents table, generates an embedding for the query text,
     * and returns the nearest rows from the table.
     *
     * @param query - Text query to search for
     * @param limit - Maximum number of results to return (default: 8)
     * @returns Promise resolving to array of matching rows
     * @throws {Error} When the database is not initialized, embedding generation
     *   fails, or the search operation fails
     */
    async searchSimilar(query, limit = 8) {
        try {
            // getTable() throws unless a connection exists, and initialize()
            // assigns embedManager before it connects, so embedManager is set
            // whenever this line succeeds.
            const table = await this.getTable();
            const queryEmbedding = await this.embedManager.getEmbedding(query);
            if (!queryEmbedding) {
                throw new Error("Failed to generate query embedding");
            }
            const searchQuery = table.search(queryEmbedding).limit(limit);
            const results = await searchQuery.toArray();
            logger_1.Logger.log("info", `Vector search returned ${results.length} results for query: "${query}"`);
            return results;
        }
        catch (error) {
            logger_1.Logger.log("error", `Search failed: ${error}`);
            throw error;
        }
    }
    /**
     * Report whether the documents table exists and how many rows it holds
     *
     * Best-effort: any failure is logged and reported as `{ tableExists: false }`
     * instead of being thrown.
     *
     * @returns Promise resolving to `{ tableExists: false }` or
     *   `{ tableExists: true, rowCount }`
     */
    async getStats() {
        try {
            const exists = await this.tableExists();
            if (!exists) {
                return { tableExists: false };
            }
            const table = await this.getTable();
            const rowCount = await table.countRows();
            return { tableExists: true, rowCount };
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to get database stats: ${error}`);
            return { tableExists: false };
        }
    }
    /**
     * Get the underlying database connection
     *
     * @returns The connection object, or null if initialize() has not connected yet
     */
    getDbConnection() {
        return this.db;
    }
    /**
     * Check whether initialize() has completed successfully
     *
     * @returns true once the database has been initialized, false otherwise
     */
    isInitialized() {
        return this.initialized;
    }
    /**
     * Perform a vector similarity search for documents
     *
     * Generates an embedding for the query and searches the documents table.
     * The search runs against the table rather than the connection, because
     * the connection object does not expose a search method.
     *
     * @param query - Text query to search for
     * @param topK - Maximum number of results to return (default: 10)
     * @returns Promise resolving to array of matching rows
     * @throws {Error} When the database is not initialized, embedding generation
     *   fails, or the search operation fails
     */
    async search(query, topK = 10) {
        if (!this.db || !this.embedManager) {
            throw new Error("Database not initialized");
        }
        try {
            // Generate embedding for the query
            const queryEmbedding = await this.embedManager.getEmbedding(query);
            if (!queryEmbedding) {
                throw new Error("Failed to generate query embedding");
            }
            const table = await this.getTable();
            const searchQuery = table.search(queryEmbedding).limit(topK);
            const results = await searchQuery.toArray();
            logger_1.Logger.log("info", `Vector search returned ${results.length} results for query: "${query}"`);
            return results;
        }
        catch (error) {
            logger_1.Logger.log("error", `Vector search failed: ${error}`);
            throw error;
        }
    }
}
exports.VectorDatabase = VectorDatabase;
//# sourceMappingURL=database.js.map