// @chainlink/mcp-server
// Version: (not specified)
// Prototype MCP Server for CLL
// 336 lines (335 loc) • 14.1 kB
// JavaScript
"use strict";
/**
* @fileoverview Vector database implementation using LanceDB for document storage and retrieval
*
* Provides high-performance vector similarity search capabilities for Chainlink documentation.
* Handles embedding generation, vector indexing, and semantic search operations with support
* for multiple embedding providers (OpenAI, Ollama).
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.VectorDatabase = void 0;
exports.getVectorDbDir = getVectorDbDir;
const lancedb_1 = require("@lancedb/lancedb");
const schema_1 = require("./schema");
const logger_1 = require("../utils/logger");
const embedding_provider_1 = require("./embedding-provider");
const config_1 = require("../config");
const path_1 = require("path");
const os_1 = require("os");
const fs_1 = require("fs");
/**
 * Resolve the directory in which the vector database files live.
 *
 * Resolution order:
 * 1. A `data` directory two levels above this module's directory, when it
 *    exists on disk (the database bundled with the package).
 * 2. `<XDG_DATA_HOME>/cll-mcp-vectordb` when the XDG_DATA_HOME environment
 *    variable is set to a non-empty value.
 * 3. `<home>/AppData/Local/cll-mcp-vectordb` on Windows, or
 *    `<home>/.local/share/cll-mcp-vectordb` on every other platform.
 *
 * The chosen location is reported through a debug log message. This function
 * only computes a path; it does not create any directory.
 *
 * @returns The full path to the vector database directory
 */
function getVectorDbDir() {
    // __dirname is expected to be dist/vectordb, so two levels up is the package root.
    const packagedDataDir = (0, path_1.join)(__dirname, "..", "..", "data");
    const hasPackagedData = (0, fs_1.existsSync)(packagedDataDir);
    if (!hasPackagedData) {
        // No bundled database: fall back to the per-user data directory.
        let baseDir = process.env.XDG_DATA_HOME;
        if (!baseDir) {
            const homeRelative = process.platform === "win32" ? "AppData/Local" : ".local/share";
            baseDir = (0, path_1.join)((0, os_1.homedir)(), homeRelative);
        }
        const fallbackDir = (0, path_1.join)(baseDir, "cll-mcp-vectordb");
        logger_1.Logger.debug(`Using user vector database at: ${fallbackDir}`);
        return fallbackDir;
    }
    logger_1.Logger.debug(`Using bundled vector database at: ${packagedDataDir}`);
    return packagedDataDir;
}
/**
 * Vector database implementation for Chainlink documentation storage and retrieval
 *
 * Wraps a LanceDB connection plus an embedding manager to provide semantic
 * search over a single table of documents.
 *
 * Features:
 * - Provider-specific database files, selected from the active embedding provider
 * - Vector similarity search over the "docs" table
 * - HNSW (scalar-quantized) vector index with cosine distance, built by createTable()
 * - Read-only mode by default; write mode must be requested explicitly
 *
 * @class VectorDatabase
 */
class VectorDatabase {
    /** LanceDB connection; null until initialize() has connected. */
    db = null;
    /** Embedding manager used to embed queries; null until initialize() creates it. */
    embedManager = null;
    /** Base directory at construction time; the provider-specific database path after initialize(). */
    dbPath;
    /** Name of the table that holds the document rows. */
    tableName = "docs";
    /** True once initialize() has completed successfully. */
    initialized = false;
    /** When false, initialize() refuses to create a missing database or table. */
    allowWrite = false;
    /**
     * Initialize a new VectorDatabase instance
     *
     * Records the storage directory and the access mode, and logs both. No
     * database connection is opened here; call initialize() for that.
     *
     * By default the instance is read-only, intended for consuming pre-built
     * databases. Use write mode only when building or updating embeddings.
     *
     * @param options - Configuration options for the database (null/undefined means defaults)
     * @param options.allowWrite - If truthy, allows creating the database/table (default: false)
     */
    constructor(options = {}) {
        this.dbPath = getVectorDbDir();
        // Read-only unless the caller explicitly opts in; `?.` tolerates a null options argument.
        this.allowWrite = Boolean(options?.allowWrite);
        logger_1.Logger.log("info", `Vector database path: ${this.dbPath}`);
        logger_1.Logger.log("info", this.allowWrite
            ? "Vector database initialized in write mode"
            : "Vector database initialized in read-only mode");
    }
    /**
     * Initialize the vector database connection and schema
     *
     * Steps, in order:
     * 1. Return immediately if a previous call already succeeded.
     * 2. In read-only mode, fail fast when none of the known database files exist.
     * 3. Create and initialize the embedding manager (in both modes) and read its provider.
     * 4. Connect to the provider-specific database file.
     * 5. In write mode, create an empty table when the table is missing.
     *
     * @throws {Error} When read-only mode finds no database or table, when no
     *   embedding provider is available, when the provider has no configured
     *   database filename, or when connecting/creating the table fails
     */
    async initialize() {
        if (this.isInitialized()) {
            logger_1.Logger.log("info", "Database already initialized");
            return;
        }
        const dbDir = getVectorDbDir();
        // Read-only fast path: bail out before touching the embedding provider
        // when no database file for any provider is present.
        if (!this.allowWrite) {
            const anyDbExists = config_1.VECTOR_DB_ALL_FILENAMES.some((filename) => (0, fs_1.existsSync)((0, path_1.join)(dbDir, filename)));
            if (!anyDbExists) {
                throw new Error("Read-only mode requires an existing database");
            }
        }
        // The embedding manager is needed in both modes: queries must be embedded to search.
        this.embedManager = (0, embedding_provider_1.createEmbeddingManager)();
        await this.embedManager.initialize();
        const provider = this.embedManager.getProvider();
        if (!provider) {
            throw new Error("Embedding provider is required for vector database operations. " +
                "Please configure one of: OPENAI_API_KEY, EMBEDDINGS_PROVIDER=ollama, " +
                "or set EMBEDDINGS_PROVIDER=openai with OPENAI_API_KEY");
        }
        const dimensions = schema_1.EMBEDDING_DIMENSIONS[provider];
        logger_1.Logger.log("info", `Using embedding provider: ${provider} (${dimensions}d)`);
        // Use provider-specific database filename to avoid dimension mismatches
        const providerDbFilename = config_1.VECTOR_DB_FILENAMES[provider];
        if (typeof providerDbFilename !== "string") {
            // Without this guard path.join() would fail with an opaque argument-type error.
            throw new Error(`No vector database filename configured for embedding provider: ${provider}`);
        }
        const dbPath = (0, path_1.join)(dbDir, providerDbFilename);
        logger_1.Logger.log("info", `Initializing vector database at ${dbPath}`);
        // Another provider's database may exist, so re-check the specific file.
        if (!this.allowWrite && !(0, fs_1.existsSync)(dbPath)) {
            throw new Error("Read-only mode requires an existing database");
        }
        this.db = await (0, lancedb_1.connect)(dbPath);
        this.dbPath = dbPath;
        const hasTable = await this.tableExists();
        if (!hasTable) {
            if (!this.allowWrite) {
                throw new Error("Read-only mode requires an existing database table");
            }
            await this.createTableWithDimensions(dimensions);
        }
        this.initialized = true;
        logger_1.Logger.log("info", hasTable
            ? "✅ Vector database loaded successfully"
            : "✅ Vector database initialized successfully");
    }
    /**
     * Get the appropriate schema for the current embedding provider
     *
     * @private
     * @returns Schema object built for the current provider's embedding dimensions
     * @throws {Error} When the embedding manager is not initialized or reports no provider
     */
    getSchemaForProvider() {
        if (!this.embedManager) {
            throw new Error("Embedding manager not initialized");
        }
        const provider = this.embedManager.getProvider();
        if (!provider) {
            throw new Error("Failed to get embedding provider");
        }
        const dimensions = schema_1.EMBEDDING_DIMENSIONS[provider];
        return (0, schema_1.createCCIPDocTableSchema)(dimensions);
    }
    /**
     * Create an empty table with the specified vector dimensions
     *
     * @private
     * @param dimensions - Number of dimensions for the vector embeddings
     * @throws {Error} When database is not initialized
     */
    async createTableWithDimensions(dimensions) {
        if (!this.db) {
            throw new Error("Database not initialized");
        }
        const schema = (0, schema_1.createCCIPDocTableSchema)(dimensions);
        // No rows are supplied, so the explicit schema is what defines the table's columns.
        await this.db.createTable(this.tableName, [], { schema });
        logger_1.Logger.log("info", `Empty table ${this.tableName} created with ${dimensions}d vectors`);
    }
    /**
     * Create or overwrite the documents table with provided entries
     *
     * Creates the table from the given entries using the current provider's
     * schema, then builds an HNSW (scalar-quantized) index with cosine
     * distance on the "vector" column.
     *
     * @param entries - Array of document entries to insert into the table
     * @param overwrite - Whether to overwrite existing table (default: true)
     * @returns Promise resolving to the created table instance
     * @throws {Error} When database is not initialized or table/index creation fails
     */
    async createTable(entries, overwrite = true) {
        if (!this.db) {
            throw new Error("Database not initialized. Call initialize() first.");
        }
        try {
            const mode = overwrite ? "overwrite" : "create";
            const schema = this.getSchemaForProvider();
            const table = await this.db.createTable(this.tableName, entries, {
                mode,
                schema,
            });
            // Create vector index for efficient similarity search
            await table.createIndex("vector", {
                config: lancedb_1.Index.hnswSq({
                    distanceType: "cosine",
                }),
            });
            const rowCount = await table.countRows();
            logger_1.Logger.log("info", `Table ${this.tableName} created with ${rowCount} rows`);
            return table;
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to create table: ${error}`);
            throw error;
        }
    }
    /**
     * Get a reference to the documents table
     *
     * @returns Promise resolving to the table instance
     * @throws {Error} When database is not initialized or the table cannot be opened
     */
    async getTable() {
        if (!this.db) {
            throw new Error("Database not initialized. Call initialize() first.");
        }
        try {
            return await this.db.openTable(this.tableName);
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to open table ${this.tableName}: ${error}`);
            throw error;
        }
    }
    /**
     * Check if the documents table exists in the database
     *
     * Never throws: a missing connection or a failed lookup is reported as
     * `false` (lookup failures are logged).
     *
     * @returns Promise resolving to true if table exists, false otherwise
     */
    async tableExists() {
        if (!this.db) {
            return false;
        }
        try {
            const tableNames = await this.db.tableNames();
            return tableNames.includes(this.tableName);
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to check if table exists: ${error}`);
            return false;
        }
    }
    /**
     * Embed a query and run a nearest-neighbour search on the documents table
     *
     * Shared implementation behind searchSimilar() and search(). The query does
     * not set a distance metric explicitly, so LanceDB's own default applies.
     *
     * @private
     * @param query - Text query to search for
     * @param limit - Maximum number of results to return
     * @returns Promise resolving to the matching rows
     * @throws {Error} When the database or embedding manager is missing, the
     *   embedding cannot be generated, or the search itself fails
     */
    async runVectorSearch(query, limit) {
        const table = await this.getTable();
        if (!this.embedManager) {
            throw new Error("Embedding manager not initialized");
        }
        const queryEmbedding = await this.embedManager.getEmbedding(query);
        if (!queryEmbedding) {
            throw new Error("Failed to generate query embedding");
        }
        const results = await table.search(queryEmbedding).limit(limit).toArray();
        logger_1.Logger.log("info", `Vector search returned ${results.length} results for query: "${query}"`);
        return results;
    }
    /**
     * Perform vector similarity search for documents
     *
     * Generates an embedding for the query text and searches the documents
     * table for the nearest vectors. Failures are logged and rethrown.
     *
     * @param query - Text query to search for
     * @param limit - Maximum number of results to return (default: 8)
     * @returns Promise resolving to array of matching documents
     * @throws {Error} When the database is not initialized, embedding generation fails, or the search fails
     */
    async searchSimilar(query, limit = 8) {
        try {
            return await this.runVectorSearch(query, limit);
        }
        catch (error) {
            logger_1.Logger.log("error", `Search failed: ${error}`);
            throw error;
        }
    }
    /**
     * Report whether the documents table exists and how many rows it holds
     *
     * Best-effort: any failure is logged and reported as a missing table
     * rather than thrown.
     *
     * @returns Promise resolving to `{ tableExists: false }` or `{ tableExists: true, rowCount }`
     */
    async getStats() {
        try {
            const exists = await this.tableExists();
            if (!exists) {
                return { tableExists: false };
            }
            const table = await this.getTable();
            const rowCount = await table.countRows();
            return { tableExists: true, rowCount };
        }
        catch (error) {
            logger_1.Logger.log("error", `Failed to get database stats: ${error}`);
            return { tableExists: false };
        }
    }
    /**
     * Get the underlying database connection
     *
     * @returns The connection object, or null when initialize() has not connected yet
     */
    getDbConnection() {
        return this.db;
    }
    /**
     * Check whether initialize() has completed successfully
     *
     * @returns true once the database is ready for use
     */
    isInitialized() {
        return this.initialized;
    }
    /**
     * Perform vector similarity search and return the top matches
     *
     * Same search as searchSimilar(), with a different default result count
     * and an up-front check that the database has been set up. The search
     * runs against the documents table; the connection object itself offers
     * no search operation.
     *
     * @param query - Text query to search for
     * @param topK - Maximum number of results to return (default: 10)
     * @returns Promise resolving to array of matching documents
     * @throws {Error} When the database is not initialized, embedding generation fails, or the search fails
     */
    async search(query, topK = 10) {
        if (!this.db || !this.embedManager) {
            throw new Error("Database not initialized");
        }
        try {
            return await this.runVectorSearch(query, topK);
        }
        catch (error) {
            logger_1.Logger.log("error", `Vector search failed: ${error}`);
            throw error;
        }
    }
}
exports.VectorDatabase = VectorDatabase;
//# sourceMappingURL=database.js.map