UNPKG

dependency-context

Version:

MCP server for providing dependency documentation context to AI assistants

208 lines 8.53 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.__test__splitIntoChunks = __test__splitIntoChunks;
exports.indexDocumentation = indexDocumentation;
exports.searchVectorStore = searchVectorStore;
const fs_extra_1 = __importDefault(require("fs-extra"));
const path_1 = __importDefault(require("path"));
const transformers_1 = require("@xenova/transformers");

// Defaults used when the caller's config omits (or supplies an unusable) value.
const DEFAULT_MIN_CHUNK_SIZE = 800;
const DEFAULT_MAX_CHUNK_SIZE = 8000;
const DEFAULT_EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
const DEFAULT_CHUNKS_RETURNED = 5;
// Separator inserted between paragraphs/sections that are merged into one chunk.
const CHUNK_SEPARATOR = "\n\n";

// Cache of embedding pipelines, keyed by model name. The cached value is the
// promise returned by `pipeline(...)`, so concurrent first callers share a
// single model load instead of each starting their own.
const embeddingGeneratorCache = new Map();

/**
 * Thin wrapper around splitIntoChunks, exported so tests can reach it.
 *
 * @param {{content: string}} document - Document whose `content` is split.
 * @param {object} [config] - Optional `minChunkSize` / `maxChunkSize` settings.
 * @returns {string[]} The text chunks.
 */
function __test__splitIntoChunks(document, config) {
    return splitIntoChunks(document, config);
}

/**
 * Cut `text` into consecutive pieces of at most `maxSize` UTF-16 code units.
 * A cut is moved back by one unit when it would otherwise separate a
 * surrogate pair (unless that would produce an empty piece).
 *
 * @param {string} text - Text to cut.
 * @param {number} maxSize - Positive integer upper bound for each piece.
 * @returns {string[]} Pieces that concatenate back to `text`.
 */
function sliceToMaxSize(text, maxSize) {
    if (text.length <= maxSize) {
        return [text];
    }
    const pieces = [];
    let start = 0;
    while (start < text.length) {
        let end = Math.min(start + maxSize, text.length);
        if (end < text.length && end - start > 1) {
            const lastUnit = text.charCodeAt(end - 1);
            const isHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
            if (isHighSurrogate) {
                end -= 1;
            }
        }
        pieces.push(text.slice(start, end));
        start = end;
    }
    return pieces;
}

/**
 * Split a document into text chunks.
 *
 * The content is first split on markdown header lines. Sections that fit in
 * `maxChunkSize` become one chunk; larger sections are split on blank lines
 * and re-packed paragraph by paragraph. A piece shorter than `minChunkSize`
 * is appended to the previous chunk when the result still fits, otherwise it
 * is emitted on its own, so no text is discarded.
 *
 * @param {{content: string}} document - Document whose `content` is split.
 * @param {object} [config]
 * @param {number} [config.minChunkSize=800] - Pieces shorter than this are
 *   merged into the previous chunk when possible. `0` disables merging.
 * @param {number} [config.maxChunkSize=8000] - Upper bound on chunk length.
 * @returns {string[]} Trimmed, non-empty chunks.
 */
function splitIntoChunks(document, config) {
    const text = typeof document?.content === "string" ? document.content : "";
    const chunks = [];
    // Validate sizes explicitly: `||` would turn a deliberate minChunkSize of 0
    // into the default, and a non-positive maxChunkSize could never be honoured.
    const requestedMin = config?.minChunkSize;
    const requestedMax = config?.maxChunkSize;
    const minSize = Number.isFinite(requestedMin) && requestedMin >= 0
        ? requestedMin
        : DEFAULT_MIN_CHUNK_SIZE;
    const maxSize = Number.isFinite(requestedMax) && requestedMax >= 1
        ? Math.floor(requestedMax)
        : DEFAULT_MAX_CHUNK_SIZE;

    // Append one piece of text. Undersized pieces are folded into the previous
    // chunk when the merged chunk (separator included) stays within maxSize.
    const addChunk = (piece) => {
        const trimmed = piece.trim();
        if (trimmed.length === 0) {
            return;
        }
        const lastIndex = chunks.length - 1;
        const lastChunk = chunks[lastIndex];
        const mergedLength = lastChunk === undefined
            ? Infinity
            : lastChunk.length + CHUNK_SEPARATOR.length + trimmed.length;
        if (trimmed.length < minSize && mergedLength <= maxSize) {
            chunks[lastIndex] = `${lastChunk}${CHUNK_SEPARATOR}${trimmed}`;
            return;
        }
        chunks.push(trimmed);
    };

    // NOTE(review): the header lines themselves are consumed by this split and
    // therefore do not appear in any chunk - confirm that is intended.
    const headerSplits = text.split(/^#{1,6}\s+.+$/m);
    for (const split of headerSplits) {
        if (split.trim().length === 0) {
            continue;
        }
        if (split.length <= maxSize) {
            addChunk(split);
            continue;
        }
        // Section is too large: re-pack it paragraph by paragraph.
        let currentChunk = "";
        for (const paragraph of split.split(/\n{2,}/)) {
            if (paragraph.trim().length === 0) {
                continue;
            }
            // A single paragraph may itself exceed maxSize; cut it so the
            // limit is actually enforced.
            for (const piece of sliceToMaxSize(paragraph, maxSize)) {
                const separatorLength = currentChunk ? CHUNK_SEPARATOR.length : 0;
                if (currentChunk.length + separatorLength + piece.length <= maxSize) {
                    currentChunk += (currentChunk ? CHUNK_SEPARATOR : "") + piece;
                }
                else {
                    addChunk(currentChunk);
                    currentChunk = piece;
                }
            }
        }
        addChunk(currentChunk);
    }
    return chunks;
}

/**
 * Generate an embedding for a text chunk.
 *
 * The feature-extraction pipeline is loaded lazily and cached per model name,
 * so changing `config.embeddingModel` between calls uses the requested model.
 *
 * @param {string} text - Text to embed.
 * @param {object} [config]
 * @param {string} [config.embeddingModel="Xenova/all-MiniLM-L6-v2"] - Model name
 *   passed to the transformers `pipeline` factory.
 * @returns {Promise<number[]>} The pipeline's output data as a plain array.
 */
async function generateEmbedding(text, config) {
    const modelName = config?.embeddingModel || DEFAULT_EMBEDDING_MODEL;
    let generatorPromise = embeddingGeneratorCache.get(modelName);
    if (!generatorPromise) {
        generatorPromise = (0, transformers_1.pipeline)("feature-extraction", modelName);
        embeddingGeneratorCache.set(modelName, generatorPromise);
        const pendingLoad = generatorPromise;
        // Evict a failed load so a later call can retry instead of re-awaiting
        // the same rejected promise forever. The rejection itself still
        // reaches the caller through the `await` below.
        pendingLoad.catch(() => {
            if (embeddingGeneratorCache.get(modelName) === pendingLoad) {
                embeddingGeneratorCache.delete(modelName);
            }
        });
    }
    const generator = await generatorPromise;
    const result = await generator(text, {
        pooling: "mean",
        normalize: true,
    });
    // Convert to a regular array so it can be serialized as JSON.
    return Array.from(result.data);
}

/**
 * Index documentation for a dependency.
 *
 * Entries are stored in `<projectPath>/.dependency-context/vector-store.json`.
 * Existing entries with the same repository URL, dependency name and file path
 * as one of `documents` are replaced; all other entries are kept.
 *
 * @param {string} projectPath - Project root that holds the storage directory.
 * @param {{name: string}} dependency - Dependency being indexed.
 * @param {{url: string}} repository - Repository the documents came from.
 * @param {Array<{path: string, content: string}>} documents - Documents to index.
 * @param {object} [config] - Chunking and embedding settings.
 * @returns {Promise<void>}
 * @throws Re-throws any error after logging it.
 */
async function indexDocumentation(projectPath, dependency, repository, documents, config) {
    try {
        console.log(`Indexing documentation for ${dependency.name}...`);
        // Create storage directory for this project
        const storageDir = path_1.default.join(projectPath, ".dependency-context");
        await fs_extra_1.default.ensureDir(storageDir);
        // Create or load existing vector store
        const vectorStorePath = path_1.default.join(storageDir, "vector-store.json");
        let vectorStore;
        if (await fs_extra_1.default.pathExists(vectorStorePath)) {
            vectorStore = await fs_extra_1.default.readJson(vectorStorePath);
        }
        else {
            vectorStore = { entries: [] };
        }
        // Drop stale entries for the files that are about to be re-indexed.
        const filesToIndex = new Set(documents.map((doc) => doc.path));
        vectorStore.entries = vectorStore.entries.filter((entry) => {
            return !(entry.metadata.repository === repository.url &&
                entry.metadata.dependency === dependency.name &&
                filesToIndex.has(entry.metadata.file));
        });
        // Count only what this run adds, so the summary log is not inflated
        // by entries that belong to other dependencies.
        let addedChunks = 0;
        for (const document of documents) {
            const chunks = splitIntoChunks(document, config);
            for (const chunk of chunks) {
                if (!chunk.trim()) {
                    continue;
                }
                const embedding = await generateEmbedding(chunk, config);
                vectorStore.entries.push({
                    chunk,
                    embedding,
                    metadata: {
                        repository: repository.url,
                        file: document.path,
                        dependency: dependency.name,
                    },
                });
                addedChunks += 1;
            }
        }
        // Save the updated vector store
        await fs_extra_1.default.writeJson(vectorStorePath, vectorStore);
        console.log(`Indexed ${documents.length} documents with ${addedChunks} chunks for ${dependency.name}`);
    }
    catch (error) {
        console.error(`Error indexing documentation for ${dependency.name}:`, error);
        throw error;
    }
}

/**
 * Compute cosine similarity between two vectors.
 *
 * @param {ArrayLike<number>} vecA
 * @param {ArrayLike<number>} vecB
 * @returns {number} The cosine similarity, or 0 when it is undefined (missing
 *   vectors, different lengths, a zero-length or zero-norm vector, or
 *   non-numeric components). Returning 0 instead of NaN keeps sort
 *   comparators that subtract scores well-defined.
 */
function cosineSimilarity(vecA, vecB) {
    if (!vecA || !vecB || vecA.length === 0 || vecA.length !== vecB.length) {
        return 0;
    }
    let dotProduct = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < vecA.length; i++) {
        dotProduct += vecA[i] * vecB[i];
        normA += vecA[i] * vecA[i];
        normB += vecB[i] * vecB[i];
    }
    const similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
    return Number.isFinite(similarity) ? similarity : 0;
}

/**
 * Search the vector store for text chunks similar to a query.
 *
 * @param {string} projectPath - Project root that holds the storage directory.
 * @param {string} query - Free-text query to embed and compare.
 * @param {string} [repositoryContext] - When given, only entries whose
 *   repository URL or file path contains this substring are considered.
 * @param {object} [config]
 * @param {number} [config.chunksReturned=5] - Maximum number of results.
 * @returns {Promise<Array<{text_chunk: string, source_repository: string,
 *   source_file: string, similarity_score: number}>>} Results sorted by
 *   descending similarity. An empty array is returned when the store is
 *   missing or when any error occurs (the error is logged, not thrown).
 */
async function searchVectorStore(projectPath, query, repositoryContext, config) {
    try {
        // Check for the store first: there is no point loading the embedding
        // model when there is nothing to search.
        const storageDir = path_1.default.join(projectPath, ".dependency-context");
        const vectorStorePath = path_1.default.join(storageDir, "vector-store.json");
        if (!(await fs_extra_1.default.pathExists(vectorStorePath))) {
            console.error(`Vector store not found for project: ${projectPath}`);
            return [];
        }
        const vectorStore = await fs_extra_1.default.readJson(vectorStorePath);
        // Narrow by repository context before scoring so similarity is only
        // computed for entries that can appear in the result.
        const candidates = repositoryContext
            ? vectorStore.entries.filter((entry) => {
                return (entry.metadata.repository.includes(repositoryContext) ||
                    entry.metadata.file.includes(repositoryContext));
            })
            : vectorStore.entries;
        if (candidates.length === 0) {
            return [];
        }
        const queryEmbedding = await generateEmbedding(query, config);
        const results = candidates.map((entry) => {
            return {
                text_chunk: entry.chunk,
                source_repository: entry.metadata.repository,
                source_file: entry.metadata.file,
                similarity_score: cosineSimilarity(queryEmbedding, entry.embedding),
            };
        });
        // Sort by similarity score (descending)
        results.sort((a, b) => b.similarity_score - a.similarity_score);
        // Return top N results based on config (default is 5)
        const numResults = config?.chunksReturned || DEFAULT_CHUNKS_RETURNED;
        return results.slice(0, numResults);
    }
    catch (error) {
        console.error(`Error searching vector store:`, error);
        return [];
    }
}
//# sourceMappingURL=index.js.map