UNPKG

mcp-orchestrator

Version:

MCP Orchestrator - Discover and install MCPs with automatic OAuth support. Uses Claude CLI for OAuth MCPs (Canva, Asana, etc). 34 trusted MCPs from Claude Partners.

347 lines (346 loc) • 12.3 kB
/**
 * Vector-Based MCP Discovery - Working Prototype
 *
 * Ranks MCP servers against a free-text query using local bag-of-words
 * vectors. No external dependencies are needed for the MVP: only Node's
 * built-in `fs` and `url` modules are used.
 */
import * as fs from 'fs';
import { pathToFileURL } from 'url';

/**
 * Split text into lowercase word tokens.
 *
 * Empty tokens (produced by `split` when the text starts or ends with a
 * non-word character) are dropped: `'anything'.includes('')` is always true,
 * so an empty token would otherwise match every keyword and category.
 *
 * @param {unknown} text - Text to tokenize; nullish values yield no tokens.
 * @returns {string[]} Lowercase, non-empty tokens.
 */
function tokenize(text) {
  return String(text ?? '')
    .toLowerCase()
    .split(/\W+/)
    .filter(Boolean);
}

/**
 * Keywords that overlap with at least one query word, in either direction
 * (keyword contains word, or word contains keyword).
 *
 * @param {string[]} keywords - Keywords of an MCP entry.
 * @param {string[]} queryWords - Non-empty query tokens.
 * @returns {string[]} The matching keywords, in their original order.
 */
function findKeywordMatches(keywords, queryWords) {
  return keywords.filter(
    (keyword) =>
      keyword !== '' &&
      queryWords.some((word) => keyword.includes(word) || word.includes(keyword)),
  );
}

/**
 * Identity used for deduplication: the package name when present, otherwise
 * the id. The prefix keeps the two namespaces from colliding.
 *
 * @param {{ packageName?: string, id?: string }} mcp
 * @returns {string | null} A key, or null when the entry has no identity.
 */
function dedupeKey(mcp) {
  if (mcp.packageName != null) return `pkg:${mcp.packageName}`;
  if (mcp.id != null) return `id:${mcp.id}`;
  return null;
}

/**
 * Build an index entry from raw MCP data: fills in missing `keywords` and
 * `metadata` and computes the embedding from name, description and keywords.
 *
 * @param {SimpleEmbedder} embedder - Embedder used to vectorize the entry.
 * @param {object} mcp - Raw MCP description.
 * @returns {object} A copy of `mcp` with `keywords`, `metadata`, `embedding` set.
 */
function createIndexEntry(embedder, mcp) {
  const keywords = mcp.keywords ?? [];
  return {
    ...mcp,
    keywords,
    metadata: mcp.metadata ?? {},
    embedding: embedder.embed(`${mcp.name} ${mcp.description} ${keywords.join(' ')}`),
  };
}

/**
 * Whether this module is the script Node was started with.
 *
 * `__filename` does not exist in ES modules, so the entry script path from
 * `process.argv[1]` is resolved (symlinks included) and compared with
 * `import.meta.url` instead.
 *
 * @returns {boolean} True only when this file is the entry point.
 */
function isExecutedDirectly() {
  const entry = process.argv[1];
  if (!entry) return false;
  try {
    return import.meta.url === pathToFileURL(fs.realpathSync(entry)).href;
  } catch {
    // Entry path could not be resolved (e.g. REPL or eval): not a direct run.
    return false;
  }
}

/**
 * Simple embedding generator using keyword extraction
 * (In production, use OpenAI/Ollama/HuggingFace for real embeddings)
 */
class SimpleEmbedder {
  /** Maps each known term to its slot in the embedding vector. */
  vocabulary = new Map();

  constructor() {
    // Build vocabulary from common MCP-related terms
    const terms = [
      'file', 'data', 'api', 'database', 'web', 'csv', 'json', 'sql',
      'git', 'github', 'analyze', 'scrape', 'automate', 'cloud', 'local',
      'read', 'write', 'query', 'search', 'fetch', 'process', 'transform',
      'python', 'node', 'browser', 'server', 'client', 'tool', 'integration'
    ];
    terms.forEach((term, index) => {
      this.vocabulary.set(term, index);
    });
  }

  /**
   * Embed text as an L2-normalised vocabulary term-count vector.
   *
   * @param {string} text - Text to embed.
   * @returns {number[]} Vector with one slot per vocabulary term; all zeros
   *   when the text contains no vocabulary term.
   */
  embed(text) {
    const vector = new Array(this.vocabulary.size).fill(0);

    // Plain term counts (no inverse-document-frequency weighting).
    for (const word of tokenize(text)) {
      const slot = this.vocabulary.get(word);
      if (slot !== undefined) {
        vector[slot] += 1;
      }
    }

    // Normalize
    const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
    return magnitude > 0 ? vector.map(v => v / magnitude) : vector;
  }

  /**
   * Dot product of two vectors. This equals the cosine similarity only when
   * both vectors are unit length, which is what `embed` returns.
   *
   * Only the overlapping prefix is compared, so a missing or shorter vector
   * contributes 0 instead of producing NaN or throwing.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number} The dot product over the shared length.
   */
  cosineSimilarity(a, b) {
    const length = Math.min(a?.length ?? 0, b?.length ?? 0);
    let dotProduct = 0;
    for (let i = 0; i < length; i++) {
      dotProduct += a[i] * b[i];
    }
    return dotProduct;
  }
}

/**
 * The Vector-Based Discovery Engine
 */
export class VectorDiscoveryEngine {
  /** Indexed MCP entries, each carrying an `embedding` vector. */
  index = [];
  embedder;
  indexPath;

  /**
   * @param {string} [indexPath] - Path of the JSON file the index is loaded
   *   from and saved to.
   */
  constructor(indexPath = './mcp-vector-index.json') {
    this.embedder = new SimpleEmbedder();
    this.indexPath = indexPath;
  }

  /**
   * Initialize with crawled MCP data: load the index file when it exists,
   * otherwise build a new index and save it.
   *
   * @throws {Error} When the index file is not valid JSON or is not an array.
   */
  async initialize() {
    // Try to load existing index
    if (fs.existsSync(this.indexPath)) {
      console.log('šŸ“š Loading existing MCP index...');
      const data = fs.readFileSync(this.indexPath, 'utf-8');
      const parsed = JSON.parse(data);
      // Fail here with a clear message rather than later inside `discover`.
      if (!Array.isArray(parsed)) {
        throw new Error(`MCP index at ${this.indexPath} is not a JSON array`);
      }
      this.index = parsed;
      console.log(`āœ… Loaded ${this.index.length} MCPs from index`);
    } else {
      console.log('šŸ”Ø Building new index...');
      await this.buildIndex();
    }
  }

  /**
   * Build/rebuild the index from multiple sources and save it to disk.
   */
  async buildIndex() {
    console.log('šŸš€ Starting MCP index build...');

    // 1. Crawl NPM
    const npmMCPs = await this.crawlNPM();
    console.log(`šŸ“¦ Found ${npmMCPs.length} MCPs on npm`);

    // 2. Crawl GitHub (mock for now)
    const githubMCPs = await this.crawlGitHub();
    console.log(`šŸ™ Found ${githubMCPs.length} MCPs on GitHub`);

    // 3. Add known good MCPs
    const knownMCPs = this.getKnownMCPs();
    console.log(`āœ… Added ${knownMCPs.length} verified MCPs`);

    // 4. Merge and deduplicate
    const allMCPs = [...npmMCPs, ...githubMCPs, ...knownMCPs];
    const uniqueMCPs = this.deduplicateMCPs(allMCPs);

    // 5. Generate embeddings for each MCP
    this.index = uniqueMCPs.map(mcp => createIndexEntry(this.embedder, mcp));

    // 6. Save index
    await this.saveIndex();
    console.log(`šŸ’¾ Indexed ${this.index.length} MCPs successfully!`);
  }

  /**
   * Crawl NPM for MCP packages
   *
   * @returns {Promise<object[]>} MCP entries (currently mock data).
   */
  async crawlNPM() {
    // In real implementation, use npm registry API
    // For now, return mock data
    return [
      {
        id: 'filesystem',
        name: 'Filesystem MCP',
        description: 'Read, write, and manage files and directories',
        packageName: '@modelcontextprotocol/server-filesystem',
        embedding: [],
        keywords: ['file', 'read', 'write', 'directory'],
        metadata: { downloads: 5000, runtime: 'node', category: ['storage', 'local'] }
      },
      {
        id: 'notion',
        name: 'Notion MCP',
        description: 'Official Notion API integration for pages and databases',
        packageName: '@notionhq/notion-mcp-server',
        embedding: [],
        keywords: ['notion', 'api', 'database', 'pages'],
        metadata: { downloads: 3000, runtime: 'node', category: ['productivity', 'api'] }
      }
      // In reality, this would return 1000+ entries
    ];
  }

  /**
   * Crawl GitHub for MCP repositories
   *
   * @returns {Promise<object[]>} MCP entries (currently mock data).
   */
  async crawlGitHub() {
    // Use GitHub API to find MCP repos
    // For now, return mock data
    return [
      {
        id: 'pandas-analysis',
        name: 'Pandas Analysis MCP',
        description: 'Data analysis and manipulation with Pandas',
        packageName: 'mcp-pandas-server',
        embedding: [],
        keywords: ['pandas', 'data', 'csv', 'analysis', 'statistics'],
        metadata: { stars: 150, runtime: 'python', category: ['data', 'analysis'] }
      }
    ];
  }

  /**
   * Get known good MCPs
   *
   * @returns {object[]} Hard-coded list of verified MCP entries.
   */
  getKnownMCPs() {
    // These are verified, high-quality MCPs
    return [
      {
        id: 'puppeteer',
        name: 'Puppeteer MCP',
        description: 'Browser automation for screenshots, PDFs, and scraping',
        packageName: 'puppeteer-mcp-server',
        embedding: [],
        keywords: ['browser', 'automation', 'screenshot', 'pdf', 'scrape'],
        metadata: { downloads: 2000, runtime: 'node', category: ['automation', 'browser'] }
      },
      {
        id: 'sqlite',
        name: 'SQLite MCP',
        description: 'Query and manage SQLite databases locally',
        packageName: 'sqlite-mcp-server',
        embedding: [],
        keywords: ['sqlite', 'database', 'sql', 'query', 'local'],
        metadata: { downloads: 1500, runtime: 'node', category: ['database', 'local'] }
      }
    ];
  }

  /**
   * Deduplicate MCPs by package name (falling back to id when an entry has
   * no package name). The first occurrence wins. Entries with neither a
   * package name nor an id cannot be compared and are all kept.
   *
   * @param {object[]} mcps
   * @returns {object[]} Entries with duplicates removed, order preserved.
   */
  deduplicateMCPs(mcps) {
    const seen = new Set();
    return mcps.filter(mcp => {
      const key = dedupeKey(mcp);
      if (key === null) {
        return true;
      }
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });
  }

  /**
   * Save index to disk as pretty-printed JSON at `indexPath`.
   */
  async saveIndex() {
    fs.writeFileSync(this.indexPath, JSON.stringify(this.index, null, 2));
  }

  /**
   * SEMANTIC SEARCH - The Magic!
   *
   * Scores every indexed MCP as `similarity * boost`, where the boost rewards
   * popularity (downloads > 1000, stars > 100) and keyword overlap.
   *
   * @param {string} query - Free-text query.
   * @param {number} [limit=10] - Maximum number of results.
   * @returns {Promise<Array<{ mcp: object, score: number, reason: string }>>}
   *   Matches with score > 0.1, best first.
   */
  async discover(query, limit = 10) {
    // Generate embedding for the query
    const queryEmbedding = this.embedder.embed(query);
    // Tokenize once; the query does not change per MCP.
    const queryWords = tokenize(query);

    // Calculate similarity scores for all MCPs
    const scores = this.index.map(mcp => {
      const metadata = mcp.metadata ?? {};
      const similarity = this.embedder.cosineSimilarity(queryEmbedding, mcp.embedding);

      // Boost score based on metadata
      let boost = 1.0;
      if (metadata.downloads && metadata.downloads > 1000) boost *= 1.1;
      if (metadata.stars && metadata.stars > 100) boost *= 1.1;

      // Keyword matching bonus
      const keywordMatches = findKeywordMatches(mcp.keywords ?? [], queryWords);
      if (keywordMatches.length > 0) boost *= (1 + keywordMatches.length * 0.2);

      return {
        mcp,
        score: similarity * boost,
        reason: this.generateReason(mcp, queryWords, similarity)
      };
    });

    // Drop very low relevance, then sort by score and return top results
    return scores
      .filter(s => s.score > 0.1)
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  /**
   * Generate human-readable reason for match
   *
   * @param {object} mcp - Index entry.
   * @param {string[]} queryWords - Query tokens; empty tokens are ignored.
   * @param {number} similarity - Similarity between query and entry.
   * @returns {string} Reasons joined with '; ', or 'Related to query'.
   */
  generateReason(mcp, queryWords, similarity) {
    const reasons = [];
    const metadata = mcp.metadata ?? {};
    // An empty token is a substring of everything; never let it match.
    const words = queryWords.filter(Boolean);

    // Check for keyword matches
    const keywordMatches = findKeywordMatches(mcp.keywords ?? [], words);
    if (keywordMatches.length > 0) {
      reasons.push(`Matches keywords: ${keywordMatches.join(', ')}`);
    }

    // Check category matches
    if (metadata.category) {
      const categoryMatches = metadata.category.filter(c => words.some(w => c.includes(w)));
      if (categoryMatches.length > 0) {
        reasons.push(`Category: ${categoryMatches.join(', ')}`);
      }
    }

    // Semantic similarity
    if (similarity > 0.7) {
      reasons.push('High semantic relevance');
    } else if (similarity > 0.4) {
      reasons.push('Good semantic match');
    }

    // Popularity
    if (metadata.downloads && metadata.downloads > 1000) {
      reasons.push(`Popular (${metadata.downloads} downloads)`);
    }

    return reasons.join('; ') || 'Related to query';
  }

  /**
   * Add a new MCP to the index dynamically and save the index.
   *
   * Missing `keywords` / `metadata` are filled with empty defaults. An entry
   * with the same package name (or id, when there is no package name) as an
   * existing one replaces it, so the index stays deduplicated.
   *
   * @param {object} mcp - Raw MCP description.
   */
  async addMCP(mcp) {
    const entry = createIndexEntry(this.embedder, mcp);
    const key = dedupeKey(entry);
    const existingAt = key === null
      ? -1
      : this.index.findIndex(candidate => dedupeKey(candidate) === key);

    if (existingAt >= 0) {
      this.index[existingAt] = entry;
    } else {
      this.index.push(entry);
    }

    await this.saveIndex();
    console.log(`āœ… Added ${mcp.name} to index`);
  }

  /**
   * Update the index with fresh data
   */
  async updateIndex() {
    console.log('šŸ”„ Updating MCP index...');
    await this.buildIndex();
  }

  /**
   * Get statistics about the index
   *
   * @returns {{ totalMCPs: number, byRuntime: object, byCategory: object,
   *   mostPopular: Array<{ name: string, downloads: number }> }}
   *   Counts per runtime and category, plus the five most-downloaded MCPs.
   */
  getStats() {
    const stats = {
      totalMCPs: this.index.length,
      byRuntime: {},
      byCategory: {},
      mostPopular: []
    };

    this.index.forEach(mcp => {
      // Count by runtime
      const runtime = mcp.metadata?.runtime || 'unknown';
      stats.byRuntime[runtime] = (stats.byRuntime[runtime] || 0) + 1;

      // Count by category
      mcp.metadata?.category?.forEach(cat => {
        stats.byCategory[cat] = (stats.byCategory[cat] || 0) + 1;
      });
    });

    // Find most popular (filter returns a copy, so the index is not reordered)
    stats.mostPopular = this.index
      .filter(m => m.metadata?.downloads)
      .sort((a, b) => (b.metadata.downloads || 0) - (a.metadata.downloads || 0))
      .slice(0, 5)
      .map(m => ({ name: m.name, downloads: m.metadata.downloads }));

    return stats;
  }
}

// Example usage
/**
 * Demo: initializes an engine at the default index path, runs a few sample
 * queries, and prints the results and index statistics.
 */
async function testVectorDiscovery() {
  const engine = new VectorDiscoveryEngine();
  await engine.initialize();

  // Test queries
  const queries = [
    "I need to analyze CSV files with statistics",
    "automate browser testing",
    "work with Notion API",
    "manage local SQLite database"
  ];

  for (const query of queries) {
    console.log(`\nšŸ” Query: "${query}"`);
    const results = await engine.discover(query, 5);
    results.forEach((result, i) => {
      console.log(`${i + 1}. ${result.mcp.name} (score: ${result.score.toFixed(3)})`);
      console.log(` Package: ${result.mcp.packageName}`);
      console.log(` Reason: ${result.reason}`);
    });
  }

  // Show statistics
  console.log('\nšŸ“Š Index Statistics:');
  console.log(engine.getStats());
}

// Run test if this file is executed directly
if (isExecutedDirectly()) {
  testVectorDiscovery().catch(console.error);
}