mcp-orchestrator
Version:
MCP Orchestrator - Discover and install MCPs with automatic OAuth support. Uses Claude CLI for OAuth MCPs (Canva, Asana, etc). 34 trusted MCPs from Claude Partners.
347 lines (346 loc) • 12.3 kB
JavaScript
/**
* Vector-Based MCP Discovery - Working Prototype
* This is a real, implementable solution using local vectors
* No external dependencies needed for MVP!
*/
import * as fs from 'fs';
import { pathToFileURL } from 'url';
/**
 * Bag-of-words embedder over a small fixed vocabulary of MCP-related terms.
 * Each vocabulary term owns one vector dimension. Only exact, lower-cased
 * token matches are counted: there is no stemming and no IDF weighting.
 * (In production, use OpenAI/Ollama/HuggingFace for real embeddings)
 */
class SimpleEmbedder {
    /** Maps a vocabulary term to its dimension index in the embedding. */
    vocabulary = new Map();

    constructor() {
        // Build vocabulary from common MCP-related terms
        const terms = [
            'file', 'data', 'api', 'database', 'web', 'csv', 'json', 'sql',
            'git', 'github', 'analyze', 'scrape', 'automate', 'cloud', 'local',
            'read', 'write', 'query', 'search', 'fetch', 'process', 'transform',
            'python', 'node', 'browser', 'server', 'client', 'tool', 'integration'
        ];
        terms.forEach((term, index) => {
            this.vocabulary.set(term, index);
        });
    }

    /**
     * Turn text into a term-count vector scaled to unit length.
     *
     * @param {string} text - Text to embed; split on non-word characters.
     * @returns {number[]} Vector with one slot per vocabulary term. It has
     *   unit length, or is all zeros when no vocabulary term occurs in `text`.
     */
    embed(text) {
        const vector = new Array(this.vocabulary.size).fill(0);
        for (const word of text.toLowerCase().split(/\W+/)) {
            const slot = this.vocabulary.get(word);
            if (slot !== undefined) {
                vector[slot] += 1;
            }
        }
        // Normalize so that similarity is independent of text length.
        const magnitude = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0));
        return magnitude > 0 ? vector.map(v => v / magnitude) : vector;
    }

    /**
     * Cosine similarity of two numeric vectors.
     *
     * Inputs need not be normalized. Vectors of different length are compared
     * as if the shorter one were zero-padded, and a missing, empty or all-zero
     * vector yields 0, so a stale stored embedding can never produce NaN.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either norm is 0.
     */
    cosineSimilarity(a, b) {
        const lengthA = a?.length ?? 0;
        const lengthB = b?.length ?? 0;
        const shared = Math.min(lengthA, lengthB);
        let dotProduct = 0;
        for (let i = 0; i < shared; i++) {
            dotProduct += a[i] * b[i];
        }
        let squaresA = 0;
        for (let i = 0; i < lengthA; i++) {
            squaresA += a[i] * a[i];
        }
        let squaresB = 0;
        for (let i = 0; i < lengthB; i++) {
            squaresB += b[i] * b[i];
        }
        const norm = Math.sqrt(squaresA * squaresB);
        return norm > 0 ? dotProduct / norm : 0;
    }
}
/**
 * The Vector-Based Discovery Engine.
 *
 * Keeps an in-memory list of MCP entries (each carrying an `embedding`
 * produced by the embedder), persists that list as JSON at `indexPath`, and
 * ranks entries against free-text queries by embedding similarity multiplied
 * by popularity and keyword boosts.
 */
export class VectorDiscoveryEngine {
    /** Indexed MCP entries, each with an `embedding` array. */
    index = [];
    /** Embedder used for both index entries and queries. */
    embedder;
    /** Location of the JSON file the index is loaded from / saved to. */
    indexPath;

    /**
     * @param {string} [indexPath] - Path of the JSON index file.
     */
    constructor(indexPath = './mcp-vector-index.json') {
        this.embedder = new SimpleEmbedder();
        this.indexPath = indexPath;
    }

    /**
     * Lower-case and split text on non-word characters, dropping the empty
     * tokens `split` produces for leading/trailing punctuation. An empty token
     * is a substring of every string and would match every keyword.
     */
    static #tokenize(text) {
        return String(text ?? '').toLowerCase().split(/\W+/).filter(Boolean);
    }

    /** Keywords of `mcp` that contain, or are contained in, any query word. */
    static #matchingKeywords(mcp, queryWords) {
        return (mcp.keywords ?? []).filter(
            k => k !== '' && queryWords.some(w => k.includes(w) || w.includes(k))
        );
    }

    /** Text that gets embedded for an MCP: name, description and keywords. */
    static #embeddingText(mcp) {
        return `${mcp.name} ${mcp.description} ${(mcp.keywords ?? []).join(' ')}`;
    }

    /** Read and parse the index file; returns null (after warning) if unusable. */
    static #readIndexFile(indexPath) {
        try {
            const parsed = JSON.parse(fs.readFileSync(indexPath, 'utf-8'));
            if (Array.isArray(parsed)) {
                return parsed;
            }
            console.warn(`MCP index at ${indexPath} is not a JSON array; rebuilding`);
        }
        catch (err) {
            console.warn(`Could not read MCP index at ${indexPath} (${err.message}); rebuilding`);
        }
        return null;
    }

    /**
     * Load the index from `indexPath` when the file exists and holds a JSON
     * array; otherwise (missing, unreadable or malformed file) build it.
     */
    async initialize() {
        if (fs.existsSync(this.indexPath)) {
            console.log('📂 Loading existing MCP index...');
            const loaded = VectorDiscoveryEngine.#readIndexFile(this.indexPath);
            if (loaded !== null) {
                this.index = loaded;
                console.log(`✅ Loaded ${this.index.length} MCPs from index`);
                return;
            }
        }
        console.log('🔨 Building new index...');
        await this.buildIndex();
    }

    /**
     * Build/rebuild the index from all sources, embed every entry and save
     * the result to disk. Replaces the current in-memory index.
     */
    async buildIndex() {
        console.log('🚀 Starting MCP index build...');
        // 1-2. Crawl NPM and GitHub (mock for now); the two are independent.
        const [npmMCPs, githubMCPs] = await Promise.all([
            this.crawlNPM(),
            this.crawlGitHub()
        ]);
        console.log(`📦 Found ${npmMCPs.length} MCPs on npm`);
        console.log(`🐙 Found ${githubMCPs.length} MCPs on GitHub`);
        // 3. Add known good MCPs
        const knownMCPs = this.getKnownMCPs();
        console.log(`✅ Added ${knownMCPs.length} verified MCPs`);
        // 4. Merge and deduplicate (earlier sources win on a packageName clash)
        const uniqueMCPs = this.deduplicateMCPs([...npmMCPs, ...githubMCPs, ...knownMCPs]);
        // 5. Generate embeddings for each MCP
        this.index = uniqueMCPs.map(mcp => ({
            ...mcp,
            embedding: this.embedder.embed(VectorDiscoveryEngine.#embeddingText(mcp))
        }));
        // 6. Save index
        await this.saveIndex();
        console.log(`💾 Indexed ${this.index.length} MCPs successfully!`);
    }

    /**
     * Crawl NPM for MCP packages.
     * @returns {Promise<object[]>} MCP entries with empty embeddings.
     */
    async crawlNPM() {
        // In real implementation, use npm registry API
        // For now, return mock data
        return [
            {
                id: 'filesystem',
                name: 'Filesystem MCP',
                description: 'Read, write, and manage files and directories',
                packageName: '@modelcontextprotocol/server-filesystem',
                embedding: [],
                keywords: ['file', 'read', 'write', 'directory'],
                metadata: {
                    downloads: 5000,
                    runtime: 'node',
                    category: ['storage', 'local']
                }
            },
            {
                id: 'notion',
                name: 'Notion MCP',
                description: 'Official Notion API integration for pages and databases',
                packageName: '@notionhq/notion-mcp-server',
                embedding: [],
                keywords: ['notion', 'api', 'database', 'pages'],
                metadata: {
                    downloads: 3000,
                    runtime: 'node',
                    category: ['productivity', 'api']
                }
            }
            // In reality, this would return 1000+ entries
        ];
    }

    /**
     * Crawl GitHub for MCP repositories.
     * @returns {Promise<object[]>} MCP entries with empty embeddings.
     */
    async crawlGitHub() {
        // Use GitHub API to find MCP repos
        // For now, return mock data
        return [
            {
                id: 'pandas-analysis',
                name: 'Pandas Analysis MCP',
                description: 'Data analysis and manipulation with Pandas',
                packageName: 'mcp-pandas-server',
                embedding: [],
                keywords: ['pandas', 'data', 'csv', 'analysis', 'statistics'],
                metadata: {
                    stars: 150,
                    runtime: 'python',
                    category: ['data', 'analysis']
                }
            }
        ];
    }

    /**
     * Get known good MCPs.
     * @returns {object[]} Hard-coded MCP entries with empty embeddings.
     */
    getKnownMCPs() {
        // These are verified, high-quality MCPs
        return [
            {
                id: 'puppeteer',
                name: 'Puppeteer MCP',
                description: 'Browser automation for screenshots, PDFs, and scraping',
                packageName: 'puppeteer-mcp-server',
                embedding: [],
                keywords: ['browser', 'automation', 'screenshot', 'pdf', 'scrape'],
                metadata: {
                    downloads: 2000,
                    runtime: 'node',
                    category: ['automation', 'browser']
                }
            },
            {
                id: 'sqlite',
                name: 'SQLite MCP',
                description: 'Query and manage SQLite databases locally',
                packageName: 'sqlite-mcp-server',
                embedding: [],
                keywords: ['sqlite', 'database', 'sql', 'query', 'local'],
                metadata: {
                    downloads: 1500,
                    runtime: 'node',
                    category: ['database', 'local']
                }
            }
        ];
    }

    /**
     * Deduplicate MCPs by package name, keeping the first occurrence.
     * @param {object[]} mcps
     * @returns {object[]} New array; the input is not modified.
     */
    deduplicateMCPs(mcps) {
        const seen = new Set();
        return mcps.filter(({ packageName }) => {
            if (seen.has(packageName)) {
                return false;
            }
            seen.add(packageName);
            return true;
        });
    }

    /**
     * Save index to disk as pretty-printed JSON (synchronous write).
     */
    async saveIndex() {
        fs.writeFileSync(this.indexPath, JSON.stringify(this.index, null, 2));
    }

    /**
     * Rank indexed MCPs against a free-text query.
     *
     * score = similarity(query, entry) * boost, where boost is multiplied by
     * 1.1 for > 1000 downloads, by 1.1 for > 100 stars, and by
     * (1 + 0.2 * n) for n matching keywords. Entries scoring <= 0.1 are
     * dropped.
     *
     * @param {string} query - Free-text description of what is needed.
     * @param {number} [limit] - Maximum number of results.
     * @returns {Promise<Array<{mcp: object, score: number, reason: string}>>}
     *   Best matches first.
     */
    async discover(query, limit = 10) {
        const queryEmbedding = this.embedder.embed(query);
        // Tokenize once; the words do not depend on the entry being scored.
        const queryWords = VectorDiscoveryEngine.#tokenize(query);
        const scored = this.index.map(mcp => {
            const rawSimilarity = this.embedder.cosineSimilarity(queryEmbedding, mcp.embedding ?? []);
            // A stored embedding of the wrong size can yield NaN; treat as no match.
            const similarity = Number.isFinite(rawSimilarity) ? rawSimilarity : 0;
            // Boost score based on metadata
            let boost = 1.0;
            if (mcp.metadata?.downloads > 1000) {
                boost *= 1.1;
            }
            if (mcp.metadata?.stars > 100) {
                boost *= 1.1;
            }
            // Keyword matching bonus
            const keywordMatches = VectorDiscoveryEngine.#matchingKeywords(mcp, queryWords);
            if (keywordMatches.length > 0) {
                boost *= (1 + keywordMatches.length * 0.2);
            }
            return {
                mcp,
                score: similarity * boost,
                reason: this.generateReason(mcp, queryWords, similarity)
            };
        });
        return scored
            .filter(s => s.score > 0.1) // Filter out very low relevance
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }

    /**
     * Generate human-readable reason for match.
     *
     * @param {object} mcp - Index entry.
     * @param {string[]} queryWords - Lower-cased query tokens; empty strings
     *   are ignored.
     * @param {number} similarity - Embedding similarity for this entry.
     * @returns {string} Reasons joined by '; ', or 'Related to query'.
     */
    generateReason(mcp, queryWords, similarity) {
        const words = (queryWords ?? []).filter(Boolean);
        const reasons = [];
        // Check for keyword matches
        const keywordMatches = VectorDiscoveryEngine.#matchingKeywords(mcp, words);
        if (keywordMatches.length > 0) {
            reasons.push(`Matches keywords: ${keywordMatches.join(', ')}`);
        }
        // Check category matches
        const categoryMatches = (mcp.metadata?.category ?? [])
            .filter(c => words.some(w => c.includes(w)));
        if (categoryMatches.length > 0) {
            reasons.push(`Category: ${categoryMatches.join(', ')}`);
        }
        // Semantic similarity
        if (similarity > 0.7) {
            reasons.push('High semantic relevance');
        }
        else if (similarity > 0.4) {
            reasons.push('Good semantic match');
        }
        // Popularity
        const downloads = mcp.metadata?.downloads;
        if (downloads > 1000) {
            reasons.push(`Popular (${downloads} downloads)`);
        }
        return reasons.join('; ') || 'Related to query';
    }

    /**
     * Add a new MCP to the index dynamically and save the index.
     *
     * An entry whose `packageName` is already indexed is replaced in place,
     * keeping the index unique by package name like `deduplicateMCPs` does.
     *
     * @param {object} mcp - Entry with `name`, `description` and optionally
     *   `keywords`, `packageName`, `metadata`.
     */
    async addMCP(mcp) {
        const entry = {
            ...mcp,
            embedding: this.embedder.embed(VectorDiscoveryEngine.#embeddingText(mcp))
        };
        const existingAt = mcp.packageName == null
            ? -1
            : this.index.findIndex(e => e.packageName === mcp.packageName);
        if (existingAt === -1) {
            this.index.push(entry);
        }
        else {
            this.index[existingAt] = entry;
        }
        await this.saveIndex();
        console.log(`✅ Added ${mcp.name} to index`);
    }

    /**
     * Update the index with fresh data (full rebuild).
     */
    async updateIndex() {
        console.log('🔄 Updating MCP index...');
        await this.buildIndex();
    }

    /**
     * Get statistics about the index.
     *
     * @returns {{totalMCPs: number, byRuntime: Object<string, number>,
     *   byCategory: Object<string, number>,
     *   mostPopular: Array<{name: string, downloads: number}>}}
     *   `mostPopular` holds at most five entries, highest downloads first.
     */
    getStats() {
        const stats = {
            totalMCPs: this.index.length,
            byRuntime: {},
            byCategory: {},
            mostPopular: []
        };
        for (const mcp of this.index) {
            // Count by runtime
            const runtime = mcp.metadata?.runtime || 'unknown';
            stats.byRuntime[runtime] = (stats.byRuntime[runtime] || 0) + 1;
            // Count by category
            for (const cat of mcp.metadata?.category ?? []) {
                stats.byCategory[cat] = (stats.byCategory[cat] || 0) + 1;
            }
        }
        // Find most popular; filter() returns a copy, so sort() leaves the index untouched.
        stats.mostPopular = this.index
            .filter(m => m.metadata?.downloads)
            .sort((a, b) => b.metadata.downloads - a.metadata.downloads)
            .slice(0, 5)
            .map(m => ({ name: m.name, downloads: m.metadata.downloads }));
        return stats;
    }
}
/**
 * Example usage / smoke test: initializes an engine with the default index
 * path, runs a few sample queries (top 5 results each), prints every result
 * with its score, package and reason, then prints the index statistics.
 */
async function testVectorDiscovery() {
    const engine = new VectorDiscoveryEngine();
    await engine.initialize();
    // Test queries
    const queries = [
        "I need to analyze CSV files with statistics",
        "automate browser testing",
        "work with Notion API",
        "manage local SQLite database"
    ];
    for (const query of queries) {
        console.log(`\n🔍 Query: "${query}"`);
        const results = await engine.discover(query, 5);
        if (results.length === 0) {
            // Make an empty result visible instead of printing nothing.
            console.log(' (no matching MCPs)');
            continue;
        }
        for (const [i, { mcp, score, reason }] of results.entries()) {
            console.log(`${i + 1}. ${mcp.name} (score: ${score.toFixed(3)})`);
            console.log(` Package: ${mcp.packageName}`);
            console.log(` Reason: ${reason}`);
        }
    }
    // Show statistics
    console.log('\n📊 Index Statistics:');
    console.log(engine.getStats());
}
// Run test if this file is executed directly. `__filename` is not defined in
// ES modules, so compare this module's URL with the URL of the script Node was
// started with (process.argv[1] is absent in e.g. `node -e` / REPL sessions).
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
    testVectorDiscovery().catch(console.error);
}