UNPKG

mcp-orchestrator

Version:

MCP Orchestrator - Discover and install MCPs with automatic OAuth support. Uses Claude CLI for OAuth MCPs (Canva, Asana, etc). 34 trusted MCPs from Claude Partners.

444 lines (443 loc) • 17.2 kB
/**
 * MCP Mega Crawler - Fetches ALL MCPs from multiple sources
 * (npm, PyPI, GitHub, and the official MCP registry), deduplicates
 * the results, and saves the combined index to ./data as JSON.
 */
import { exec } from 'child_process';
import { promisify } from 'util';
import * as fs from 'fs';
import * as https from 'https';
import { pathToFileURL } from 'url';

const execAsync = promisify(exec);

export class MCPMegaCrawler {
  // Accumulated MCP records from all sources (filled by crawlAll()).
  allMCPs = [];

  /**
   * Crawl npm for all MCP-related packages via `npm search --json`.
   * @returns {Promise<object[]>} deduplicated MCP records (source: 'npm')
   */
  async crawlNPM() {
    console.log('šŸ” Crawling npm for MCP servers...');
    const mcps = [];

    // Search patterns that catch MCP servers
    const searchPatterns = [
      'mcp-server',
      '@modelcontextprotocol',
      'mcp server',
      'model-context-protocol',
      'modelcontextprotocol'
    ];

    for (const pattern of searchPatterns) {
      try {
        console.log(`  Searching: "${pattern}"`);

        // Use npm search with JSON output; large buffer for big result sets.
        const { stdout } = await execAsync(
          `npm search "${pattern}" --json --long`,
          { maxBuffer: 10 * 1024 * 1024 } // 10MB buffer for large results
        );

        const packages = JSON.parse(stdout || '[]');

        for (const pkg of packages) {
          const name = pkg.name || '';
          const description = pkg.description || '';

          // Filter to likely MCP servers by name/description heuristics.
          if (name.includes('mcp') ||
              name.includes('modelcontext') ||
              description.toLowerCase().includes('mcp') ||
              description.toLowerCase().includes('model context protocol')) {
            const mcp = {
              id: this.generateId(name),
              name: this.cleanName(name),
              description: description,
              packageName: name,
              source: 'npm',
              keywords: pkg.keywords || [],
              metadata: {
                version: pkg.version,
                author: pkg.publisher?.username,
                lastUpdated: pkg.date,
                homepage: pkg.links?.homepage,
                repository: pkg.links?.repository,
                runtime: 'node'
              }
            };
            mcps.push(mcp);
          }
        }
      } catch (error) {
        // Best-effort: a failed pattern should not abort the other searches.
        console.error(`  Error searching npm for "${pattern}":`, error);
      }
    }

    console.log(`  āœ… Found ${mcps.length} potential MCP packages on npm`);
    return this.deduplicateByPackageName(mcps);
  }

  /**
   * Crawl PyPI for Python MCP servers via the simple index + JSON API.
   * @returns {Promise<object[]>} MCP records (source: 'pypi')
   */
  async crawlPyPI() {
    console.log('šŸ Crawling PyPI for MCP servers...');
    const mcps = [];

    try {
      // Fetch the simple index (plain HTML list of every package).
      const response = await this.httpsGet('https://pypi.org/simple/');
      const html = response;

      // Find all packages with 'mcp' in the name.
      const packageRegex = /<a[^>]*href="\/simple\/([^"]*mcp[^"]*)\/"[^>]*>([^<]*)<\/a>/gi;
      let match;
      const mcpPackages = [];

      while ((match = packageRegex.exec(html)) !== null) {
        mcpPackages.push(match[1]);
      }

      console.log(`  Found ${mcpPackages.length} potential Python MCP packages`);

      // Get details for each package (limit to first 50 for performance).
      for (const pkgName of mcpPackages.slice(0, 50)) {
        try {
          // Fetch package info from PyPI JSON API.
          const pkgInfo = await this.httpsGet(`https://pypi.org/pypi/${pkgName}/json`);
          const data = JSON.parse(pkgInfo);

          const mcp = {
            id: this.generateId(pkgName),
            name: this.cleanName(pkgName),
            description: data.info?.summary || '',
            packageName: pkgName,
            source: 'pypi',
            keywords: data.info?.keywords?.split(',') || [],
            metadata: {
              version: data.info?.version,
              author: data.info?.author,
              homepage: data.info?.home_page,
              lastUpdated: data.releases?.[data.info?.version]?.[0]?.upload_time,
              runtime: 'python'
            }
          };
          mcps.push(mcp);
        } catch (err) {
          // Skip packages we can't fetch (best-effort, deliberate swallow).
        }
      }
    } catch (error) {
      console.error('  Error crawling PyPI:', error);
    }

    console.log(`  āœ… Successfully indexed ${mcps.length} Python MCP packages`);
    return mcps;
  }

  /**
   * Crawl GitHub for MCP repositories via the search API.
   * NOTE(review): unauthenticated requests are rate-limited; a 403 body
   * parses as JSON without an `items` array and is silently skipped.
   * @returns {Promise<object[]>} deduplicated MCP records (source: 'github')
   */
  async crawlGitHub() {
    console.log('šŸ™ Crawling GitHub for MCP servers...');
    const mcps = [];

    try {
      // Search GitHub for MCP-related repos.
      const queries = [
        'mcp-server',
        'model-context-protocol',
        'topic:mcp',
        'topic:modelcontextprotocol'
      ];

      for (const query of queries) {
        const searchUrl = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&per_page=30`;
        const response = await this.httpsGet(searchUrl, {
          'User-Agent': 'MCP-Crawler/1.0' // GitHub API rejects requests without a UA.
        });

        const data = JSON.parse(response);
        if (data.items) {
          for (const repo of data.items) {
            const mcp = {
              id: this.generateId(repo.name),
              name: this.cleanName(repo.name),
              description: repo.description || '',
              packageName: repo.full_name, // owner/repo format
              source: 'github',
              keywords: repo.topics || [],
              metadata: {
                stars: repo.stargazers_count,
                lastUpdated: repo.updated_at,
                homepage: repo.homepage,
                repository: repo.html_url,
                runtime: this.detectRuntime(repo)
              }
            };
            mcps.push(mcp);
          }
        }
      }
    } catch (error) {
      console.error('  Error crawling GitHub:', error);
    }

    console.log(`  āœ… Found ${mcps.length} MCP repositories on GitHub`);
    return this.deduplicateByPackageName(mcps);
  }

  /**
   * Crawl the official MCP registry (JSON registry or README markdown).
   * @returns {Promise<object[]>} MCP records (source: 'registry')
   */
  async crawlOfficialRegistry() {
    console.log('šŸ“š Fetching official MCP registry...');
    const mcps = [];

    try {
      // Try different registry endpoints.
      const endpoints = [
        'https://raw.githubusercontent.com/modelcontextprotocol/servers/main/README.md',
        'https://raw.githubusercontent.com/modelcontextprotocol/registry/main/registry.json'
      ];

      for (const endpoint of endpoints) {
        try {
          const response = await this.httpsGet(endpoint);

          // Parse based on content type.
          if (endpoint.endsWith('.json')) {
            const data = JSON.parse(response);
            // Process JSON registry.
            if (Array.isArray(data)) {
              data.forEach(item => {
                mcps.push(this.parseRegistryItem(item));
              });
            }
          } else {
            // Parse markdown for "[name](url) - description" references.
            const mcpRegex = /\[([^\]]+)\]\(([^)]+)\)\s*-\s*([^\n]+)/g;
            let match;
            while ((match = mcpRegex.exec(response)) !== null) {
              const [, name, url, description] = match;
              mcps.push({
                id: this.generateId(name),
                name: name,
                description: description,
                packageName: this.extractPackageFromUrl(url),
                source: 'registry',
                keywords: this.extractKeywords(description),
                metadata: {
                  homepage: url,
                  runtime: 'unknown'
                }
              });
            }
          }
        } catch (err) {
          // Try next endpoint (best-effort, deliberate swallow).
        }
      }
    } catch (error) {
      console.error('  Error fetching official registry:', error);
    }

    console.log(`  āœ… Found ${mcps.length} MCPs in official registry`);
    return mcps;
  }

  /**
   * Master crawl function - runs all four crawlers in parallel,
   * merges and globally deduplicates their results, and saves to disk.
   * @returns {Promise<object[]>} the final deduplicated MCP list
   */
  async crawlAll() {
    console.log('šŸš€ Starting mega crawl of all MCP sources...\n');

    // allSettled: one failed source must not discard the others.
    const results = await Promise.allSettled([
      this.crawlNPM(),
      this.crawlPyPI(),
      this.crawlGitHub(),
      this.crawlOfficialRegistry()
    ]);

    // Combine all results.
    for (const result of results) {
      if (result.status === 'fulfilled') {
        this.allMCPs.push(...result.value);
      }
    }

    // Deduplicate across all sources.
    this.allMCPs = this.deduplicateGlobally(this.allMCPs);

    console.log(`\nšŸŽ‰ Mega crawl complete!`);
    console.log(`šŸ“Š Total unique MCPs found: ${this.allMCPs.length}`);

    // Save to file.
    await this.saveResults();

    return this.allMCPs;
  }

  /**
   * Save crawled results to a date-stamped JSON file under ./data.
   */
  async saveResults() {
    const filename = `mcp-crawl-${new Date().toISOString().split('T')[0]}.json`;
    // BUG FIX: was `./data/$(unknown)` — `filename` was computed but never
    // interpolated, so results landed in a literal junk path.
    const filepath = `./data/${filename}`;

    // Ensure data directory exists (recursive: no error if parents missing).
    if (!fs.existsSync('./data')) {
      fs.mkdirSync('./data', { recursive: true });
    }

    fs.writeFileSync(filepath, JSON.stringify({
      timestamp: new Date().toISOString(),
      totalMCPs: this.allMCPs.length,
      bySource: {
        npm: this.allMCPs.filter(m => m.source === 'npm').length,
        pypi: this.allMCPs.filter(m => m.source === 'pypi').length,
        github: this.allMCPs.filter(m => m.source === 'github').length,
        registry: this.allMCPs.filter(m => m.source === 'registry').length
      },
      mcps: this.allMCPs
    }, null, 2));

    console.log(`šŸ’¾ Results saved to ${filepath}`);
  }

  /**
   * Helper: HTTPS GET request, resolving with the full response body.
   * NOTE(review): does not inspect the status code — callers rely on
   * JSON.parse failures / missing fields to skip error responses.
   * @param {string} url
   * @param {object} [headers]
   * @returns {Promise<string>}
   */
  httpsGet(url, headers = {}) {
    return new Promise((resolve, reject) => {
      https.get(url, { headers }, (res) => {
        let data = '';
        res.on('data', chunk => data += chunk);
        res.on('end', () => resolve(data));
      }).on('error', reject);
    });
  }

  /**
   * Helper: Generate a clean, URL-safe ID from a package name.
   */
  generateId(name) {
    return name
      .toLowerCase()
      .replace(/[@\/]/g, '-')        // scope separators -> dashes
      .replace(/[^a-z0-9-]/g, '')    // strip everything else
      .replace(/^-+|-+$/g, '');      // trim leading/trailing dashes
  }

  /**
   * Helper: Clean a package name for display ("@org/foo-mcp-server" -> "Foo MCP").
   */
  cleanName(name) {
    return name
      .replace(/^@[^/]+\//, '')       // Remove scope
      .replace(/-mcp-?server$/i, '')  // Remove common suffixes
      .replace(/-mcp$/i, '')
      .replace(/^mcp-/, '')
      .split(/[-_]/)
      .map(word => word.charAt(0).toUpperCase() + word.slice(1))
      .join(' ') + ' MCP';
  }

  /**
   * Helper: Detect runtime from a GitHub repo's primary language.
   */
  detectRuntime(repo) {
    const lang = repo.language?.toLowerCase();
    if (lang === 'javascript' || lang === 'typescript') return 'node';
    if (lang === 'python') return 'python';
    return 'unknown';
  }

  /**
   * Helper: Extract a package name from an npm or GitHub URL;
   * falls back to the raw URL when no pattern matches.
   */
  extractPackageFromUrl(url) {
    if (url.includes('npm')) {
      const match = url.match(/package\/(.+)$/);
      return match ? match[1] : url;
    }
    if (url.includes('github.com')) {
      const match = url.match(/github\.com\/([^/]+\/[^/]+)/);
      return match ? match[1] : url;
    }
    return url;
  }

  /**
   * Helper: Extract up to 10 keywords from a description,
   * dropping short words and a small stop-word list.
   */
  extractKeywords(text) {
    const commonWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for']);
    return text
      .toLowerCase()
      .split(/\W+/)
      .filter(word => word.length > 3 && !commonWords.has(word))
      .slice(0, 10);
  }

  /**
   * Helper: Normalize a JSON-registry item into the common MCP record shape.
   */
  parseRegistryItem(item) {
    return {
      id: this.generateId(item.name || item.id),
      name: item.name || item.id,
      description: item.description || '',
      packageName: item.package || item.packageName || item.name,
      source: 'registry',
      keywords: item.keywords || [],
      metadata: {
        version: item.version,
        author: item.author,
        homepage: item.homepage,
        repository: item.repository,
        runtime: item.runtime || 'unknown'
      }
    };
  }

  /**
   * Helper: Deduplicate by package name, keeping the entry with more downloads.
   */
  deduplicateByPackageName(mcps) {
    const seen = new Map();
    for (const mcp of mcps) {
      const key = mcp.packageName.toLowerCase();
      if (!seen.has(key) ||
          (mcp.metadata.downloads || 0) > (seen.get(key).metadata.downloads || 0)) {
        seen.set(key, mcp);
      }
    }
    return Array.from(seen.values());
  }

  /**
   * Helper: Global deduplication with source priority
   * (registry > npm > github > pypi).
   */
  deduplicateGlobally(mcps) {
    const sourcePriority = { registry: 4, npm: 3, github: 2, pypi: 1 };
    const seen = new Map();

    for (const mcp of mcps) {
      const key = mcp.packageName.toLowerCase();
      const existing = seen.get(key);
      if (!existing || sourcePriority[mcp.source] > sourcePriority[existing.source]) {
        seen.set(key, mcp);
      }
    }
    return Array.from(seen.values());
  }

  /**
   * Get statistics about the crawled MCPs: totals by source and runtime,
   * top keywords, and the ten most-starred entries.
   */
  getStatistics() {
    return {
      total: this.allMCPs.length,
      bySource: {
        npm: this.allMCPs.filter(m => m.source === 'npm').length,
        pypi: this.allMCPs.filter(m => m.source === 'pypi').length,
        github: this.allMCPs.filter(m => m.source === 'github').length,
        registry: this.allMCPs.filter(m => m.source === 'registry').length
      },
      byRuntime: {
        node: this.allMCPs.filter(m => m.metadata.runtime === 'node').length,
        python: this.allMCPs.filter(m => m.metadata.runtime === 'python').length,
        unknown: this.allMCPs.filter(m => m.metadata.runtime === 'unknown').length
      },
      topKeywords: this.getTopKeywords(),
      mostStarred: this.allMCPs
        .filter(m => m.metadata.stars)
        .sort((a, b) => (b.metadata.stars || 0) - (a.metadata.stars || 0))
        .slice(0, 10)
        .map(m => ({ name: m.name, stars: m.metadata.stars }))
    };
  }

  /**
   * Get the 20 most common keywords across all crawled MCPs.
   */
  getTopKeywords() {
    const keywordCount = new Map();
    for (const mcp of this.allMCPs) {
      for (const keyword of mcp.keywords) {
        keywordCount.set(keyword, (keywordCount.get(keyword) || 0) + 1);
      }
    }
    return Array.from(keywordCount.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .map(([keyword, count]) => ({ keyword, count }));
  }
}

// Run crawler if executed directly.
// BUG FIX: `__filename` does not exist in ES modules — the original
// comparison threw a ReferenceError on import. Use pathToFileURL on
// argv[1] for an ESM-safe "is main module" check.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  const crawler = new MCPMegaCrawler();
  crawler.crawlAll()
    .then(() => {
      console.log('\nšŸ“Š Crawl Statistics:');
      console.log(JSON.stringify(crawler.getStatistics(), null, 2));
    })
    .catch(console.error);
}