mcp-orchestrator
Version:
MCP Orchestrator - Discover and install MCPs with automatic OAuth support. Uses the Claude CLI for OAuth MCPs (Canva, Asana, etc.). 34 trusted MCPs from Claude Partners.
444 lines (443 loc) • 17.2 kB
JavaScript
/**
* MCP Mega Crawler - Fetches ALL MCPs from multiple sources
* Aggregates MCP server listings from npm, PyPI, GitHub, and the official MCP registry.
*/
import { exec } from 'child_process';
import { promisify } from 'util';
import * as fs from 'fs';
import * as https from 'https';
import { pathToFileURL } from 'url';
const execAsync = promisify(exec);
export class MCPMegaCrawler {
allMCPs = [];
/**
* Crawl NPM for all MCP-related packages
*/
async crawlNPM() {
console.log('Crawling npm for MCP servers...');
const mcps = [];
// Search patterns that catch MCP servers
const searchPatterns = [
'mcp-server',
'@modelcontextprotocol',
'mcp server',
'model-context-protocol',
'modelcontextprotocol'
];
for (const pattern of searchPatterns) {
try {
console.log(` Searching: "${pattern}"`);
// Use npm search with JSON output
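// Note: npm search caps results at the `searchlimit` config value (default 20 per query); pass --searchlimit to raise it if broader coverage is needed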
const { stdout } = await execAsync(`npm search "${pattern}" --json --long`, { maxBuffer: 10 * 1024 * 1024 }); // 10 MB buffer for large result sets
const packages = JSON.parse(stdout || '[]');
for (const pkg of packages) {
// Filter to likely MCP servers
const name = pkg.name || '';
const description = pkg.description || '';
// Check if this is likely an MCP server
if (name.includes('mcp') ||
name.includes('modelcontext') ||
description.toLowerCase().includes('mcp') ||
description.toLowerCase().includes('model context protocol')) {
const mcp = {
id: this.generateId(name),
name: this.cleanName(name),
description: description,
packageName: name,
source: 'npm',
keywords: pkg.keywords || [],
metadata: {
version: pkg.version,
author: pkg.publisher?.username,
lastUpdated: pkg.date,
homepage: pkg.links?.homepage,
repository: pkg.links?.repository,
runtime: 'node'
}
};
mcps.push(mcp);
}
}
}
catch (error) {
console.error(` Error searching npm for "${pattern}":`, error);
}
}
console.log(`  Found ${mcps.length} potential MCP packages on npm`);
return this.deduplicateByPackageName(mcps);
}
/**
* Crawl PyPI for Python MCP servers
*/
async crawlPyPI() {
console.log('Crawling PyPI for MCP servers...');
const mcps = [];
try {
// Fetch the simple index
const response = await this.httpsGet('https://pypi.org/simple/');
const html = response;
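// Note: the PyPI simple index is a single very large HTML page listing every package on the index, hence the regex scan below instead of a search API call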
// Find all packages with 'mcp' in the name
const packageRegex = /<a[^>]*href="\/simple\/([^"]*mcp[^"]*)\/"[^>]*>([^<]*)<\/a>/gi;
let match;
const mcpPackages = [];
while ((match = packageRegex.exec(html)) !== null) {
mcpPackages.push(match[1]);
}
console.log(` Found ${mcpPackages.length} potential Python MCP packages`);
// Get details for each package (limit to first 50 for performance)
for (const pkgName of mcpPackages.slice(0, 50)) {
try {
// Fetch package info from PyPI JSON API
const pkgInfo = await this.httpsGet(`https://pypi.org/pypi/${pkgName}/json`);
const data = JSON.parse(pkgInfo);
const mcp = {
id: this.generateId(pkgName),
name: this.cleanName(pkgName),
description: data.info?.summary || '',
packageName: pkgName,
source: 'pypi',
keywords: data.info?.keywords?.split(',') || [],
metadata: {
version: data.info?.version,
author: data.info?.author,
homepage: data.info?.home_page,
lastUpdated: data.releases?.[data.info?.version]?.[0]?.upload_time,
runtime: 'python'
}
};
mcps.push(mcp);
}
catch (err) {
// Skip packages we can't fetch
}
}
}
catch (error) {
console.error(' Error crawling PyPI:', error);
}
console.log(`  Successfully indexed ${mcps.length} Python MCP packages`);
return mcps;
}
/**
* Crawl GitHub for MCP repositories
*/
async crawlGitHub() {
console.log('Crawling GitHub for MCP servers...');
const mcps = [];
try {
// Search GitHub for MCP-related repos
const queries = [
'mcp-server',
'model-context-protocol',
'topic:mcp',
'topic:modelcontextprotocol'
];
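// Note: unauthenticated GitHub search requests are heavily rate-limited (about 10 per minute), so repeated runs may come back empty; an auth token would raise the limit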
for (const query of queries) {
const searchUrl = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&per_page=30`;
const response = await this.httpsGet(searchUrl, {
'User-Agent': 'MCP-Crawler/1.0'
});
const data = JSON.parse(response);
if (data.items) {
for (const repo of data.items) {
const mcp = {
id: this.generateId(repo.name),
name: this.cleanName(repo.name),
description: repo.description || '',
packageName: repo.full_name, // owner/repo format
source: 'github',
keywords: repo.topics || [],
metadata: {
stars: repo.stargazers_count,
lastUpdated: repo.updated_at,
homepage: repo.homepage,
repository: repo.html_url,
runtime: this.detectRuntime(repo)
}
};
mcps.push(mcp);
}
}
}
}
catch (error) {
console.error(' Error crawling GitHub:', error);
}
console.log(`  Found ${mcps.length} MCP repositories on GitHub`);
return this.deduplicateByPackageName(mcps);
}
/**
* Crawl the official MCP registry
*/
async crawlOfficialRegistry() {
console.log('Fetching official MCP registry...');
const mcps = [];
try {
// Try different registry endpoints
const endpoints = [
'https://raw.githubusercontent.com/modelcontextprotocol/servers/main/README.md',
'https://raw.githubusercontent.com/modelcontextprotocol/registry/main/registry.json'
];
for (const endpoint of endpoints) {
try {
const response = await this.httpsGet(endpoint);
// Parse based on content type
if (endpoint.endsWith('.json')) {
const data = JSON.parse(response);
// Process JSON registry
if (Array.isArray(data)) {
data.forEach(item => {
mcps.push(this.parseRegistryItem(item));
});
}
}
else {
// Parse markdown for MCP references
const mcpRegex = /\[([^\]]+)\]\(([^)]+)\)\s*-\s*([^\n]+)/g;
let match;
while ((match = mcpRegex.exec(response)) !== null) {
const [_, name, url, description] = match;
mcps.push({
id: this.generateId(name),
name: name,
description: description,
packageName: this.extractPackageFromUrl(url),
source: 'registry',
keywords: this.extractKeywords(description),
metadata: {
homepage: url,
runtime: 'unknown'
}
});
}
}
}
catch (err) {
// Try next endpoint
}
}
}
catch (error) {
console.error(' Error fetching official registry:', error);
}
console.log(`  Found ${mcps.length} MCPs in official registry`);
return mcps;
}
/**
* Master crawl function - gets everything!
*/
async crawlAll() {
console.log('Starting mega crawl of all MCP sources...\n');
const results = await Promise.allSettled([
this.crawlNPM(),
this.crawlPyPI(),
this.crawlGitHub(),
this.crawlOfficialRegistry()
]);
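// Promise.allSettled lets any single source fail without aborting the crawl; rejected results are simply skipped below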
// Combine all results
for (const result of results) {
if (result.status === 'fulfilled') {
this.allMCPs.push(...result.value);
}
}
// Deduplicate across all sources
this.allMCPs = this.deduplicateGlobally(this.allMCPs);
console.log(`\nMega crawl complete!`);
console.log(`Total unique MCPs found: ${this.allMCPs.length}`);
// Save to file
await this.saveResults();
return this.allMCPs;
}
/**
* Save crawled results to JSON file
*/
async saveResults() {
const filename = `mcp-crawl-${new Date().toISOString().split('T')[0]}.json`;
const filepath = `./data/${filename}`;
// Ensure data directory exists
if (!fs.existsSync('./data')) {
fs.mkdirSync('./data');
}
fs.writeFileSync(filepath, JSON.stringify({
timestamp: new Date().toISOString(),
totalMCPs: this.allMCPs.length,
bySource: {
npm: this.allMCPs.filter(m => m.source === 'npm').length,
pypi: this.allMCPs.filter(m => m.source === 'pypi').length,
github: this.allMCPs.filter(m => m.source === 'github').length,
registry: this.allMCPs.filter(m => m.source === 'registry').length
},
mcps: this.allMCPs
}, null, 2));
console.log(`Results saved to ${filepath}`);
}
/**
* Helper: HTTPS GET request
*/
httpsGet(url, headers = {}) {
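// Minimal GET helper: resolves with the raw body for any status code and does not follow redirects or set a timeout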
return new Promise((resolve, reject) => {
https.get(url, { headers }, (res) => {
let data = '';
res.on('data', chunk => data += chunk);
res.on('end', () => resolve(data));
}).on('error', reject);
});
}
/**
* Helper: Generate clean ID from name
*/
generateId(name) {
return name
.toLowerCase()
.replace(/[@\/]/g, '-')
.replace(/[^a-z0-9-]/g, '')
.replace(/^-+|-+$/g, '');
}
/**
* Helper: Clean package name for display
*/
cleanName(name) {
return name
.replace(/^@[^/]+\//, '') // Remove scope
.replace(/-mcp-?server$/i, '') // Remove common suffixes
.replace(/-mcp$/i, '')
.replace(/^mcp-/, '')
.split(/[-_]/)
.map(word => word.charAt(0).toUpperCase() + word.slice(1))
.join(' ') + ' MCP';
}
/**
* Helper: Detect runtime from GitHub repo
*/
detectRuntime(repo) {
const lang = repo.language?.toLowerCase();
if (lang === 'javascript' || lang === 'typescript')
return 'node';
if (lang === 'python')
return 'python';
return 'unknown';
}
/**
* Helper: Extract package name from URL
*/
extractPackageFromUrl(url) {
if (url.includes('npm')) {
const match = url.match(/package\/(.+)$/);
return match ? match[1] : url;
}
if (url.includes('github.com')) {
const match = url.match(/github\.com\/([^/]+\/[^/]+)/);
return match ? match[1] : url;
}
return url;
}
/**
* Helper: Extract keywords from description
*/
extractKeywords(text) {
const commonWords = new Set(['the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for']);
return text
.toLowerCase()
.split(/\W+/)
.filter(word => word.length > 3 && !commonWords.has(word))
.slice(0, 10);
}
/**
* Helper: Parse registry item
*/
parseRegistryItem(item) {
return {
id: this.generateId(item.name || item.id),
name: item.name || item.id,
description: item.description || '',
packageName: item.package || item.packageName || item.name,
source: 'registry',
keywords: item.keywords || [],
metadata: {
version: item.version,
author: item.author,
homepage: item.homepage,
repository: item.repository,
runtime: item.runtime || 'unknown'
}
};
}
/**
* Helper: Deduplicate by package name
*/
deduplicateByPackageName(mcps) {
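// Keeps the entry with the higher download count when names collide; the crawlers above do not currently populate metadata.downloads, so in practice the first entry seen wins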
const seen = new Map();
for (const mcp of mcps) {
const key = mcp.packageName.toLowerCase();
if (!seen.has(key) || (mcp.metadata.downloads || 0) > (seen.get(key).metadata.downloads || 0)) {
seen.set(key, mcp);
}
}
return Array.from(seen.values());
}
/**
* Helper: Global deduplication with source priority
*/
deduplicateGlobally(mcps) {
const sourcePriority = { registry: 4, npm: 3, github: 2, pypi: 1 };
const seen = new Map();
for (const mcp of mcps) {
const key = mcp.packageName.toLowerCase();
const existing = seen.get(key);
if (!existing || sourcePriority[mcp.source] > sourcePriority[existing.source]) {
seen.set(key, mcp);
}
}
return Array.from(seen.values());
}
/**
* Get statistics about crawled MCPs
*/
getStatistics() {
return {
total: this.allMCPs.length,
bySource: {
npm: this.allMCPs.filter(m => m.source === 'npm').length,
pypi: this.allMCPs.filter(m => m.source === 'pypi').length,
github: this.allMCPs.filter(m => m.source === 'github').length,
registry: this.allMCPs.filter(m => m.source === 'registry').length
},
byRuntime: {
node: this.allMCPs.filter(m => m.metadata.runtime === 'node').length,
python: this.allMCPs.filter(m => m.metadata.runtime === 'python').length,
unknown: this.allMCPs.filter(m => m.metadata.runtime === 'unknown').length
},
topKeywords: this.getTopKeywords(),
mostStarred: this.allMCPs
.filter(m => m.metadata.stars)
.sort((a, b) => (b.metadata.stars || 0) - (a.metadata.stars || 0))
.slice(0, 10)
.map(m => ({ name: m.name, stars: m.metadata.stars }))
};
}
/**
* Get most common keywords
*/
getTopKeywords() {
const keywordCount = new Map();
for (const mcp of this.allMCPs) {
for (const keyword of mcp.keywords) {
keywordCount.set(keyword, (keywordCount.get(keyword) || 0) + 1);
}
}
return Array.from(keywordCount.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, 20)
.map(([keyword, count]) => ({ keyword, count }));
}
}
// Run crawler if executed directly
// ESM has no __filename; compare against the entry script's file URL instead
if (import.meta.url === pathToFileURL(process.argv[1]).href) {
const crawler = new MCPMegaCrawler();
crawler.crawlAll()
.then(() => {
console.log('\nCrawl Statistics:');
console.log(JSON.stringify(crawler.getStatistics(), null, 2));
})
.catch(console.error);
}