UNPKG

mushcode-mcp-server

Version:

A specialized Model Context Protocol server for MUSHCODE development assistance. Provides AI-powered code generation, validation, optimization, and examples for MUD development.

447 lines 17.4 kB
/**
 * GitHub repository scraper for MUSHCODE content
 * Scrapes MUSHCODE files from GitHub repositories to enhance the knowledge base
 */
import { logger } from '../utils/logger.js';

/** File extensions that mark a file as MUSHCODE regardless of its name. */
const MUSHCODE_EXTENSIONS = ['.mush', '.mu', '.mushcode', '.softcode', '.txt'];
/** Substrings that mark a file name or path as MUSHCODE-related. */
const MUSHCODE_KEYWORDS = ['mushcode', 'softcode', 'mush', 'command', 'function', 'trigger'];
/** Server list attached to every scraped pattern and example. */
const SERVER_COMPATIBILITY = ['PennMUSH', 'TinyMUSH', 'RhostMUSH', 'TinyMUX', 'MUX'];
/** Fallback wait when a rate-limit response carries no usable timing header. */
const DEFAULT_RATE_LIMIT_DELAY_MS = 60000;
/** Upper bound on retries so a permanent 403 (e.g. no access) cannot loop forever. */
const MAX_RATE_LIMIT_RETRIES = 3;
/** Pause inserted before each file-content request to avoid bursts. */
const REQUEST_SPACING_MS = 100;

/** Resolve after `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

export class GitHubScraper {
    knowledgeBase;
    baseUrl = 'https://api.github.com';
    token;

    /**
     * @param {object} knowledgeBase - Receives results through `addPattern` and `addExample`.
     * @param {string} [token] - Optional GitHub token sent in the Authorization header.
     */
    constructor(knowledgeBase, token) {
        this.knowledgeBase = knowledgeBase;
        this.token = token;
    }

    /**
     * Get headers for GitHub API requests
     * @returns {Object<string, string>} Accept and User-Agent headers, plus Authorization when a token is set.
     */
    getHeaders() {
        const headers = {
            'Accept': 'application/vnd.github.v3+json',
            'User-Agent': 'MUSHCODE-MCP-Server/1.0.0'
        };
        if (this.token) {
            headers['Authorization'] = `token ${this.token}`;
        }
        return headers;
    }

    /**
     * Scrape MUSHCODE content from GitHub repositories.
     * Repositories are processed one after another; a failure in one is logged
     * and does not stop the remaining ones.
     * @param {Array<{owner: string, name: string, branch?: string}>} repos
     * @returns {Promise<void>}
     */
    async scrapeRepositories(repos) {
        logger.info(`Starting GitHub scraping for ${repos.length} repositories`, {
            operation: 'github_scrape_start',
            repoCount: repos.length
        });
        for (const repo of repos) {
            try {
                await this.scrapeRepository(repo);
            }
            catch (error) {
                logger.error(`Failed to scrape repository ${repo.owner}/${repo.name}`, error, {
                    operation: 'github_scrape_repo_error',
                    repo: `${repo.owner}/${repo.name}`
                });
            }
        }
        logger.info('GitHub scraping completed', {
            operation: 'github_scrape_complete'
        });
    }

    /**
     * Scrape a single GitHub repository.
     * A file that fails to process is logged as a warning and skipped.
     * @param {{owner: string, name: string, branch?: string}} repo
     * @returns {Promise<void>}
     * @throws Rethrows any error raised while listing the repository files.
     */
    async scrapeRepository(repo) {
        logger.info(`Scraping repository: ${repo.owner}/${repo.name}`, {
            operation: 'github_scrape_repo',
            repo: `${repo.owner}/${repo.name}`
        });
        try {
            // Get repository contents
            const files = await this.getRepositoryFiles(repo);
            // Filter for MUSHCODE files
            const mushcodeFiles = files.filter(file => this.isMushcodeFile(file));
            logger.info(`Found ${mushcodeFiles.length} MUSHCODE files in ${repo.owner}/${repo.name}`, {
                operation: 'github_files_found',
                fileCount: mushcodeFiles.length
            });
            // Process each MUSHCODE file
            for (const file of mushcodeFiles) {
                try {
                    await this.processFile(file, repo);
                }
                catch (error) {
                    logger.warn(`Failed to process file ${file.path}`, {
                        operation: 'github_file_process_error',
                        file: file.path,
                        error: error.message
                    });
                }
            }
        }
        catch (error) {
            logger.error(`Repository scraping failed for ${repo.owner}/${repo.name}`, error);
            throw error;
        }
    }

    /**
     * Work out how long to wait before retrying a rate-limited request.
     * Prefers `retry-after` (seconds), then the time left until
     * `x-ratelimit-reset` when `x-ratelimit-remaining` is 0, and otherwise
     * falls back to a fixed delay.
     * @param {Response} response - The 403/429 response.
     * @returns {number} Delay in milliseconds.
     */
    getRateLimitDelayMs(response) {
        // Tolerate response objects without a Headers instance (e.g. test doubles).
        const header = (name) => response.headers?.get?.(name) ?? null;
        const retryAfterSeconds = Number(header('retry-after'));
        if (Number.isFinite(retryAfterSeconds) && retryAfterSeconds > 0) {
            return retryAfterSeconds * 1000;
        }
        if (header('x-ratelimit-remaining') === '0') {
            const msUntilReset = Number(header('x-ratelimit-reset')) * 1000 - Date.now();
            if (Number.isFinite(msUntilReset) && msUntilReset > 0) {
                // One extra second so the retry lands after the reset instant.
                return msUntilReset + 1000;
            }
        }
        return DEFAULT_RATE_LIMIT_DELAY_MS;
    }

    /**
     * Fetch a GitHub API URL, retrying a bounded number of times on 403/429.
     * A 403 can also mean "forbidden" rather than "rate limited", so the retry
     * count is capped; once exhausted the last response is returned and the
     * caller reports the failure.
     * @param {string} url - Fully built request URL.
     * @param {(seconds: number) => void} onRateLimit - Called before each wait with the delay in whole seconds.
     * @returns {Promise<Response>} The first response that is not a retryable rate-limit response.
     */
    async fetchWithRateLimitRetry(url, onRateLimit) {
        for (let attempt = 0;; attempt += 1) {
            const response = await fetch(url, { headers: this.getHeaders() });
            const isRateLimited = response.status === 429 || response.status === 403;
            if (response.ok || !isRateLimited || attempt >= MAX_RATE_LIMIT_RETRIES) {
                return response;
            }
            const delayMs = this.getRateLimitDelayMs(response);
            onRateLimit(Math.ceil(delayMs / 1000));
            await sleep(delayMs);
        }
    }

    /**
     * Get all MUSHCODE files from a GitHub repository, including their content.
     * Lists the recursive git tree of `repo.branch` (default `main`) and downloads
     * every blob whose path passes `isMushcodeFile`. Files whose content cannot
     * be fetched are logged and skipped.
     * @param {{owner: string, name: string, branch?: string}} repo
     * @returns {Promise<Array<{name: string, path: string, content: string, size: number, sha: string}>>}
     * @throws {Error} When the tree request fails (non-OK status after retries).
     */
    async getRepositoryFiles(repo) {
        const branch = repo.branch || 'main';
        const url = `${this.baseUrl}/repos/${repo.owner}/${repo.name}/git/trees/${branch}?recursive=1`;
        try {
            const response = await this.fetchWithRateLimitRetry(url, (seconds) => {
                logger.warn(`GitHub API rate limited, waiting ${seconds} seconds before retry`, {
                    operation: 'github_rate_limit',
                    repo: `${repo.owner}/${repo.name}`
                });
            });
            if (!response.ok) {
                throw new Error(`GitHub API error: ${response.status} ${response.statusText}`);
            }
            const data = await response.json();
            if (data.truncated) {
                // The recursive tree listing was cut short by the API; some files will be missed.
                logger.warn(`Tree listing truncated for ${repo.owner}/${repo.name}; some files may be skipped`, {
                    operation: 'github_tree_truncated',
                    repo: `${repo.owner}/${repo.name}`
                });
            }
            const files = [];
            // Get file contents for each file
            for (const item of data.tree ?? []) {
                if (item.type !== 'blob' || !this.isMushcodeFile({ name: item.path })) {
                    continue;
                }
                try {
                    const content = await this.getFileContent(repo, item.path);
                    files.push({
                        name: item.path.split('/').pop() || item.path,
                        path: item.path,
                        content,
                        size: item.size || 0,
                        sha: item.sha
                    });
                }
                catch (error) {
                    logger.warn(`Failed to get content for ${item.path}`, {
                        operation: 'github_file_content_error',
                        path: item.path,
                        error: error.message
                    });
                }
            }
            return files;
        }
        catch (error) {
            logger.error(`Failed to get repository files for ${repo.owner}/${repo.name}`, error);
            throw error;
        }
    }

    /**
     * Get content of a specific file from GitHub.
     * @param {{owner: string, name: string, branch?: string}} repo
     * @param {string} path - Repository-relative file path using `/` separators.
     * @returns {Promise<string>} Decoded file text; empty string when the response carries no content.
     * @throws {Error} When the request fails (non-OK status after retries).
     */
    async getFileContent(repo, path) {
        // Encode each segment so spaces, '#', '?' etc. in file names do not corrupt the URL.
        const encodedPath = path.split('/').map((segment) => encodeURIComponent(segment)).join('/');
        // Read from the same branch the tree was listed from; without `ref`
        // the API serves the repository's default branch.
        const refQuery = repo.branch ? `?ref=${encodeURIComponent(repo.branch)}` : '';
        const url = `${this.baseUrl}/repos/${repo.owner}/${repo.name}/contents/${encodedPath}${refQuery}`;
        // Add small delay between requests to avoid rate limiting
        await sleep(REQUEST_SPACING_MS);
        const response = await this.fetchWithRateLimitRetry(url, (seconds) => {
            logger.warn(`Rate limited getting file ${path}, waiting ${seconds} seconds`, {
                operation: 'github_file_rate_limit',
                path
            });
        });
        if (!response.ok) {
            throw new Error(`Failed to get file content: ${response.status}`);
        }
        const data = await response.json();
        // Normalise a missing content field so downstream string handling cannot crash.
        const content = data.content ?? '';
        // Decode base64 content
        if (data.encoding === 'base64') {
            return Buffer.from(content, 'base64').toString('utf-8');
        }
        return content;
    }

    /**
     * Check if a file is a MUSHCODE file.
     * Matches on a known extension of the name, or on a keyword appearing in
     * the name or path (case-insensitive).
     * @param {{name?: string, path?: string}} file
     * @returns {boolean}
     */
    isMushcodeFile(file) {
        const fileName = (file.name || '').toLowerCase();
        const filePath = (file.path || '').toLowerCase();
        // Check file extension
        if (MUSHCODE_EXTENSIONS.some((ext) => fileName.endsWith(ext))) {
            return true;
        }
        // Check for MUSHCODE keywords in filename or path
        return MUSHCODE_KEYWORDS.some((keyword) => fileName.includes(keyword) || filePath.includes(keyword));
    }

    /**
     * Process a MUSHCODE file and extract patterns/examples into the knowledge base.
     * @param {{name: string, path: string, content: string, size: number}} file
     * @param {{owner: string, name: string}} repo
     * @returns {Promise<void>}
     * @throws Rethrows any error raised during extraction or insertion, after logging it.
     */
    async processFile(file, repo) {
        logger.debug(`Processing file: ${file.path}`, {
            operation: 'github_process_file',
            file: file.path,
            size: file.size
        });
        try {
            // Extract patterns and examples from the file content
            const patterns = this.extractPatterns(file, repo);
            const examples = this.extractExamples(file, repo);
            // Add to knowledge base
            for (const pattern of patterns) {
                this.knowledgeBase.addPattern(pattern);
            }
            for (const example of examples) {
                this.knowledgeBase.addExample(example);
            }
            logger.debug(`Extracted ${patterns.length} patterns and ${examples.length} examples from ${file.path}`, {
                operation: 'github_file_processed',
                patterns: patterns.length,
                examples: examples.length
            });
        }
        catch (error) {
            logger.error(`Failed to process file ${file.path}`, error);
            throw error;
        }
    }

    /**
     * Extract MUSHCODE patterns from file content.
     * Recognises attribute definitions (`&NAME object=code`) and command
     * definitions (`$trigger:code`) that start at the beginning of a line.
     * @param {{path: string, content: string}} file
     * @param {{owner: string, name: string}} repo
     * @returns {Array<object>} Pattern records, functions first, then commands.
     */
    extractPatterns(file, repo) {
        const patterns = [];
        const content = file.content ?? '';
        // Look for function definitions
        const functionMatches = content.match(/^&[A-Z_][A-Z0-9_]*\s+[^=]+=(.+)$/gm);
        if (functionMatches) {
            for (const match of functionMatches) {
                const pattern = this.createPatternFromFunction(match, file, repo);
                if (pattern) {
                    patterns.push(pattern);
                }
            }
        }
        // Look for command definitions
        const commandMatches = content.match(/^\$[^:]+:(.+)$/gm);
        if (commandMatches) {
            for (const match of commandMatches) {
                const pattern = this.createPatternFromCommand(match, file, repo);
                if (pattern) {
                    patterns.push(pattern);
                }
            }
        }
        return patterns;
    }

    /**
     * Extract code examples from file content.
     * Produces at most one example: the whole file, when it is non-blank and
     * shorter than 2000 characters.
     * @param {{name: string, path: string, content: string}} file
     * @param {{owner: string, name: string}} repo
     * @returns {Array<object>}
     */
    extractExamples(file, repo) {
        const examples = [];
        const content = file.content ?? '';
        // Create a general example from the entire file if it's small enough
        if (content.length < 2000 && content.trim().length > 0) {
            const example = {
                id: `github-${repo.owner}-${repo.name}-${file.path.replace(/[^a-zA-Z0-9]/g, '-')}`,
                title: `${file.name} from ${repo.owner}/${repo.name}`,
                description: `MUSHCODE example from ${file.path} in ${repo.owner}/${repo.name}`,
                category: this.inferCategory(content),
                difficulty: this.inferDifficulty(content),
                serverCompatibility: [...SERVER_COMPATIBILITY],
                code: content.trim(),
                tags: this.extractTags(content, file.path),
                explanation: `This code example was extracted from ${file.path} in the ${repo.owner}/${repo.name} repository.`,
                relatedConcepts: this.extractConcepts(content),
                learningObjectives: [
                    'Understand real-world MUSHCODE implementation',
                    'Learn from community examples',
                    'Practice with tested code patterns'
                ]
            };
            examples.push(example);
        }
        return examples;
    }

    /**
     * Create a pattern from a function definition.
     * @param {string} functionCode - A single `&NAME object=code` definition.
     * @param {{path: string}} file
     * @param {{owner: string, name: string}} repo
     * @returns {object|null} Pattern record, or null when the text is not a definition.
     */
    createPatternFromFunction(functionCode, file, repo) {
        const match = functionCode.match(/^&([A-Z_][A-Z0-9_]*)\s+([^=]+)=(.+)$/);
        if (!match)
            return null;
        const [, functionName, , code] = match;
        if (!functionName || !code)
            return null;
        return {
            id: `github-func-${repo.owner}-${repo.name}-${functionName.toLowerCase()}`,
            name: `${functionName} Function`,
            description: `Function ${functionName} from ${repo.owner}/${repo.name}`,
            category: 'function',
            difficulty: this.inferDifficulty(code),
            serverCompatibility: [...SERVER_COMPATIBILITY],
            codeTemplate: functionCode,
            parameters: this.extractParameters(code),
            tags: this.extractTags(code, file.path),
            relatedPatterns: [],
            examples: [functionCode],
            securityLevel: this.inferSecurityLevel(code),
            createdAt: new Date(),
            updatedAt: new Date()
        };
    }

    /**
     * Create a pattern from a command definition.
     * @param {string} commandCode - A single `$trigger:code` definition.
     * @param {{path: string}} file
     * @param {{owner: string, name: string}} repo
     * @returns {object|null} Pattern record, or null when the text is not a definition.
     */
    createPatternFromCommand(commandCode, file, repo) {
        const match = commandCode.match(/^\$([^:]+):(.+)$/);
        if (!match)
            return null;
        const [, trigger, code] = match;
        if (!trigger || !code)
            return null;
        return {
            id: `github-cmd-${repo.owner}-${repo.name}-${trigger.replace(/[^a-zA-Z0-9]/g, '-')}`,
            name: `${trigger} Command`,
            description: `Command trigger "${trigger}" from ${repo.owner}/${repo.name}`,
            category: 'command',
            difficulty: this.inferDifficulty(code),
            serverCompatibility: [...SERVER_COMPATIBILITY],
            codeTemplate: commandCode,
            parameters: this.extractParameters(code),
            tags: this.extractTags(code, file.path),
            relatedPatterns: [],
            examples: [commandCode],
            securityLevel: this.inferSecurityLevel(code),
            createdAt: new Date(),
            updatedAt: new Date()
        };
    }

    /**
     * Infer the category of code based on content.
     * Checks run in a fixed order and the first match wins.
     * @param {string} content
     * @returns {'command'|'function'|'trigger'|'creation'|'conditional'|'utility'}
     */
    inferCategory(content) {
        const lowerContent = content.toLowerCase();
        if (lowerContent.includes('$') && lowerContent.includes(':'))
            return 'command';
        if (lowerContent.includes('&') && lowerContent.includes('='))
            return 'function';
        if (lowerContent.includes('@trigger') || lowerContent.includes('@listen'))
            return 'trigger';
        if (lowerContent.includes('@create') || lowerContent.includes('@dig'))
            return 'creation';
        if (lowerContent.includes('switch(') || lowerContent.includes('if('))
            return 'conditional';
        return 'utility';
    }

    /**
     * Infer difficulty based on code complexity.
     * @param {string} content
     * @returns {'beginner'|'intermediate'|'advanced'} Score below 3, below 7, or 7 and above.
     */
    inferDifficulty(content) {
        const complexity = this.calculateComplexity(content);
        if (complexity < 3)
            return 'beginner';
        if (complexity < 7)
            return 'intermediate';
        return 'advanced';
    }

    /**
     * Calculate code complexity score.
     * Weighted count of `[` (0.5), `switch(` (2), `if(` (1), `iter(` (2) and
     * `setq(` (1), capped at 10. Function names are matched case-insensitively,
     * in line with the lowercase comparisons used by the other heuristics here.
     * @param {string} content
     * @returns {number} Score in the range 0..10.
     */
    calculateComplexity(content) {
        const count = (regex) => (content.match(regex) || []).length;
        let score = 0;
        // Count nested structures
        score += count(/\[/g) * 0.5;
        score += count(/switch\(/gi) * 2;
        score += count(/if\(/gi) * 1;
        score += count(/iter\(/gi) * 2;
        score += count(/setq\(/gi) * 1;
        return Math.min(score, 10);
    }

    /**
     * Infer security level based on code content.
     * The most privileged matching command wins.
     * @param {string} content
     * @returns {'god'|'wizard'|'builder'|'public'}
     */
    inferSecurityLevel(content) {
        const lowerContent = content.toLowerCase();
        if (lowerContent.includes('@shutdown') || lowerContent.includes('@restart'))
            return 'god';
        if (lowerContent.includes('@force') || lowerContent.includes('@tel'))
            return 'wizard';
        if (lowerContent.includes('@create') || lowerContent.includes('@dig'))
            return 'builder';
        return 'public';
    }

    /**
     * Extract parameters from code.
     * Each distinct `%0`..`%9` reference becomes one required string parameter,
     * in order of first appearance.
     * @param {string} content
     * @returns {Array<{name: string, type: string, description: string, required: boolean}>}
     */
    extractParameters(content) {
        // Look for %0, %1, etc.
        const paramMatches = content.match(/%[0-9]/g) || [];
        return [...new Set(paramMatches)].map((param) => ({
            name: param,
            type: 'string',
            description: `Parameter ${param}`,
            required: true
        }));
    }

    /**
     * Extract tags from content and file path.
     * @param {string} content
     * @param {string} filePath - Repository-relative path using `/` separators.
     * @returns {string[]} At most 10 unique tags, path-derived tags first.
     */
    extractTags(content, filePath) {
        const tags = new Set();
        // Add tags based on file path
        const pathParts = filePath.toLowerCase().split('/');
        for (const part of pathParts) {
            if (part.length > 2 && part !== 'mushcode') {
                const tag = part.replace(/[^a-zA-Z0-9]/g, '');
                // A segment made only of punctuation strips down to nothing; skip it.
                if (tag.length > 0) {
                    tags.add(tag);
                }
            }
        }
        // Add tags based on content
        const lowerContent = content.toLowerCase();
        if (lowerContent.includes('switch('))
            tags.add('switch');
        if (lowerContent.includes('iter('))
            tags.add('iteration');
        if (lowerContent.includes('setq('))
            tags.add('variables');
        if (lowerContent.includes('@create'))
            tags.add('creation');
        if (lowerContent.includes('@force'))
            tags.add('admin');
        return Array.from(tags).slice(0, 10); // Limit to 10 tags
    }

    /**
     * Extract related concepts from content.
     * @param {string} content
     * @returns {string[]} Concept identifiers for each construct found in the content.
     */
    extractConcepts(content) {
        const concepts = new Set();
        const lowerContent = content.toLowerCase();
        if (lowerContent.includes('switch('))
            concepts.add('conditional-logic');
        if (lowerContent.includes('iter('))
            concepts.add('iteration');
        if (lowerContent.includes('setq('))
            concepts.add('variable-management');
        if (lowerContent.includes('@create'))
            concepts.add('object-creation');
        if (lowerContent.includes('pemit'))
            concepts.add('messaging');
        if (lowerContent.includes('lock'))
            concepts.add('permissions');
        return Array.from(concepts);
    }
}
//# sourceMappingURL=github-scraper.js.map