UNPKG

@wildcard-ai/deepcodex

Version:

Advanced codebase indexing and semantic search MCP server

261 lines 11.1 kB
/**
 * LanguageDetector - Detect programming language from file extension and content
 */
import * as path from 'path';

/** Languages that isLanguageSupported() reports as having good AST parsing support. */
const AST_SUPPORTED_LANGUAGES = new Set([
    'typescript', 'javascript', 'python', 'java', 'cpp', 'c', 'go', 'rust', 'csharp'
]);

/** Primary file extension per language, used by getPrimaryExtension(). */
const PRIMARY_EXTENSIONS = new Map([
    ['typescript', '.ts'],
    ['javascript', '.js'],
    ['python', '.py'],
    ['java', '.java'],
    ['cpp', '.cpp'],
    ['c', '.c'],
    ['go', '.go'],
    ['rust', '.rs'],
    ['csharp', '.cs'],
    ['php', '.php'],
    ['ruby', '.rb'],
    ['swift', '.swift'],
    ['kotlin', '.kt']
]);

export class LanguageDetector {
    /**
     * Lower-cased file extension -> { language, confidence }.
     * Looked up with the result of path.extname(), which only returns the
     * last dot-suffix, so 'foo.d.ts' resolves through the '.ts' entry.
     */
    EXTENSION_MAP = new Map([
        // TypeScript
        ['.ts', { language: 'typescript', confidence: 1.0 }],
        ['.tsx', { language: 'typescript', confidence: 1.0 }],
        ['.d.ts', { language: 'typescript', confidence: 1.0 }],
        // JavaScript
        ['.js', { language: 'javascript', confidence: 1.0 }],
        ['.jsx', { language: 'javascript', confidence: 1.0 }],
        ['.mjs', { language: 'javascript', confidence: 1.0 }],
        ['.cjs', { language: 'javascript', confidence: 1.0 }],
        // Python
        ['.py', { language: 'python', confidence: 1.0 }],
        ['.pyx', { language: 'python', confidence: 0.9 }],
        ['.pyi', { language: 'python', confidence: 1.0 }],
        ['.pyw', { language: 'python', confidence: 1.0 }],
        // Java
        ['.java', { language: 'java', confidence: 1.0 }],
        // C/C++
        ['.c', { language: 'c', confidence: 1.0 }],
        ['.h', { language: 'c', confidence: 0.8 }], // Could be C++
        ['.cpp', { language: 'cpp', confidence: 1.0 }],
        ['.cxx', { language: 'cpp', confidence: 1.0 }],
        ['.cc', { language: 'cpp', confidence: 1.0 }],
        ['.hpp', { language: 'cpp', confidence: 1.0 }],
        ['.hxx', { language: 'cpp', confidence: 1.0 }],
        // Go
        ['.go', { language: 'go', confidence: 1.0 }],
        // Rust
        ['.rs', { language: 'rust', confidence: 1.0 }],
        // C#
        ['.cs', { language: 'csharp', confidence: 1.0 }],
        // PHP
        ['.php', { language: 'php', confidence: 1.0 }],
        // Ruby
        ['.rb', { language: 'ruby', confidence: 1.0 }],
        // Swift
        ['.swift', { language: 'swift', confidence: 1.0 }],
        // Kotlin
        ['.kt', { language: 'kotlin', confidence: 1.0 }],
        ['.kts', { language: 'kotlin', confidence: 1.0 }],
        // Shell
        ['.sh', { language: 'shell', confidence: 1.0 }],
        ['.bash', { language: 'shell', confidence: 1.0 }],
        ['.zsh', { language: 'shell', confidence: 1.0 }],
        // SQL
        ['.sql', { language: 'sql', confidence: 1.0 }],
        // YAML
        ['.yml', { language: 'yaml', confidence: 1.0 }],
        ['.yaml', { language: 'yaml', confidence: 1.0 }],
        // JSON
        ['.json', { language: 'json', confidence: 1.0 }],
        // Markdown
        ['.md', { language: 'markdown', confidence: 1.0 }],
        ['.markdown', { language: 'markdown', confidence: 1.0 }],
        // XML/HTML
        ['.xml', { language: 'xml', confidence: 1.0 }],
        ['.html', { language: 'html', confidence: 1.0 }],
        ['.htm', { language: 'html', confidence: 1.0 }],
        // CSS
        ['.css', { language: 'css', confidence: 1.0 }],
        ['.scss', { language: 'scss', confidence: 1.0 }],
        ['.sass', { language: 'sass', confidence: 1.0 }],
        ['.less', { language: 'less', confidence: 1.0 }]
    ]);

    /**
     * Ordered list of { pattern, type }; the first matching entry wins.
     * Several patterns are deliberately case-sensitive (e.g. `Test.java`),
     * so determineFileType() must also try the original-case name/path.
     */
    FILE_TYPE_PATTERNS = [
        // Test files
        { pattern: /\.(test|spec|tests)\.[jt]sx?$/, type: 'test' },
        { pattern: /test_.*\.py$/, type: 'test' },
        { pattern: /.*_test\.py$/, type: 'test' },
        { pattern: /.*_test\.go$/, type: 'test' },
        { pattern: /.*Test\.java$/, type: 'test' },
        { pattern: /.*Tests\.cs$/, type: 'test' },
        // Config files
        { pattern: /^(webpack|rollup|vite|babel|eslint|prettier)\.config\.[jt]s$/, type: 'config' },
        { pattern: /^(tsconfig|jsconfig)\.json$/, type: 'config' },
        { pattern: /^package\.json$/, type: 'config' },
        { pattern: /^Cargo\.toml$/, type: 'config' },
        { pattern: /^pom\.xml$/, type: 'config' },
        { pattern: /^requirements\.txt$/, type: 'config' },
        { pattern: /^Pipfile$/, type: 'config' },
        { pattern: /^poetry\.lock$/, type: 'config' },
        { pattern: /^go\.mod$/, type: 'config' },
        { pattern: /\.config\.[jt]s$/, type: 'config' },
        // Documentation
        { pattern: /\.(md|markdown|rst|txt)$/i, type: 'documentation' },
        { pattern: /^README/i, type: 'documentation' },
        { pattern: /^CHANGELOG/i, type: 'documentation' },
        { pattern: /^LICENSE/i, type: 'documentation' },
        // Data files
        { pattern: /\.(json|yaml|yml|xml|csv|toml)$/i, type: 'data' }
    ];

    /**
     * Content heuristics: every pattern that matches the analysed text adds
     * `boost` to the running score of `language`.
     */
    CONTENT_HINTS = [
        // JavaScript/TypeScript hints
        { pattern: /import\s+.*\s+from\s+['"]/, language: 'typescript', boost: 0.2 },
        { pattern: /export\s+(default\s+)?(class|function|const|interface)/, language: 'typescript', boost: 0.3 },
        { pattern: /interface\s+\w+/, language: 'typescript', boost: 0.4 },
        { pattern: /type\s+\w+\s*=/, language: 'typescript', boost: 0.3 },
        { pattern: /<\w+.*>.*<\/\w+>/, language: 'typescript', boost: 0.2 }, // JSX
        // Python hints
        // The `m` flag is required: the patterns run against a multi-line
        // string, and without it `^` only matches the very start of the text.
        { pattern: /^def\s+\w+\s*\(/m, language: 'python', boost: 0.3 },
        { pattern: /^class\s+\w+\s*\(?\s*\w*\s*\)?:/m, language: 'python', boost: 0.4 },
        { pattern: /import\s+\w+/, language: 'python', boost: 0.2 },
        { pattern: /from\s+\w+\s+import/, language: 'python', boost: 0.3 },
        // Java hints
        { pattern: /public\s+(class|interface|enum)\s+\w+/, language: 'java', boost: 0.4 },
        { pattern: /package\s+[\w.]+;/, language: 'java', boost: 0.3 },
        { pattern: /@\w+/, language: 'java', boost: 0.2 },
        // C/C++ hints
        { pattern: /#include\s*<.*>/, language: 'cpp', boost: 0.3 },
        { pattern: /namespace\s+\w+/, language: 'cpp', boost: 0.4 },
        { pattern: /class\s+\w+\s*{/, language: 'cpp', boost: 0.3 },
        // Go hints
        { pattern: /package\s+\w+/, language: 'go', boost: 0.3 },
        { pattern: /func\s+\w+\s*\(/, language: 'go', boost: 0.3 },
        { pattern: /import\s*\([\s\S]*?\)/, language: 'go', boost: 0.2 },
        // Rust hints
        { pattern: /fn\s+\w+\s*\(/, language: 'rust', boost: 0.3 },
        { pattern: /use\s+\w+(::\w+)*;/, language: 'rust', boost: 0.3 },
        { pattern: /struct\s+\w+\s*{/, language: 'rust', boost: 0.3 }
    ];

    /**
     * Detect language from file path and content
     *
     * @param {string} filePath - Path of the file; its extension drives the primary guess.
     * @param {string} [content] - Optional file text used to refine the guess.
     * @returns {{language: string, confidence: number, fileType: string}}
     *   `language` is 'text' (confidence 0.1) when the extension is unknown
     *   and the content gives no stronger signal.
     */
    detectLanguage(filePath, content) {
        const fileName = path.basename(filePath);
        const extension = path.extname(filePath).toLowerCase();
        // Primary detection based on extension
        const extensionMatch = this.EXTENSION_MAP.get(extension);
        let language = extensionMatch?.language ?? 'text';
        let confidence = extensionMatch?.confidence ?? 0.1;
        // Enhance with content analysis if available (strings only, since
        // analyzeContent() splits the text into lines)
        if (typeof content === 'string' && content.length > 0) {
            const contentAnalysis = this.analyzeContent(content);
            if (contentAnalysis.language && contentAnalysis.confidence > confidence) {
                // Content is a stronger signal than the extension: trust it,
                // but cap at 0.9 for content-based detection
                language = contentAnalysis.language;
                confidence = Math.min(contentAnalysis.confidence, 0.9);
            }
            else if (contentAnalysis.language === language) {
                // Boost confidence if content matches extension
                confidence = Math.min(confidence + contentAnalysis.confidence * 0.3, 1.0);
            }
        }
        // Determine file type
        const fileType = this.determineFileType(fileName, filePath);
        return { language, confidence, fileType };
    }

    /**
     * Analyze content for language hints
     *
     * Only the first 50 lines are inspected. Boosts of all matching hints are
     * summed per language; the highest total wins (first scored language on a tie).
     *
     * @param {string} content - File text.
     * @returns {{language: (string|null), confidence: number}}
     *   `{ language: null, confidence: 0 }` when no hint matches; otherwise
     *   the winning language with its score capped at 0.8.
     */
    analyzeContent(content) {
        const languageScores = new Map();
        const searchContent = content.split('\n').slice(0, 50).join('\n');
        // Apply content hints
        for (const hint of this.CONTENT_HINTS) {
            if (hint.pattern.test(searchContent)) {
                const currentScore = languageScores.get(hint.language) ?? 0;
                languageScores.set(hint.language, currentScore + hint.boost);
            }
        }
        if (languageScores.size === 0) {
            return { language: null, confidence: 0 };
        }
        // Find language with highest score
        let bestLanguage = '';
        let bestScore = 0;
        for (const [language, score] of languageScores) {
            if (score > bestScore) {
                bestLanguage = language;
                bestScore = score;
            }
        }
        return {
            language: bestLanguage,
            confidence: Math.min(bestScore, 0.8) // Cap content-based confidence
        };
    }

    /**
     * Determine file type (source, test, config, etc.)
     *
     * FILE_TYPE_PATTERNS is consulted first (in order), then the directory
     * names in the path.
     *
     * @param {string} fileName - Base name of the file.
     * @param {string} filePath - Full or relative path of the file.
     * @returns {string} 'test', 'config', 'documentation', 'data' or 'source'.
     */
    determineFileType(fileName, filePath) {
        const normalizedName = fileName.toLowerCase();
        const normalizedPath = filePath.toLowerCase();
        // Try the original-case strings as well as the lower-cased ones:
        // case-sensitive patterns such as /Test\.java$/ or /^Cargo\.toml$/
        // can never match a lower-cased string.
        const candidates = [fileName, filePath, normalizedName, normalizedPath];
        for (const { pattern, type } of this.FILE_TYPE_PATTERNS) {
            if (candidates.some((candidate) => pattern.test(candidate))) {
                return type;
            }
        }
        // Check directory context. Comparing whole segments (everything but
        // the last one, which is the file name) also covers relative paths
        // that start with the directory and backslash-separated paths.
        const directories = new Set(normalizedPath.split(/[\\/]/).slice(0, -1));
        const isInside = (...names) => names.some((name) => directories.has(name));
        if (isInside('test', '__tests__', 'tests', 'spec')) {
            return 'test';
        }
        if (isInside('config', 'configs', '.config')) {
            return 'config';
        }
        if (isInside('docs', 'doc', 'documentation')) {
            return 'documentation';
        }
        return 'source'; // Default to source
    }

    /**
     * Get all supported languages
     *
     * @returns {string[]} Sorted, de-duplicated language names from EXTENSION_MAP.
     */
    getSupportedLanguages() {
        const languages = new Set();
        for (const info of this.EXTENSION_MAP.values()) {
            languages.add(info.language);
        }
        return Array.from(languages).sort();
    }

    /**
     * Check if a language is supported for advanced processing
     *
     * @param {string} language - Language name (case-insensitive).
     * @returns {boolean} false for unknown languages and for non-string input.
     */
    isLanguageSupported(language) {
        if (typeof language !== 'string') {
            return false;
        }
        return AST_SUPPORTED_LANGUAGES.has(language.toLowerCase());
    }

    /**
     * Get file extension for a language
     *
     * @param {string} language - Language name (case-insensitive).
     * @returns {(string|null)} Extension including the dot, or null when the
     *   language is unknown or not a string.
     */
    getPrimaryExtension(language) {
        if (typeof language !== 'string') {
            return null;
        }
        return PRIMARY_EXTENSIONS.get(language.toLowerCase()) ?? null;
    }
}
//# sourceMappingURL=LanguageDetector.js.map