UNPKG

remcode

Version:

Turn your AI assistant into a codebase expert. Intelligent code analysis, semantic search, and software engineering guidance through MCP integration.

611 lines (610 loc) 24.5 kB
"use strict";
// --- TypeScript-emitted CommonJS interop helpers (kept functionally as generated) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.SimilarityAnalyzer = exports.CodePattern = void 0;
const logger_1 = require("../utils/logger");
const semantic_1 = require("./semantic");
const jsSimilarity = __importStar(require("string-similarity"));
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
const logger = (0, logger_1.getLogger)('SimilarityAnalyzer');
/**
 * File extensions (lower-case, with leading dot) that are treated as source code.
 * Shared by identifyCodePatterns() and findCodeFiles() so the two cannot drift apart.
 */
const CODE_EXTENSIONS = new Set([
    '.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.cs', '.go', '.rb'
]);
/**
 * Directory names that findCodeFiles() never descends into.
 */
const SKIPPED_DIRECTORIES = new Set(['node_modules', '.git', 'dist', 'build', 'out']);
/**
 * Return `value` unless it is exactly `undefined`, in which case return `fallback`.
 * Used for option defaults so that falsy-but-meaningful values (0, false, null) are kept.
 */
function withDefault(value, fallback) {
    return value === undefined ? fallback : value;
}
/**
 * Known code patterns that can be detected
 */
var CodePattern;
(function (CodePattern) {
    CodePattern["ERROR_HANDLING"] = "error-handling";
    CodePattern["ASYNC_AWAIT"] = "async-await";
    CodePattern["CLASS_BASED"] = "class-based";
    CodePattern["FUNCTIONAL"] = "functional";
    CodePattern["SINGLETON"] = "singleton";
    CodePattern["FACTORY"] = "factory";
    CodePattern["OBSERVER"] = "observer";
    CodePattern["MVC"] = "mvc";
    CodePattern["API_CLIENT"] = "api-client";
    CodePattern["DATA_TRANSFORMATION"] = "data-transformation";
    CodePattern["STATE_MANAGEMENT"] = "state-management";
})(CodePattern || (exports.CodePattern = CodePattern = {}));
/**
 * Analyzes code for similarity and pattern detection
 */
class SimilarityAnalyzer {
    /**
     * Creates a new SimilarityAnalyzer.
     *
     * @param options Optional settings. Recognised keys read by this class:
     *   `minSimilarity` (default 0.7), `enableSemanticSearch`, `enableSyntaxAnalysis`,
     *   `enablePatternDetection` (each defaults to true), `semanticSearch` and
     *   `embeddingManager` (pre-built collaborators, default null). Any other keys are
     *   copied onto `this.options` unchanged.
     */
    constructor(options = {}) {
        this.semanticSearch = null;
        this.embeddingManager = null;
        this.initialized = false;
        // Pattern definitions - each with regex patterns and signatures.
        // NOTE(review): generateSimilarityReasons() pairs regex[i] with signatures[i],
        // but several entries below have more regexes than signatures, so some
        // reasons are attributed to the wrong regex. Left as-is to keep output stable.
        this.patterns = {
            [CodePattern.ERROR_HANDLING]: {
                regex: [
                    /try\s*{[\s\S]*?}\s*catch\s*\([^)]*\)\s*{[\s\S]*?}/g,
                    /throw\s+new\s+\w+\(/g,
                    /\.catch\s*\([^)]*\)/g
                ],
                signatures: [
                    'try/catch blocks',
                    'throw new Error',
                    'promise.catch'
                ],
                description: 'Error handling patterns including try/catch blocks and promise rejection handling'
            },
            [CodePattern.ASYNC_AWAIT]: {
                regex: [
                    /async\s+function/g,
                    /async\s+\([^)]*\)\s*=>/g,
                    /await\s+/g
                ],
                signatures: [
                    'async function',
                    'await operator'
                ],
                description: 'Asynchronous code patterns using async/await syntax'
            },
            [CodePattern.CLASS_BASED]: {
                regex: [
                    /class\s+\w+/g,
                    /extends\s+\w+/g,
                    /constructor\s*\(/g,
                    /this\./g
                ],
                signatures: [
                    'class declaration',
                    'extends keyword',
                    'constructor method'
                ],
                description: 'Object-oriented patterns using class syntax'
            },
            [CodePattern.FUNCTIONAL]: {
                regex: [
                    /\w+\s*=>\s*{/g,
                    /\w+\.map\s*\(/g,
                    /\w+\.filter\s*\(/g,
                    /\w+\.reduce\s*\(/g
                ],
                signatures: [
                    'arrow functions',
                    'map/filter/reduce'
                ],
                description: 'Functional programming patterns using higher-order functions'
            },
            [CodePattern.SINGLETON]: {
                regex: [
                    /static\s+getInstance\s*\(/g,
                    /private\s+static\s+instance/g,
                    /if\s*\(\s*!\s*instance\s*\)/g
                ],
                signatures: [
                    'getInstance method',
                    'private static instance',
                    'instance check'
                ],
                description: 'Singleton pattern implementation ensuring a single instance of a class'
            },
            [CodePattern.FACTORY]: {
                regex: [
                    /\w+\.create\w+\s*\(/g,
                    /factory\s*=\s*new\s+\w+/gi,
                    /new\s+\$\w+\(/g
                ],
                signatures: [
                    'create methods',
                    'factory class'
                ],
                description: 'Factory pattern for object creation'
            },
            [CodePattern.OBSERVER]: {
                regex: [
                    /addEventListener\s*\(/g,
                    /removeEventListener\s*\(/g,
                    /\.subscribe\s*\(/g,
                    /\.unsubscribe\s*\(/g,
                    /\.on\s*\(['"]\w+['"]\s*,/g
                ],
                signatures: [
                    'event listeners',
                    'subscribe/unsubscribe',
                    'on/emit methods'
                ],
                description: 'Observer pattern with event listening and notification'
            },
            [CodePattern.MVC]: {
                regex: [
                    /class\s+\w+Controller/g,
                    /class\s+\w+View/g,
                    /class\s+\w+Model/g,
                    /render\s*\(\s*\)/g
                ],
                signatures: [
                    'Controller class',
                    'View class',
                    'Model class'
                ],
                description: 'Model-View-Controller architectural pattern'
            },
            [CodePattern.API_CLIENT]: {
                regex: [
                    /axios\s*\.\s*(get|post|put|delete)\s*\(/g,
                    /fetch\s*\(/g,
                    /\.then\s*\(/g,
                    /headers\s*:\s*{/g
                ],
                signatures: [
                    'HTTP requests',
                    'API endpoints',
                    'request/response handling'
                ],
                description: 'API client patterns for making HTTP requests'
            },
            [CodePattern.DATA_TRANSFORMATION]: {
                regex: [
                    /\.map\s*\(\s*\w+\s*=>/g,
                    /JSON\.parse\s*\(/g,
                    /JSON\.stringify\s*\(/g
                ],
                signatures: [
                    'data mapping',
                    'JSON parsing/serialization'
                ],
                description: 'Data transformation and processing patterns'
            },
            [CodePattern.STATE_MANAGEMENT]: {
                regex: [
                    /useState\s*\(/g,
                    /useReducer\s*\(/g,
                    /createStore\s*\(/g,
                    /new\s+\w+Store\s*\(/g
                ],
                signatures: [
                    'state hooks',
                    'reducers',
                    'store creation'
                ],
                description: 'State management patterns for tracking application state'
            }
        };
        // Caller-supplied keys are copied first and the defaults are applied afterwards,
        // only where the caller's value is `undefined`. Previously the spread came last,
        // so an explicit `{ enablePatternDetection: undefined }` wiped out the default.
        this.options = {
            ...options,
            minSimilarity: withDefault(options.minSimilarity, 0.7),
            enableSemanticSearch: withDefault(options.enableSemanticSearch, true),
            enableSyntaxAnalysis: withDefault(options.enableSyntaxAnalysis, true),
            enablePatternDetection: withDefault(options.enablePatternDetection, true)
        };
        this.semanticSearch = options.semanticSearch || null;
        this.embeddingManager = options.embeddingManager || null;
    }
    /**
     * Initialize the analyzer if needed.
     *
     * When semantic search is enabled and no SemanticSearch instance was supplied,
     * one is constructed from environment variables and initialized. Subsequent calls
     * return immediately once `this.initialized` is set.
     */
    async ensureInitialized() {
        if (this.initialized) {
            return;
        }
        // If semantic search is enabled but not provided, create it
        if (this.options.enableSemanticSearch && !this.semanticSearch) {
            this.semanticSearch = new semantic_1.SemanticSearch({
                pineconeApiKey: process.env.PINECONE_API_KEY,
                pineconeIndexName: 'remcode-default',
                pineconeEnvironment: process.env.PINECONE_ENVIRONMENT || 'gcp-starter',
                pineconeNamespace: 'default',
                huggingfaceToken: process.env.HUGGINGFACE_TOKEN,
                embeddingModel: 'microsoft/graphcodebert-base',
                fallbackModel: 'sentence-transformers/all-MiniLM-L6-v2'
            });
            await this.semanticSearch.initialize();
        }
        this.initialized = true;
    }
    /**
     * Find code patterns similar to the provided code snippet
     * @param codeSnippet The code snippet to analyze
     * @param threshold Minimum similarity threshold (0-1)
     * @returns Similarity analysis result with `targetCode`, `similarCode`,
     *   `similarityReasons`, `patternType`, `patternName` and `confidence`
     */
    async findSimilarPatterns(codeSnippet, threshold = 0.8) {
        logger.info(`Analyzing code similarity patterns with threshold ${threshold}`);
        await this.ensureInitialized();
        // Detect what type of code it is (function, class, etc.)
        const patternType = this.detectPatternType(codeSnippet);
        // Detect what design patterns are used
        const detectedPatterns = this.detectPatterns(codeSnippet);
        let similarCode = [];
        // If semantic search is enabled and available, find similar code
        if (this.options.enableSemanticSearch && this.semanticSearch) {
            try {
                similarCode = await this.semanticSearch.searchSimilarCode(codeSnippet, 5);
                // Filter results below threshold
                similarCode = similarCode.filter(result => result.score >= threshold);
            }
            catch (error) {
                // Best effort: a failed search is logged and treated as "no similar code".
                logger.error(`Error during semantic search: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        // Generate reasons for similarity based on detected patterns
        const similarityReasons = this.generateSimilarityReasons(codeSnippet, detectedPatterns);
        // Determine overall confidence based on pattern detection and semantic results
        const confidence = this.calculateOverallConfidence(detectedPatterns, similarCode);
        return {
            targetCode: codeSnippet,
            similarCode,
            similarityReasons,
            patternType,
            patternName: detectedPatterns.length > 0 ? detectedPatterns[0] : undefined,
            confidence
        };
    }
    /**
     * Calculate overall confidence score.
     *
     * Starts at 0.5, adds 0.1 per detected pattern (capped at 0.3) and 0.2 times the
     * mean score of the similar-code results; the total is capped at 1.
     * @param detectedPatterns Array of detected pattern names
     * @param similarCode Array of results, each with a numeric `score`
     * @returns Confidence value, at most 1
     */
    calculateOverallConfidence(detectedPatterns, similarCode) {
        let confidence = 0.5; // Base confidence
        // Add confidence based on pattern detection
        if (detectedPatterns.length > 0) {
            confidence += Math.min(0.3, detectedPatterns.length * 0.1);
        }
        // Add confidence based on semantic search results
        if (similarCode.length > 0) {
            const avgScore = similarCode.reduce((sum, result) => sum + result.score, 0) / similarCode.length;
            confidence += avgScore * 0.2;
        }
        return Math.min(1, confidence);
    }
    /**
     * Compare the similarity between two code snippets
     * @param code1 First code snippet
     * @param code2 Second code snippet
     * @returns Similarity score (0-1)
     */
    async compareCodeSimilarity(code1, code2) {
        logger.info('Comparing code similarity');
        // Use multiple similarity metrics and combine them for better accuracy
        // 1. String-based similarity on the normalized text.
        // NOTE(review): the string-similarity package documents compareTwoStrings as a
        // Dice-coefficient measure (not Levenshtein) - confirm against the installed version.
        const stringSimilarity = jsSimilarity.compareTwoStrings(this.normalizeCode(code1), this.normalizeCode(code2));
        // 2. Token-based similarity (if syntax analysis is enabled)
        let tokenSimilarity = 0;
        if (this.options.enableSyntaxAnalysis) {
            const tokens1 = this.extractTokens(code1);
            const tokens2 = this.extractTokens(code2);
            tokenSimilarity = this.calculateTokenSimilarity(tokens1, tokens2);
        }
        // 3. Pattern-based similarity
        let patternSimilarity = 0;
        if (this.options.enablePatternDetection) {
            const patterns1 = this.detectPatterns(code1);
            const patterns2 = this.detectPatterns(code2);
            patternSimilarity = this.calculatePatternSimilarity(patterns1, patterns2);
        }
        // 4. Semantic similarity (if available)
        let semanticSimilarity = 0;
        if (this.embeddingManager) {
            try {
                const chunks = [
                    { content: code1, metadata: { file_path: 'temp1', strategy: 'comparison', chunk_type: 'snippet' } },
                    { content: code2, metadata: { file_path: 'temp2', strategy: 'comparison', chunk_type: 'snippet' } }
                ];
                const embeddings = await this.embeddingManager.embedChunks(chunks);
                if (embeddings[0].embedding && embeddings[1].embedding) {
                    semanticSimilarity = this.cosineSimilarity(embeddings[0].embedding, embeddings[1].embedding);
                }
            }
            catch (error) {
                // Best effort: fall back to the non-semantic weighting below.
                logger.error(`Error calculating semantic similarity: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        // Combine similarities with weights
        let combinedSimilarity;
        if (semanticSimilarity > 0) {
            // If we have semantic similarity, give it more weight
            combinedSimilarity = ((stringSimilarity * 0.2) +
                (tokenSimilarity * 0.2) +
                (patternSimilarity * 0.2) +
                (semanticSimilarity * 0.4));
        }
        else {
            // Without semantic, adjust weights of other metrics
            combinedSimilarity = ((stringSimilarity * 0.4) +
                (tokenSimilarity * 0.3) +
                (patternSimilarity * 0.3));
        }
        return Math.max(0, Math.min(1, combinedSimilarity));
    }
    /**
     * Identify code patterns in a file or code content
     * @param filePathOrContent Path to the file to analyze or code content directly
     * @param isContent Whether the first parameter is content (true) or file path (false)
     * @returns Array of detected pattern names (empty for missing, non-code or unreadable files)
     */
    async identifyCodePatterns(filePathOrContent, isContent = false) {
        if (isContent) {
            logger.info(`Identifying patterns in provided code content`);
            return this.detectPatterns(filePathOrContent);
        }
        logger.info(`Identifying patterns in ${filePathOrContent}`);
        if (!fs.existsSync(filePathOrContent)) {
            logger.warn(`File not found: ${filePathOrContent}, returning empty patterns`);
            return [];
        }
        try {
            // Check the extension before touching the file so non-code files
            // (potentially large binaries) are never read into memory.
            const extension = path.extname(filePathOrContent).toLowerCase();
            if (!CODE_EXTENSIONS.has(extension)) {
                logger.warn(`Skipping non-code file: ${filePathOrContent}`);
                return [];
            }
            const fileContent = fs.readFileSync(filePathOrContent, 'utf-8');
            return this.detectPatterns(fileContent);
        }
        catch (error) {
            logger.error(`Error identifying patterns in ${filePathOrContent}: ${error instanceof Error ? error.message : String(error)}`);
            return [];
        }
    }
    /**
     * Find design patterns in a repository
     * @param repoPath Path to the repository
     * @returns Map of file paths to detected patterns (only files with at least one pattern);
     *   an empty Map if the scan itself fails
     * @throws Error if `repoPath` does not exist
     */
    async analyzeRepositoryPatterns(repoPath) {
        logger.info(`Analyzing design patterns in repository: ${repoPath}`);
        const results = new Map();
        if (!fs.existsSync(repoPath)) {
            throw new Error(`Repository path not found: ${repoPath}`);
        }
        try {
            // Find code files in the repository
            const codeFiles = this.findCodeFiles(repoPath);
            // Analyze each file
            for (const file of codeFiles) {
                const patterns = await this.identifyCodePatterns(file);
                if (patterns.length > 0) {
                    results.set(file, patterns);
                }
            }
            return results;
        }
        catch (error) {
            logger.error(`Error analyzing repository patterns: ${error instanceof Error ? error.message : String(error)}`);
            return new Map();
        }
    }
    /**
     * Detect patterns in code.
     *
     * A pattern counts as detected when its regexes produce at least two matches in
     * total. Returns an empty array when pattern detection is disabled in the options.
     * @param code Code to analyze
     * @returns Array of detected pattern names
     */
    detectPatterns(code) {
        const detectedPatterns = [];
        if (!this.options.enablePatternDetection) {
            return detectedPatterns;
        }
        // Check each pattern against the code
        for (const [patternName, patternDef] of Object.entries(this.patterns)) {
            let matchCount = 0;
            // Check all regex patterns for this code pattern
            for (const regex of patternDef.regex) {
                const matches = code.match(regex);
                if (matches && matches.length > 0) {
                    matchCount += matches.length;
                }
            }
            // If enough matches are found, consider the pattern detected
            if (matchCount >= 2) {
                detectedPatterns.push(patternName);
            }
        }
        return detectedPatterns;
    }
    /**
     * Detect the type of code pattern
     * @param code Code to analyze
     * @returns 'class', 'function', 'module' or the generic fallback 'pattern'
     */
    detectPatternType(code) {
        // Check for class pattern
        if (code.match(/class\s+\w+/)) {
            return 'class';
        }
        // Check for function pattern
        if (code.match(/function\s+\w+\s*\(/) || code.match(/const\s+\w+\s*=\s*\(\s*\)\s*=>/)) {
            return 'function';
        }
        // Check if it might be a module
        if (code.match(/export\s+/) || code.match(/import\s+/) || code.match(/require\s*\(/)) {
            return 'module';
        }
        // Default to generic pattern
        return 'pattern';
    }
    /**
     * Generate reasons why code is similar based on detected patterns
     * @param code Code to analyze
     * @param detectedPatterns Array of detected patterns
     * @returns Array of human-readable reasons
     */
    generateSimilarityReasons(code, detectedPatterns) {
        const reasons = [];
        // Add reasons based on pattern type
        const patternType = this.detectPatternType(code);
        reasons.push(`Similar ${patternType} structure`);
        // Add reasons based on detected design patterns
        for (const pattern of detectedPatterns) {
            const patternDef = this.patterns[pattern];
            if (patternDef) {
                // Find which specific signatures are present
                for (let i = 0; i < patternDef.regex.length; i++) {
                    if (code.match(patternDef.regex[i]) && patternDef.signatures[i]) {
                        reasons.push(`Uses ${patternDef.signatures[i]}`);
                    }
                }
            }
        }
        // Check for specific code elements
        if (code.match(/try\s*{/)) {
            reasons.push('Contains error handling');
        }
        if (code.match(/async\s+/)) {
            reasons.push('Uses asynchronous patterns');
        }
        if (code.match(/for\s*\(/)) {
            reasons.push('Contains loop structures');
        }
        if (code.match(/if\s*\(/)) {
            reasons.push('Has conditional logic');
        }
        return reasons;
    }
    /**
     * Extract tokens from code
     * @param code Code to tokenize
     * @returns Non-empty tokens of the normalized code
     */
    extractTokens(code) {
        // Simple tokenization - split by whitespace and symbols
        return this.normalizeCode(code)
            .split(/[\s\n\r;{}()\[\]\.,=:+\-*\/%<>!&|^~]+/)
            .filter(token => token.length > 0);
    }
    /**
     * Calculate similarity between token sets
     * @param tokens1 Tokens of the first snippet
     * @param tokens2 Tokens of the second snippet
     * @returns Jaccard similarity of the distinct tokens, or 0 if either list is empty
     */
    calculateTokenSimilarity(tokens1, tokens2) {
        if (tokens1.length === 0 || tokens2.length === 0) {
            return 0;
        }
        // Count tokens that appear in both sets
        const set1 = new Set(tokens1);
        const set2 = new Set(tokens2);
        let commonCount = 0;
        for (const token of set1) {
            if (set2.has(token)) {
                commonCount++;
            }
        }
        // Jaccard similarity: size of intersection / size of union
        return commonCount / (set1.size + set2.size - commonCount);
    }
    /**
     * Calculate similarity between pattern sets
     * @param patterns1 Patterns detected in the first snippet
     * @param patterns2 Patterns detected in the second snippet
     * @returns Jaccard similarity of the distinct patterns, or 0 if either list is empty
     */
    calculatePatternSimilarity(patterns1, patterns2) {
        if (patterns1.length === 0 || patterns2.length === 0) {
            return 0;
        }
        // Count patterns that appear in both sets
        const set1 = new Set(patterns1);
        const set2 = new Set(patterns2);
        let commonCount = 0;
        for (const pattern of set1) {
            if (set2.has(pattern)) {
                commonCount++;
            }
        }
        // Jaccard similarity: size of intersection / size of union
        return commonCount / (set1.size + set2.size - commonCount);
    }
    /**
     * Normalize code for better comparison.
     *
     * Removes `//` and block comments, replaces the contents of single- and
     * double-quoted string literals with empty literals, collapses whitespace runs to
     * a single space and trims the result.
     *
     * Literals and comments are recognised in one left-to-right pass. Stripping
     * comments first (as a separate step) treated `//` inside a string such as
     * "http://host" as a comment and deleted the rest of that line.
     * @param code Code to normalize
     * @returns Normalized code
     */
    normalizeCode(code) {
        // Alternatives, in order: "double-quoted", 'single-quoted', `template`,
        // line comment, block comment. Whichever starts first in the text wins.
        const literalOrComment = /"(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`|\/\/.*$|\/\*[\s\S]*?\*\//gm;
        return code
            .replace(literalOrComment, (match) => {
                const opener = match[0];
                if (opener === '"') {
                    return '""'; // Normalize string literals
                }
                if (opener === "'") {
                    return "''"; // Normalize string literals
                }
                if (opener === '`') {
                    return match; // Keep template literals; only shield them from comment stripping
                }
                return ''; // Remove comments
            })
            .replace(/\s+/g, ' ') // Normalize whitespace
            .trim();
    }
    /**
     * Find all code files in a directory recursively.
     *
     * Skips directories listed in SKIPPED_DIRECTORIES and keeps files whose
     * lower-cased extension is in CODE_EXTENSIONS.
     * @param dir Directory to scan
     * @returns Array of file paths (each joined onto `dir`)
     */
    findCodeFiles(dir) {
        const files = [];
        const entries = fs.readdirSync(dir, { withFileTypes: true });
        for (const entry of entries) {
            const fullPath = path.join(dir, entry.name);
            // Skip node_modules, .git, and other common non-source directories
            if (entry.isDirectory()) {
                if (!SKIPPED_DIRECTORIES.has(entry.name)) {
                    files.push(...this.findCodeFiles(fullPath));
                }
            }
            else if (CODE_EXTENSIONS.has(path.extname(entry.name).toLowerCase())) {
                files.push(fullPath);
            }
        }
        return files;
    }
    /**
     * Calculate cosine similarity between two vectors
     * @param vector1 First numeric vector
     * @param vector2 Second numeric vector
     * @returns Cosine similarity, or 0 if either vector has zero magnitude
     * @throws Error if the vectors differ in length
     */
    cosineSimilarity(vector1, vector2) {
        if (vector1.length !== vector2.length) {
            throw new Error('Vectors must be of equal length');
        }
        let dotProduct = 0;
        let magnitude1 = 0;
        let magnitude2 = 0;
        for (let i = 0; i < vector1.length; i++) {
            dotProduct += vector1[i] * vector2[i];
            magnitude1 += vector1[i] * vector1[i];
            magnitude2 += vector2[i] * vector2[i];
        }
        magnitude1 = Math.sqrt(magnitude1);
        magnitude2 = Math.sqrt(magnitude2);
        // Guard against division by zero for all-zero vectors.
        if (magnitude1 === 0 || magnitude2 === 0) {
            return 0;
        }
        return dotProduct / (magnitude1 * magnitude2);
    }
}
exports.SimilarityAnalyzer = SimilarityAnalyzer;