@mkpro118/semantic-change-detector

Semantic change detection for TypeScript and TSX code with GitHub Actions integration

/**
 * @file This module contains the core logic for analyzing file changes.
 * It orchestrates fetching file content from git, generating diffs, creating semantic contexts,
 * and running the final analysis.
 */
import { spawnSync } from 'child_process';
import * as fs from 'fs';
import { minimatch } from 'minimatch';
import * as path from 'path';
import * as process from 'process'; // Added for process.cwd()
import ts from 'typescript';
import { fileURLToPath } from 'url';
import { detectSemanticChanges } from './analyzers/index.js';
import { DEFAULT_CONFIG, shouldRequireTestsForChange } from './types/config.js';
import { createSemanticContext } from './context/semantic-context-builder.js';
import { formatForGitHubActions } from './formatters/github-actions.js';
import { mapConcurrent } from './utils/concurrency.js';
import { logger } from './utils/logger.js';

/**
 * Checks if a file path is valid for the given git reference.
 * For the working tree (.), checks the filesystem; for git refs, checks git history.
 * This is more robust than regex-based validation for complex file paths.
 * @param filePath The file path to check.
 * @param ref The git reference (e.g., commit SHA, branch name, or '.' for the working tree).
 * @returns True if the file exists in the specified context.
 */
function isValidFilePath(filePath, ref) {
  try {
    // For the working tree, check the filesystem
    if (ref === '.') {
      fs.statSync(filePath);
      return true;
    }
    // For git refs, use `git cat-file -e` to check if the file exists in that revision
    const result = gitRunner(['cat-file', '-e', `${ref}:${filePath}`]);
    return result.status === 0;
  } catch {
    return false;
  }
}

/**
 * Analyzes the semantic changes between two versions of a single file.
 * @param filePath The path to the file to analyze.
 * @param baseRef The git ref for the base version of the file.
 * @param headRef The git ref for the head version of the file.
 * @param config The analyzer configuration.
 * @returns An array of detected semantic changes for the file.
 */
export async function analyzeFileChanges(filePath, baseRef, headRef, config) {
  const baseContent = getFileContent(filePath, baseRef);
  const headContent = getFileContent(filePath, headRef);
  if (!baseContent && !headContent) {
    return [];
  }
  if (!baseContent) {
    logger.verbose(` New file detected`);
    return analyzeNewFile(headContent, filePath);
  }
  if (!headContent) {
    logger.verbose(` File deleted`);
    return [];
  }
  const locatedChanges = await detectSemanticChanges({
    baseFilePath: filePath,
    baseCode: baseContent,
    modifiedFilePath: filePath,
    modifiedCode: headContent,
    config: {
      sideEffectCallees: config.sideEffectCallees,
      testGlobs: config.testGlobs,
      bypassLabels: config.bypassLabels,
    },
  });
  // Convert internal LocatedSemanticChange[] to the FileChange[] format
  const changes = locatedChanges.map((change) => ({
    line: change.line,
    column: change.column,
    kind: change.kind,
    detail: change.detail,
    severity: change.severity,
    astNode: change.astNode,
    context: change.context,
  }));
  return changes;
}

/** The internal function used to execute git commands. Can be overridden for testing. */
let gitRunner = (args) => {
  const res = spawnSync('git', args, { encoding: 'utf8' });
  const status = typeof res.status === 'number' ? res.status : 1;
  const stdout = typeof res.stdout === 'string' ? res.stdout : undefined;
  return { status, stdout };
};
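// Usage sketch (not part of the published file): calling analyzeFileChanges()
// directly from another module. The refs are ordinary git revisions;
// DEFAULT_CONFIG is the default configuration imported above, and the file
// path shown here is hypothetical.
//
//   import { analyzeFileChanges } from './analysis-runner.js';
//   import { DEFAULT_CONFIG } from './types/config.js';
//
//   const changes = await analyzeFileChanges('src/index.ts', 'origin/main', 'HEAD', DEFAULT_CONFIG);
//   for (const c of changes) console.log(`${c.line}:${c.column} ${c.kind} (${c.severity})`);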
/**
 * Overrides the internal git runner function. Used for testing purposes.
 * @param fn The function to use for running git commands.
 * @internal
 */
export function __setGitRunner(fn) {
  gitRunner = fn;
}

/**
 * Retrieves the content of a file from a specific git ref.
 * @param filePath The path to the file.
 * @param ref The git ref (e.g., commit SHA, branch name).
 * @returns The file content as a string, or null if it could not be retrieved.
 */
export function getFileContent(filePath, ref) {
  // Handle the working tree (current filesystem) reference
  if (ref === '.') {
    if (!isValidFilePath(filePath, ref)) {
      return null;
    }
    try {
      return fs.readFileSync(filePath, 'utf8');
    } catch {
      return null;
    }
  }
  const res = gitRunner(['show', `${ref}:${filePath}`]);
  if (res.status === 0 && typeof res.stdout === 'string') {
    return res.stdout;
  }
  return null;
}

/**
 * Performs a lightweight analysis of a new file to find some basic changes.
 * @param content The full content of the new file.
 * @param filePath The path to the new file.
 * @returns An array of high-level changes found in the new file.
 */
export function analyzeNewFile(content, filePath) {
  const changes = [];
  const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true);
  const context = createSemanticContext(sourceFile, []);
  const exportedNames = new Set(context.exports.map((e) => e.name));
  for (const exp of context.exports) {
    changes.push({
      line: exp.line,
      column: exp.column,
      kind: 'exportAdded',
      detail: `New export '${exp.name}' added`,
      severity: 'high', // New exports are high-severity
      astNode: exp.type,
      context: `Export type: ${exp.type}`,
    });
  }
  for (const func of context.functions) {
    if (!exportedNames.has(func.name)) {
      // Only add if not already covered by exports
      changes.push({
        line: func.line,
        column: func.column,
        kind: 'functionAdded',
        detail: `New function '${func.name}' added`,
        severity: 'medium',
        astNode: 'FunctionDeclaration',
      });
    }
  }
  return changes;
}

/**
 * Generates an array of diff hunks representing the changes between two versions of a file.
 * It attempts to use `git diff` for precision, falling back to a single, full-file hunk.
 * @param baseContent The content of the base version.
 * @param headContent The content of the head version.
 * @param filePath The path to the file.
 * @param baseRef The git ref for the base version.
 * @param headRef The git ref for the head version.
 * @returns An array of diff hunks.
 */
export function generateDiffHunks(baseContent, headContent, filePath, baseRef, headRef) {
  try {
    // For the working tree, validate that the file exists
    if (headRef === '.' && !isValidFilePath(filePath, headRef)) {
      // Fall through to the fallback below
    } else {
      const args = ['diff', '--unified=0', baseRef];
      if (headRef !== '.') {
        args.push(headRef);
      }
      args.push('--', filePath);
      const res = gitRunner(args);
      if (res.status === 0 && typeof res.stdout === 'string') {
        const parsed = parseUnifiedDiff(res.stdout, filePath);
        if (parsed.length > 0) return parsed;
      }
    }
  } catch {
    // ignore and fall back
  }
  // Fallback: a single hunk spanning both files in full
  const baseLines = baseContent.split('\n');
  const headLines = headContent.split('\n');
  return [
    {
      file: filePath,
      baseRange: { start: 1, end: baseLines.length },
      headRange: { start: 1, end: headLines.length },
      addedLines: [],
      removedLines: [],
    },
  ];
}
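// Test sketch (illustrative): __setGitRunner() above makes the git-backed
// helpers in this module testable without a real repository. A stub only has
// to mimic the { status, stdout } shape; the stubbed content is hypothetical.
//
//   __setGitRunner((args) => {
//     if (args[0] === 'show') return { status: 0, stdout: 'export const x = 1;\n' };
//     if (args[0] === 'cat-file') return { status: 0 };
//     return { status: 1 };
//   });
//   getFileContent('src/x.ts', 'abc1234'); // -> 'export const x = 1;\n'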
/**
 * Parses a unified diff patch string into an array of DiffHunk objects.
 * @param patch The raw string output from a `git diff` command.
 * @param filePath The path to the file the patch applies to.
 * @returns An array of structured diff hunks.
 */
export function parseUnifiedDiff(patch, filePath) {
  const lines = patch.split(/\r?\n/);
  const hunks = [];
  let i = 0;
  while (i < lines.length) {
    const line = lines[i] ?? '';
    if (line.startsWith('@@')) {
      const m = /@@\s+-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s+@@/.exec(line);
      if (!m) {
        i++;
        continue;
      }
      const baseStart = Number.parseInt(m[1] ?? '1', 10);
      const baseLen = m[2] ? Number.parseInt(m[2], 10) : 1;
      const headStart = Number.parseInt(m[3] ?? '1', 10);
      const headLen = m[4] ? Number.parseInt(m[4], 10) : 1;
      let baseLine = baseStart;
      let headLine = headStart;
      const added = [];
      const removed = [];
      i++;
      while (i < lines.length && !(lines[i] ?? '').startsWith('@@')) {
        const l = lines[i] ?? '';
        if (l.startsWith('+')) {
          added.push({ lineNumber: headLine, content: l.slice(1) });
          headLine++;
        } else if (l.startsWith('-')) {
          removed.push({ lineNumber: baseLine, content: l.slice(1) });
          baseLine++;
        } else if (l.startsWith(' ')) {
          baseLine++;
          headLine++;
        } else if (l.startsWith('diff ')) {
          break;
        }
        i++;
      }
      hunks.push({
        file: filePath,
        baseRange: { start: baseStart, end: baseStart + Math.max(baseLen - 1, 0) },
        headRange: { start: headStart, end: headStart + Math.max(headLen - 1, 0) },
        addedLines: added,
        removedLines: removed,
      });
      continue;
    }
    i++;
  }
  return hunks;
}

/**
 * Checks if a file has any diffs between two git refs.
 * @param filePath The path to the file to check.
 * @param baseRef The git ref for the base version.
 * @param headRef The git ref for the head version.
 * @returns True if the file has diffs, false otherwise.
 */
export function hasDiffs(filePath, baseRef, headRef) {
  try {
    // For the working tree, validate that the file exists
    if (headRef === '.' && !isValidFilePath(filePath, headRef)) {
      return false;
    }
    const args = ['diff', '--unified=0', baseRef];
    if (headRef !== '.') {
      args.push(headRef);
    }
    args.push('--', filePath);
    const res = gitRunner(args);
    // Check for the presence of hunk markers
    if (res.status === 0 && typeof res.stdout === 'string' && res.stdout.includes('@@')) {
      return true;
    }
  } catch (error) {
    logger.debug(
      `Error checking for diffs in ${filePath}: ${error instanceof Error ? error.message : String(error)}`,
    );
    return false; // If git fails, assume no diff to be safe and avoid processing
  }
  return false;
}
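// Worked example (illustrative): parseUnifiedDiff() on a minimal zero-context
// hunk. '@@ -3,0 +4,2 @@' means nothing was removed at base line 3 and two
// lines were added starting at head line 4:
//
//   const hunks = parseUnifiedDiff('@@ -3,0 +4,2 @@\n+const a = 1;\n+const b = 2;', 'src/example.ts');
//   // hunks[0].headRange    -> { start: 4, end: 5 }
//   // hunks[0].addedLines   -> lines 4 and 5 with their content
//   // hunks[0].removedLines -> []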
/**
 * The main function to run the semantic analysis process.
 * It coordinates file filtering, concurrent analysis, and result processing.
 * @param options The analysis options parsed from the command line.
 * @returns A promise that resolves with the final analysis result.
 */
export async function runSemanticAnalysis(options) {
  const effectiveConfig = loadConfig(options); // Load config here
  const startTime = Date.now();
  const initialMemory = getMemoryUsage();
  logger.verbose('Starting semantic change analysis...');
  logger.verbose(`Analyzing ${options.files.length} files concurrently...`);
  logger.verbose(`Comparing ${options.baseRef} -> ${options.headRef}`);
  const allChanges = [];
  const failedFiles = [];
  let filesAnalyzed = 0;
  const filesToAnalyzeInitially = options.files.filter((file) =>
    shouldAnalyzeFile(file, effectiveConfig),
  );
  logger.verbose(
    `Filtered down to ${filesToAnalyzeInitially.length} files based on include/exclude rules.`,
  );
  // Early exit for files with no diffs
  const filesWithDiffs = [];
  for (const file of filesToAnalyzeInitially) {
    try {
      if (hasDiffs(file, options.baseRef, options.headRef)) {
        filesWithDiffs.push(file);
      } else {
        logger.verbose(`Skipping ${file} (no diffs)`);
      }
    } catch (error) {
      logger.debug(
        `Error checking diffs for ${file}: ${error instanceof Error ? error.message : String(error)}`,
      );
      failedFiles.push({
        filePath: file,
        error: `Diff check failed: ${error instanceof Error ? error.message : String(error)}`,
      });
    }
  }
  logger.verbose(`Filtered down to ${filesWithDiffs.length} files that have diffs.`);
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
  const workerScript = path.resolve(__dirname, 'worker.js');
  const tasks = filesWithDiffs.map((file) => ({
    filePath: file,
    baseRef: options.baseRef,
    headRef: options.headRef,
    config: effectiveConfig, // Use effectiveConfig
  }));
  try {
    const results = await mapConcurrent(
      tasks,
      workerScript,
      undefined, // Use default concurrency
      options.timeoutMs,
    );
    for (const result of results) {
      if (result.status === 'success' && result.changes) {
        logger.verbose(
          `Successfully analyzed ${result.filePath} - Found ${result.changes.length} changes`,
        );
        allChanges.push(...result.changes.map((change) => ({ ...change, file: result.filePath })));
        filesAnalyzed++;
      } else {
        logger.debug(`Error analyzing ${result.filePath}: ${result.error}`);
        failedFiles.push({
          filePath: result.filePath,
          error: result.error || 'Unknown analysis error',
        });
      }
    }
  } catch (error) {
    logger.debug(
      `A worker failed catastrophically: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
  const analysisTime = Date.now() - startTime;
  const finalMemory = getMemoryUsage();
  logger.verbose(`Analysis completed in ${analysisTime}ms`);
  logger.verbose(`Total changes found: ${allChanges.length}`);
  const result = processAnalysisResults(
    allChanges,
    filesAnalyzed,
    failedFiles,
    {
      analysisTimeMs: analysisTime,
      memoryUsageMB: finalMemory - initialMemory,
    },
    effectiveConfig,
  );
  await outputResults(result, options);
  return result;
}

/**
 * Checks if a file should be analyzed based on the include/exclude globs in the config.
 * @param filePath The path to the file to check.
 * @param config The analyzer configuration.
 * @returns True if the file should be analyzed, false otherwise.
 */
function shouldAnalyzeFile(filePath, config) {
  const included = config.include.some((glob) => minimatch(filePath, glob, { dot: true }));
  if (!included) return false;
  const excluded = config.exclude.some((glob) => minimatch(filePath, glob, { dot: true }));
  if (excluded) return false;
  return true;
}
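// Illustrative behavior of shouldAnalyzeFile() with hypothetical globs, e.g.
// include ['src/**/*.{ts,tsx}'] and exclude ['**/*.test.*'] (include is
// checked first, then exclude wins):
//
//   src/components/Button.tsx      -> analyzed (matches include, not exclude)
//   src/components/Button.test.tsx -> skipped (excluded)
//   docs/notes.md                  -> skipped (no include match)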
/**
 * Processes the raw list of changes to generate a structured analysis result.
 * @param changes The flat list of all detected changes.
 * @param filesAnalyzed The number of files that were successfully analyzed.
 * @param failedFiles The list of files that could not be analyzed, with their errors.
 * @param performance An object containing performance metrics.
 * @param config The analyzer configuration.
 * @returns A structured analysis result.
 */
function processAnalysisResults(changes, filesAnalyzed, failedFiles, performance, config) {
  const severityBreakdown = {
    high: changes.filter((c) => c.severity === 'high').length,
    medium: changes.filter((c) => c.severity === 'medium').length,
    low: changes.filter((c) => c.severity === 'low').length,
  };
  const changeTypeCounts = {};
  for (const change of changes) {
    const key = change.kind;
    const entry = changeTypeCounts[key] ?? { count: 0, maxSeverity: 'low' };
    entry.count += 1;
    const level = { low: 0, medium: 1, high: 2 };
    if (level[change.severity] > level[entry.maxSeverity]) {
      entry.maxSeverity = change.severity;
    }
    changeTypeCounts[key] = entry;
  }
  const changeKeys = Object.keys(changeTypeCounts);
  const topChangeTypes = changeKeys
    .map((k) => [k, changeTypeCounts[k]])
    .filter((pair) => Boolean(pair[1]))
    .map(([kind, data]) => ({ kind, count: data.count, maxSeverity: data.maxSeverity }))
    .sort((a, b) => b.count - a.count);
  const requiresTests = shouldRequireTests(changes, config);
  const criticalChanges = changes.filter((change) => change.severity === 'high').slice(0, 20);
  const hasReactChanges = changes.some(
    (change) =>
      change.kind.includes('jsx') ||
      change.kind.includes('hook') ||
      change.kind.includes('component'),
  );
  const summary = generateSummary(changes, severityBreakdown, requiresTests);
  return {
    requiresTests,
    summary,
    filesAnalyzed,
    totalChanges: changes.length,
    severityBreakdown,
    highSeverityChanges: severityBreakdown.high,
    topChangeTypes,
    criticalChanges,
    changes,
    failedFiles,
    hasReactChanges,
    performance,
  };
}

/**
 * Determines if tests should be required based on the detected changes.
 * @param changes The list of detected semantic changes.
 * @param config The analyzer configuration.
 * @returns True if tests are required, false otherwise.
 */
function shouldRequireTests(changes, config) {
  // Check if any changes require tests based on the configuration
  return changes.some((change) =>
    shouldRequireTestsForChange(change.kind, change.severity, config),
  );
}

/**
 * Generates a human-readable summary string of the analysis results.
 * @param changes The list of detected changes.
 * @param severityBreakdown An object with the count of changes per severity.
 * @param requiresTests A boolean indicating if tests are required.
 * @returns A summary string.
 */
function generateSummary(changes, severityBreakdown, requiresTests) {
  if (changes.length === 0) {
    return 'No semantic changes detected';
  }
  const parts = [
    `${changes.length} semantic changes detected`,
    `${severityBreakdown.high} high-severity`,
    `${severityBreakdown.medium} medium-severity`,
    `${severityBreakdown.low} low-severity`,
  ];
  if (requiresTests) {
    parts.push('Tests required');
  } else {
    parts.push('No tests required');
  }
  return parts.join(', ');
}
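// Example output (derived from the code above): for one high-severity and two
// low-severity changes with requiresTests=true, generateSummary() returns:
//
//   '3 semantic changes detected, 1 high-severity, 0 medium-severity, 2 low-severity, Tests required'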
/**
 * Formats analysis results as sed/awk-friendly output.
 * Uses colon-separated fields that can be easily parsed by shell tools.
 * @param result The structured analysis result.
 */
function formatMachineOutput(result) {
  // Helper function to escape colons and newlines in fields
  const escapeField = (field) => {
    if (field === undefined || field === null) return '';
    return field.toString().replace(/:/g, '\\:').replace(/\n/g, '\\n').replace(/\r/g, '\\r');
  };
  // Output the summary line
  logger.machine(
    `SUMMARY:${result.requiresTests}:${result.filesAnalyzed}:${result.totalChanges}:${result.severityBreakdown.high}:${result.severityBreakdown.medium}:${result.severityBreakdown.low}`,
  );
  // Output each change as a separate line
  for (const change of result.changes) {
    logger.machine(
      `CHANGE:${escapeField(change.file)}:${change.line}:${change.column}:${change.severity}:${escapeField(change.kind)}:${escapeField(change.detail)}:${escapeField(change.astNode)}:${escapeField(change.context)}`,
    );
  }
  // Output failed files
  for (const failed of result.failedFiles) {
    logger.machine(`FAILED:${escapeField(failed.filePath)}:${escapeField(failed.error)}`);
  }
  // Output performance metrics
  logger.machine(
    `PERFORMANCE:${result.performance.analysisTimeMs}:${result.performance.memoryUsageMB}`,
  );
  // Output the top change types
  for (const changeType of result.topChangeTypes.slice(0, 10)) {
    logger.machine(
      `CHANGETYPE:${escapeField(changeType.kind)}:${changeType.count}:${changeType.maxSeverity}`,
    );
  }
}
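// Shell sketch (illustrative): because each record is a colon-separated line,
// the machine format can be consumed directly with awk. Field 5 of a CHANGE
// line is the severity; 'machine-output.txt' is a hypothetical capture:
//
//   awk -F: '$1 == "CHANGE" && $5 == "high" { n++ } END { print n+0 }' machine-output.txt
//
// Note that escaped colons ('\:') inside fields would still split under a
// naive -F:, so fully robust parsing needs a little extra care.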
/**
 * Outputs the analysis results in the specified format.
 * @param result The structured analysis result.
 * @param options The command-line options.
 */
async function outputResults(result, options) {
  const outputFile = options.outputFile || 'semantic-analysis-results.json';
  switch (options.outputFormat) {
    case 'machine': {
      formatMachineOutput(result);
      break;
    }
    case 'json': {
      await fs.promises.writeFile(outputFile, JSON.stringify(result, null, 2), 'utf8');
      logger.verbose(`Results written to ${outputFile}`);
      break;
    }
    case 'github-actions': {
      // Write the JSON result file for reference
      await fs.promises.writeFile(outputFile, JSON.stringify(result, null, 2), 'utf8');
      // Format and output GitHub Actions annotations to stdout
      const annotations = formatForGitHubActions(result);
      for (const annotation of annotations) {
        logger.output(annotation);
      }
      // Write summary information as logs (to stderr in GitHub Actions mode)
      logger.verbose(`::group::Analysis Results`);
      logger.verbose(`Files analyzed: ${result.filesAnalyzed}`);
      logger.verbose(`Total changes: ${result.totalChanges}`);
      logger.verbose(`High severity: ${result.severityBreakdown.high}`);
      logger.verbose(`Medium severity: ${result.severityBreakdown.medium}`);
      logger.verbose(`Low severity: ${result.severityBreakdown.low}`);
      logger.verbose(`Tests required: ${result.requiresTests}`);
      logger.verbose(`::endgroup::`);
      if (result.failedFiles.length > 0) {
        logger.verbose(`::error::Failed to analyze ${result.failedFiles.length} files:`);
        result.failedFiles.forEach((file) => {
          logger.verbose(`::error file=${file.filePath}::${file.error}`);
        });
      }
      if (result.requiresTests) {
        logger.verbose(
          `::warning::Tests required due to ${result.highSeverityChanges} high-severity changes`,
        );
      }
      writeGitHubOutputFlag(result.requiresTests);
      break;
    }
    case 'console': {
      logger.output('\nAnalysis Results:');
      logger.output(`  Files analyzed: ${result.filesAnalyzed}`);
      logger.output(`  Total changes: ${result.totalChanges}`);
      logger.output(`  High severity: ${result.severityBreakdown.high}`);
      logger.output(`  Medium severity: ${result.severityBreakdown.medium}`);
      logger.output(`  Low severity: ${result.severityBreakdown.low}`);
      logger.output(`  Tests required: ${result.requiresTests ? 'Yes' : 'No'}`);
      if (result.failedFiles.length > 0) {
        logger.output(`\nFailed to analyze ${result.failedFiles.length} files:`);
        result.failedFiles.forEach((file) => {
          logger.output(`  - ${file.filePath}: ${file.error}`);
        });
      }
      if (result.topChangeTypes.length > 0) {
        logger.output('\nTop change types:');
        result.topChangeTypes.slice(0, 5).forEach((change) => {
          logger.output(`  ${change.kind}: ${change.count} (${change.maxSeverity})`);
        });
      }
      break;
    }
  }
}

/**
 * Gets the current memory usage of the process.
 * @returns The heap usage in megabytes.
 */
function getMemoryUsage() {
  if (typeof process !== 'undefined' && process.memoryUsage) {
    return Math.round(process.memoryUsage().heapUsed / 1024 / 1024);
  }
  return 0;
}

/**
 * Writes the `requires-tests` output for GitHub Actions.
 * @param requiresTests A boolean indicating if tests are required.
 */
export function writeGitHubOutputFlag(requiresTests) {
  const outputPath = process.env.GITHUB_OUTPUT;
  if (!outputPath) return;
  try {
    fs.appendFileSync(outputPath, `requires-tests=${requiresTests}\n`);
  } catch {
    // ignore write errors in read-only contexts
  }
}

/**
 * Loads the analyzer configuration from a file, merging with default settings.
 * @param options The analysis options, potentially containing a custom config path.
 * @returns The merged AnalyzerConfig.
 */
function loadConfig(options) {
  let configPath;
  if (options.configPath) {
    configPath = path.resolve(process.cwd(), options.configPath);
  } else {
    configPath = path.join(process.cwd(), '.semantic-change-detector.json');
  }
  let loadedConfig = {};
  if (fs.existsSync(configPath)) {
    try {
      const configContent = fs.readFileSync(configPath, 'utf8');
      loadedConfig = JSON.parse(configContent);
      logger.verbose(`Loaded configuration from ${configPath}`);
    } catch (error) {
      logger.debug(
        `Failed to load configuration from ${configPath}: ${error instanceof Error ? error.message : String(error)}`,
      );
    }
  } else {
    logger.verbose(`No custom configuration file found at ${configPath}. Using default settings.`);
  }
  // Merge with defaults, ensuring a deep merge for nested objects
  const mergedConfig = {
    ...DEFAULT_CONFIG,
    ...loadedConfig,
    changeKindGroups: { ...DEFAULT_CONFIG.changeKindGroups, ...loadedConfig.changeKindGroups },
    severityOverrides: { ...DEFAULT_CONFIG.severityOverrides, ...loadedConfig.severityOverrides },
    jsxConfig: { ...DEFAULT_CONFIG.jsxConfig, ...loadedConfig.jsxConfig },
    performance: { ...DEFAULT_CONFIG.performance, ...loadedConfig.performance },
    testRequirements: { ...DEFAULT_CONFIG.testRequirements, ...loadedConfig.testRequirements },
  };
  return mergedConfig;
}
//# sourceMappingURL=analysis-runner.js.map
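// Config sketch (illustrative): a minimal .semantic-change-detector.json that
// loadConfig() above would pick up from the working directory and merge over
// DEFAULT_CONFIG. The keys mirror the merge logic; the glob values are
// hypothetical:
//
//   {
//     "include": ["src/**/*.ts", "src/**/*.tsx"],
//     "exclude": ["**/*.test.ts", "dist/**"],
//     "testGlobs": ["**/*.test.ts", "**/*.spec.ts"]
//   }
//
// Nested sections such as changeKindGroups, severityOverrides, jsxConfig,
// performance, and testRequirements are each shallow-merged with their defaults.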