UNPKG

@git.zone/tsdoc

Version:

A comprehensive TypeScript documentation tool that leverages AI to generate and enhance project documentation, including dynamic README creation, API docs via TypeDoc, and smart commit message generation.

354 lines (303 loc) 11.3 kB
/**
 * Intelligent git diff processor that handles large diffs by sampling and prioritization
 * instead of blind truncation.
 */

/** Parsed facts about the diff of a single file. */
export interface IDiffFileInfo {
  /** Path of the changed file with the `a/` / `b/` diff prefix removed. */
  filepath: string;
  status: 'added' | 'modified' | 'deleted';
  linesAdded: number;
  linesRemoved: number;
  /** `linesAdded + linesRemoved`; used to classify the file as small / medium / large. */
  totalLines: number;
  /** Rough estimate: one token per four characters of `diffContent`. */
  estimatedTokens: number;
  /** The unmodified diff text this info was parsed from. */
  diffContent: string;
}

/** Result of {@link DiffProcessor.processDiffs}. */
export interface IProcessedDiff {
  summary: string; // Human-readable overview
  fullDiffs: string[]; // Small files included fully
  summarizedDiffs: string[]; // Medium files with head/tail
  metadataOnly: string[]; // Large files, just stats
  totalFiles: number;
  totalTokens: number;
}

/** Tuning knobs for {@link DiffProcessor}; every field is optional. */
export interface IDiffProcessorOptions {
  maxDiffTokens?: number; // Maximum tokens for entire diff section (default: 100000)
  smallFileLines?: number; // Files <= this are included fully (default: 50)
  mediumFileLines?: number; // Files <= this are summarized (default: 200)
  sampleHeadLines?: number; // Lines to show at start of medium files (default: 20)
  sampleTailLines?: number; // Lines to show at end of medium files (default: 20)
}

export class DiffProcessor {
  private readonly options: Required<IDiffProcessorOptions>;

  /**
   * @param options Overrides for the defaults documented on {@link IDiffProcessorOptions}.
   *   `??` is used so that an explicit `0` is respected.
   */
  constructor(options: IDiffProcessorOptions = {}) {
    this.options = {
      maxDiffTokens: options.maxDiffTokens ?? 100000,
      smallFileLines: options.smallFileLines ?? 50,
      mediumFileLines: options.mediumFileLines ?? 200,
      sampleHeadLines: options.sampleHeadLines ?? 20,
      sampleTailLines: options.sampleTailLines ?? 20,
    };
  }

  /**
   * Process an array of git diffs into a structured, token-efficient format.
   *
   * Each non-empty entry is parsed, the files are ordered by importance, and every file is
   * then placed in exactly one bucket: included in full, summarized (head/tail), or
   * metadata only. Only full and summarized entries consume the token budget.
   *
   * @param diffStringArray One diff text per changed file; empty/blank entries are skipped.
   * @returns The categorized diffs together with a human-readable summary.
   */
  public processDiffs(diffStringArray: string[]): IProcessedDiff {
    // Parse all diffs into file info objects (the type predicate drops the nulls safely)
    const fileInfos = diffStringArray
      .map((diffString) => this.parseDiffFile(diffString))
      .filter((info): info is IDiffFileInfo => info !== null);

    // Prioritize files (source files first, build artifacts last)
    const prioritized = this.prioritizeFiles(fileInfos);

    const result: IProcessedDiff = {
      summary: '',
      fullDiffs: [],
      summarizedDiffs: [],
      metadataOnly: [],
      totalFiles: prioritized.length,
      totalTokens: 0,
    };

    let tokensUsed = 0;
    const tokenBudget = this.options.maxDiffTokens;

    // Categorize and include files based on size and token budget
    for (const fileInfo of prioritized) {
      const remainingBudget = tokenBudget - tokensUsed;

      if (remainingBudget <= 0) {
        // Budget exhausted - rest are metadata only
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        continue;
      }

      if (fileInfo.totalLines <= this.options.smallFileLines) {
        // Small file - include fully if budget allows
        if (fileInfo.estimatedTokens <= remainingBudget) {
          const statusPrefix = this.getFileStatusPrefix(fileInfo);
          result.fullDiffs.push(`${statusPrefix}${fileInfo.diffContent}`);
          tokensUsed += fileInfo.estimatedTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else if (fileInfo.totalLines <= this.options.mediumFileLines) {
        // Medium file - try to include summary with head/tail
        const summary = this.extractDiffSample(
          fileInfo,
          this.options.sampleHeadLines,
          this.options.sampleTailLines
        );
        const summaryTokens = this.estimateTokens(summary);

        if (summaryTokens <= remainingBudget) {
          result.summarizedDiffs.push(summary);
          tokensUsed += summaryTokens;
        } else {
          result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
        }
      } else {
        // Large file - metadata only
        result.metadataOnly.push(this.formatMetadataOnly(fileInfo));
      }
    }

    result.totalTokens = tokensUsed;
    result.summary = this.generateSummary(result);

    return result;
  }

  /**
   * Format the processed diff for inclusion in context.
   *
   * Emits the summary, then one delimited section per non-empty bucket, then an end marker.
   *
   * @param processed A result previously returned by {@link processDiffs}.
   * @returns The sections joined with newlines.
   */
  public formatForContext(processed: IProcessedDiff): string {
    const sections: string[] = [];

    // Summary section
    sections.push('====== GIT DIFF SUMMARY ======');
    sections.push(processed.summary);
    sections.push('');

    // Full diffs section
    if (processed.fullDiffs.length > 0) {
      sections.push(`====== FULL DIFFS (${processed.fullDiffs.length} files) ======`);
      sections.push(processed.fullDiffs.join('\n\n'));
      sections.push('');
    }

    // Summarized diffs section
    if (processed.summarizedDiffs.length > 0) {
      sections.push(`====== SUMMARIZED DIFFS (${processed.summarizedDiffs.length} files) ======`);
      sections.push(processed.summarizedDiffs.join('\n\n'));
      sections.push('');
    }

    // Metadata only section
    if (processed.metadataOnly.length > 0) {
      sections.push(`====== METADATA ONLY (${processed.metadataOnly.length} files) ======`);
      sections.push(processed.metadataOnly.join('\n'));
      sections.push('');
    }

    sections.push('====== END OF GIT DIFF ======');

    return sections.join('\n');
  }

  /**
   * Parse a single git diff string into file information.
   *
   * Header lines (`---`, `+++`, `new file mode`, `deleted file mode`, `diff --git`) are only
   * interpreted before the first `@@` hunk marker. Once inside a hunk every line starting
   * with `+` / `-` is counted as an added / removed line, so content such as `+++counter;`
   * or `--- sql comment` is no longer mistaken for a header.
   *
   * @param diffString Diff text for one file.
   * @returns The parsed info, or `null` when the input is empty or whitespace only.
   */
  private parseDiffFile(diffString: string): IDiffFileInfo | null {
    if (!diffString || diffString.trim().length === 0) {
      return null;
    }

    const devNull = '/dev/null';
    let oldPath = '';
    let newPath = '';
    let gitHeaderPath = '';
    let status: IDiffFileInfo['status'] = 'modified';
    let linesAdded = 0;
    let linesRemoved = 0;
    let inHunk = false;

    for (const rawLine of diffString.split('\n')) {
      // Tolerate CRLF input so header comparisons are not defeated by a trailing '\r'
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;

      if (line.startsWith('diff --git ')) {
        // A new file header always ends the previous hunk
        inHunk = false;
        const match = /^diff --git a\/(.+) b\/(.+)$/.exec(line);
        gitHeaderPath = match?.[2] ?? gitHeaderPath;
        continue;
      }

      if (line.startsWith('@@')) {
        inHunk = true;
        continue;
      }

      if (inHunk) {
        if (line.startsWith('+')) {
          linesAdded++;
        } else if (line.startsWith('-')) {
          linesRemoved++;
        }
        continue;
      }

      // Header area (before the first hunk)
      if (line.startsWith('--- ')) {
        oldPath = this.stripDiffPathPrefix(line.substring(4), 'a/');
      } else if (line.startsWith('+++ ')) {
        newPath = this.stripDiffPathPrefix(line.substring(4), 'b/');
      } else if (line.startsWith('new file mode')) {
        status = 'added';
      } else if (line.startsWith('deleted file mode')) {
        status = 'deleted';
      } else if (line.startsWith('+') && !line.startsWith('+++')) {
        // Kept for inputs that carry no hunk markers at all
        linesAdded++;
      } else if (line.startsWith('-') && !line.startsWith('---')) {
        linesRemoved++;
      }
    }

    // git writes /dev/null as the old side of an added file and the new side of a deleted one
    if (newPath === devNull) {
      status = 'deleted';
    } else if (oldPath === devNull) {
      status = 'added';
    }

    const realOldPath = oldPath === devNull ? '' : oldPath;
    const realNewPath = newPath === devNull ? '' : newPath;
    const filepath =
      status === 'deleted'
        ? realOldPath || gitHeaderPath
        : realNewPath || realOldPath || gitHeaderPath;

    return {
      filepath,
      status,
      linesAdded,
      linesRemoved,
      totalLines: linesAdded + linesRemoved,
      estimatedTokens: this.estimateTokens(diffString),
      diffContent: diffString,
    };
  }

  /**
   * Normalize the path part of a `---` / `+++` header line.
   *
   * Anything after the first tab is dropped (a tab terminates the name in diff headers)
   * and the given `a/` or `b/` prefix is removed when present; `/dev/null` and unprefixed
   * paths are returned unchanged.
   */
  private stripDiffPathPrefix(rawPath: string, prefix: string): string {
    const path = rawPath.split('\t')[0] ?? '';
    return path.startsWith(prefix) ? path.substring(prefix.length) : path;
  }

  /** Rough token estimate: one token per four characters, rounded up. */
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Prioritize files by importance (source files before build artifacts).
   *
   * Sorts a copy so the caller's array is left untouched; files with equal scores keep
   * their input order.
   */
  private prioritizeFiles(files: IDiffFileInfo[]): IDiffFileInfo[] {
    return [...files].sort((a, b) => {
      const scoreA = this.getFileImportanceScore(a.filepath);
      const scoreB = this.getFileImportanceScore(b.filepath);
      return scoreB - scoreA; // Higher score first
    });
  }

  /**
   * Calculate importance score for a file path (higher = shown earlier).
   *
   * Build-artifact directories are tested first so that generated `.json` / `.md` files
   * inside them are not promoted by the extension-based config/documentation rules.
   */
  private getFileImportanceScore(filepath: string): number {
    // Build artifacts - low priority
    if (/^(dist|build|out|\.next|public\/dist)\//.test(filepath)) {
      return 10;
    }

    // Source files - highest priority
    if (/^(src|lib|app|components|pages|api)\//.test(filepath)) {
      return 100;
    }

    // Test files - high priority
    if (/\.(test|spec)\.(ts|js|tsx|jsx)$/.test(filepath) || filepath.startsWith('test/')) {
      return 80;
    }

    // Configuration files - medium-high priority
    if (/\.(json|yaml|yml|toml|config\.(ts|js))$/.test(filepath)) {
      return 60;
    }

    // Documentation - medium priority
    if (/\.(md|txt|rst)$/.test(filepath)) {
      return 40;
    }

    // Start with default priority
    let score = 50;

    // Boost interface/type files - they're usually small but critical
    if (filepath.includes('interfaces/') || filepath.includes('.types.')) {
      score += 20;
    }

    // Boost entry points
    if (filepath.endsWith('index.ts') || filepath.endsWith('mod.ts')) {
      score += 15;
    }

    return score;
  }

  /**
   * Extract head and tail lines from a diff, omitting the middle.
   *
   * The header (everything up to and including the first `@@` line) is always kept. If the
   * remaining body is no longer than `headLines + tailLines` there is nothing to omit and
   * the original diff text is returned unchanged.
   *
   * @param fileInfo Parsed file whose `diffContent` is sampled.
   * @param headLines Number of body lines to keep from the start (negative treated as 0).
   * @param tailLines Number of body lines to keep from the end (negative treated as 0).
   */
  private extractDiffSample(fileInfo: IDiffFileInfo, headLines: number, tailLines: number): string {
    const headCount = Math.max(0, Math.floor(headLines));
    const tailCount = Math.max(0, Math.floor(tailLines));
    const lines = fileInfo.diffContent.split('\n');

    if (lines.length <= headCount + tailCount) {
      // File is small enough to include fully
      return fileInfo.diffContent;
    }

    // Extract file metadata from diff header
    const firstHunkIndex = lines.findIndex((line) => line.startsWith('@@'));
    const bodyStartIndex = firstHunkIndex === -1 ? 0 : firstHunkIndex + 1;
    const headerLines = lines.slice(0, bodyStartIndex);
    const bodyLines = lines.slice(bodyStartIndex);

    if (bodyLines.length <= headCount + tailCount) {
      // Head and tail would overlap - sampling would duplicate lines, so include fully
      return fileInfo.diffContent;
    }

    const head = bodyLines.slice(0, headCount);
    // slice(-0) would return the whole array, so a zero tail needs an explicit empty result
    const tail = tailCount > 0 ? bodyLines.slice(-tailCount) : [];
    const omittedLines = bodyLines.length - head.length - tail.length;

    const parts: string[] = [];
    parts.push(`${this.getStatusEmoji(fileInfo.status)} FILE: ${fileInfo.filepath}`);
    parts.push(
      `CHANGES: +${fileInfo.linesAdded} lines, -${fileInfo.linesRemoved} lines (${fileInfo.totalLines} total)`
    );
    parts.push('');
    parts.push(...headerLines);
    parts.push(...head);
    parts.push('');
    parts.push(`[... ${omittedLines} lines omitted - use Read tool to see full file ...]`);
    parts.push('');
    parts.push(...tail);

    return parts.join('\n');
  }

  /** Map a file status to the emoji used in every formatted output. */
  private getStatusEmoji(status: IDiffFileInfo['status']): string {
    if (status === 'added') {
      return '➕';
    }
    return status === 'deleted' ? '➖' : '📝';
  }

  /**
   * Get file status prefix with emoji (emoji followed by a single space).
   */
  private getFileStatusPrefix(fileInfo: IDiffFileInfo): string {
    return `${this.getStatusEmoji(fileInfo.status)} `;
  }

  /**
   * Extract filepath from diff content.
   *
   * Returns the path of the first `+++ b/` line, or `'unknown'` when there is none.
   * Not called from within this class; retained unchanged.
   */
  private extractFilepathFromDiff(diffContent: string): string {
    const lines = diffContent.split('\n');
    for (const line of lines) {
      if (line.startsWith('+++ b/')) {
        return line.substring(6);
      }
    }
    return 'unknown';
  }

  /**
   * Format file info as metadata only: `<emoji> <path> (+added, -removed)`.
   */
  private formatMetadataOnly(fileInfo: IDiffFileInfo): string {
    const statusEmoji = this.getStatusEmoji(fileInfo.status);
    return `${statusEmoji} ${fileInfo.filepath} (+${fileInfo.linesAdded}, -${fileInfo.linesRemoved})`;
  }

  /**
   * Generate human-readable summary of processed diff.
   *
   * Lists the bucket sizes and the token estimate, and appends a note whenever at least
   * one file was reduced to metadata only.
   */
  private generateSummary(result: IProcessedDiff): string {
    const parts: string[] = [];
    parts.push(`Files changed: ${result.totalFiles} total`);
    parts.push(`- ${result.fullDiffs.length} included in full`);
    parts.push(`- ${result.summarizedDiffs.length} summarized (head/tail shown)`);
    parts.push(`- ${result.metadataOnly.length} metadata only`);
    parts.push(`Estimated tokens: ~${result.totalTokens.toLocaleString()}`);

    if (result.metadataOnly.length > 0) {
      parts.push('');
      parts.push('NOTE: Some files excluded to stay within token budget.');
      parts.push('Use Read tool with specific file paths to see full content.');
    }

    return parts.join('\n');
  }
}