UNPKG

commit-story

Version:

Automated Git Journal System with AI Assistant Context Integration

677 lines (592 loc) 28.4 kB
/**
 * Context Integrator - Time-based Chat Context Matching
 *
 * Orchestrates the collection of git commit data and chat messages,
 * correlating them by time windows to create unified context for AI processing.
 */
import { getLatestCommitData } from '../collectors/git-collector.js';
import { extractChatForCommit } from '../collectors/claude-collector.js';
import { execSync, execFileSync } from 'child_process';
import { filterContext } from '../generators/filters/context-filter.js';
import { redactSensitiveData } from '../generators/filters/sensitive-data-filter.js';
import { trace, SpanStatusCode } from '@opentelemetry/api';
import { OTEL } from '../telemetry/standards.js';
import { createNarrativeLogger } from '../utils/trace-logger.js';

// Get tracer instance for context integration instrumentation.
// Declared ahead of the functions that close over it so that none of them can
// hit the temporal dead zone if invoked while the module is still evaluating.
const tracer = trace.getTracer('commit-story-context', '1.0.0');

/**
 * Extracts clean text content from grouped Claude messages, handling mixed content formats
 *
 * Each message's content is normalised to a string (string content is used as-is,
 * array content has its `text` items joined with spaces, anything else is JSON
 * stringified), passed through `redactSensitiveData`, and returned in a minimal
 * `{ type, message: { content }, timestamp }` shape.
 *
 * @param {Array} sessionGroups - Array of session objects containing messages
 * @returns {Array} Session objects with cleaned messages
 * @throws Re-throws any error raised during extraction after recording it on the span
 */
export function extractTextFromMessages(sessionGroups) {
  // Count total messages across all sessions
  const totalMessages = sessionGroups.reduce(
    (sum, session) => sum + (session.messages?.length || 0),
    0
  );

  return tracer.startActiveSpan(OTEL.span.context.extract_text(), {
    attributes: {
      'code.function': 'extractTextFromMessages',
      [`${OTEL.NAMESPACE}.text.input_messages`]: totalMessages,
      [`${OTEL.NAMESPACE}.text.input_sessions`]: sessionGroups.length
    }
  }, (span) => {
    const logger = createNarrativeLogger('context.extract_text_from_messages');
    const startTime = Date.now();

    try {
      logger.start('Text extraction from messages', `Starting extraction from ${totalMessages} Claude messages across ${sessionGroups.length} sessions`, {
        inputCount: totalMessages,
        sessionCount: sessionGroups.length
      });

      // Track content type statistics
      let stringContentMessages = 0;
      let arrayContentMessages = 0;
      let unknownContentMessages = 0;
      let emptyContentMessages = 0;
      let totalContentLength = 0;

      const result = sessionGroups.map(session => {
        // A session without a messages array is treated as empty, matching the
        // tolerant `?.length || 0` used for the total count above.
        const cleanedMessages = (session.messages ?? []).map(msg => {
          const content = msg.message?.content;
          let cleanContent = '';

          if (!content) {
            cleanContent = '';
            emptyContentMessages++;
            logger.progress('Empty content found', 'Encountered message with no content', {
              msgType: msg.type
            });
          } else if (typeof content === 'string') {
            cleanContent = content;
            stringContentMessages++;
          } else if (Array.isArray(content)) {
            // Extract text from array format: [{type: "text", text: "actual content"}]
            // `item?.` skips null/undefined entries instead of throwing on them.
            cleanContent = content
              .filter(item => item?.type === 'text' && item.text)
              .map(item => item.text)
              .join(' ');
            arrayContentMessages++;
          } else {
            // Fallback for unknown content types.
            // JSON.stringify can yield undefined (e.g. for a function); fall back to ''.
            cleanContent = JSON.stringify(content) ?? '';
            unknownContentMessages++;
            logger.decision('Unknown content type', 'Using JSON stringify fallback for unknown content format', {
              contentType: typeof content,
              msgType: msg.type
            });
          }

          // Filter sensitive data before AI processing
          const beforeRedaction = cleanContent.length;
          cleanContent = redactSensitiveData(cleanContent);
          const afterRedaction = cleanContent.length;

          if (beforeRedaction !== afterRedaction) {
            logger.progress('Sensitive data redacted', `Content length changed from ${beforeRedaction} to ${afterRedaction} characters`, {
              reductionAmount: beforeRedaction - afterRedaction,
              msgType: msg.type
            });
          }

          totalContentLength += cleanContent.length;

          // Return minimal message object for AI processing (eliminates Claude Code metadata bloat)
          return {
            type: msg.type || 'assistant', // user or assistant
            message: { content: cleanContent },
            timestamp: msg.timestamp
          };
        });

        return {
          sessionId: session.sessionId,
          messages: cleanedMessages,
          startTime: session.startTime,
          messageCount: cleanedMessages.length
        };
      });

      const processingDuration = Date.now() - startTime;
      const averageContentLength = totalMessages > 0
        ? Math.round(totalContentLength / totalMessages)
        : 0;

      // Add comprehensive attributes to span
      const textAttrs = OTEL.attrs.textExtraction({
        inputMessages: totalMessages,
        inputSessions: sessionGroups.length,
        processedMessages: totalMessages,
        processedSessions: result.length,
        stringContentMessages,
        arrayContentMessages,
        unknownContentMessages,
        emptyContentMessages,
        totalContentLength,
        averageContentLength,
        processingDuration
      });
      span.setAttributes(textAttrs);

      // Emit correlated metrics for dashboard analysis
      Object.entries(textAttrs).forEach(([name, value]) => {
        if (typeof value === 'number') {
          OTEL.metrics.gauge(name, value);
        }
      });

      // Additional key business metrics
      OTEL.metrics.gauge('commit_story.text.extraction_duration_ms', processingDuration);
      OTEL.metrics.gauge('commit_story.text.sessions_count', result.length);

      // Content type ratio metrics for detecting Claude Code format changes
      OTEL.metrics.gauge('commit_story.text.string_content_ratio',
        totalMessages > 0 ? stringContentMessages / totalMessages : 0);
      OTEL.metrics.gauge('commit_story.text.complex_content_ratio',
        totalMessages > 0 ? (arrayContentMessages + unknownContentMessages) / totalMessages : 0);

      logger.complete('Text extraction completed', `Successfully processed ${totalMessages} messages across ${result.length} sessions with ${averageContentLength} avg chars`, {
        processedCount: totalMessages,
        sessionCount: result.length,
        averageLength: averageContentLength,
        processingTime: processingDuration,
        contentTypes: {
          string: stringContentMessages,
          array: arrayContentMessages,
          unknown: unknownContentMessages,
          empty: emptyContentMessages
        }
      });

      span.setStatus({ code: SpanStatusCode.OK, message: 'Text extraction completed successfully' });
      return result;
    } catch (error) {
      span.recordException(error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
      logger.error('Text extraction failed', 'Error during message content extraction', error, {
        inputMessageCount: totalMessages,
        inputSessionCount: sessionGroups.length
      });
      throw error;
    } finally {
      span.end();
    }
  });
}

/**
 * Computes the rounded mean and the maximum of a list of non-negative lengths.
 *
 * Done in a single pass rather than with `Math.max(...lengths)` so that very
 * large arrays cannot exceed the engine's call-argument limit.
 *
 * @param {number[]} lengths - Content lengths (each >= 0)
 * @returns {{average: number, max: number}} Both values are 0 for an empty list
 */
function summarizeLengths(lengths) {
  if (lengths.length === 0) {
    return { average: 0, max: 0 };
  }

  let total = 0;
  let max = 0;
  for (const length of lengths) {
    total += length;
    if (length > max) {
      max = length;
    }
  }

  return { average: Math.round(total / lengths.length), max };
}

/**
 * Calculates metadata about chat messages for context enrichment
 *
 * Only messages whose `type` is exactly 'user' or 'assistant' are counted;
 * `totalMessages` in the result is the sum of those two groups.
 *
 * @param {Array} messages - Array of clean messages from extractTextFromMessages()
 * @returns {Object} Metadata object with message statistics
 * @throws Re-throws any error raised during calculation after recording it on the span
 */
function calculateChatMetadata(messages) {
  return tracer.startActiveSpan(OTEL.span.context.calculate_metadata(), {
    attributes: {
      'code.function': 'calculateChatMetadata',
      [`${OTEL.NAMESPACE}.metadata.input_messages`]: messages.length
    }
  }, (span) => {
    const logger = createNarrativeLogger('context.calculate_chat_metadata');
    const startTime = Date.now();

    try {
      logger.start('Chat metadata calculation', `Starting calculation for ${messages.length} messages`, {
        inputCount: messages.length
      });

      const userMessages = messages.filter(msg => msg.type === 'user');
      const assistantMessages = messages.filter(msg => msg.type === 'assistant');

      logger.progress('Message type filtering', `Found ${userMessages.length} user and ${assistantMessages.length} assistant messages`, {
        userCount: userMessages.length,
        assistantCount: assistantMessages.length,
        totalCount: messages.length
      });

      // "Over twenty" is inclusive here: content of exactly 20 characters qualifies.
      const overTwentyCharMessages = userMessages.filter(msg => {
        const content = msg.message?.content || '';
        return content.length >= 20;
      });

      const userLengths = userMessages.map(msg => (msg.message?.content || '').length);
      const assistantLengths = assistantMessages.map(msg => (msg.message?.content || '').length);

      const { average: userAvgLength, max: userMaxLength } = summarizeLengths(userLengths);
      const { average: assistantAvgLength, max: assistantMaxLength } = summarizeLengths(assistantLengths);

      logger.progress('Length analysis completed', `User msgs: avg=${userAvgLength}, max=${userMaxLength}; Assistant msgs: avg=${assistantAvgLength}, max=${assistantMaxLength}`, {
        userAvgLength,
        userMaxLength,
        assistantAvgLength,
        assistantMaxLength,
        overTwentyCharCount: overTwentyCharMessages.length
      });

      const calculationDuration = Date.now() - startTime;

      const result = {
        totalMessages: userMessages.length + assistantMessages.length,
        userMessageCount: userMessages.length,
        assistantMessageCount: assistantMessages.length,
        userMessages: {
          total: userMessages.length,
          overTwentyCharacters: overTwentyCharMessages.length,
          averageLength: userAvgLength,
          maxLength: userMaxLength
        },
        assistantMessages: {
          total: assistantMessages.length,
          averageLength: assistantAvgLength,
          maxLength: assistantMaxLength
        }
      };

      // Add comprehensive attributes to span
      const metadataAttrs = OTEL.attrs.chatMetadata({
        inputMessages: messages.length,
        userMessages: userMessages.length,
        assistantMessages: assistantMessages.length,
        overTwentyCharMessages: overTwentyCharMessages.length,
        userAvgLength,
        userMaxLength,
        assistantAvgLength,
        assistantMaxLength,
        calculationDuration
      });
      span.setAttributes(metadataAttrs);

      // Emit correlated metrics for dashboard analysis
      Object.entries(metadataAttrs).forEach(([name, value]) => {
        if (typeof value === 'number') {
          OTEL.metrics.gauge(name, value);
        }
      });

      // Additional key business metrics
      OTEL.metrics.gauge('commit_story.metadata.calculation_duration_ms', calculationDuration);
      OTEL.metrics.gauge('commit_story.metadata.user_message_ratio',
        messages.length > 0 ? userMessages.length / messages.length : 0);
      OTEL.metrics.gauge('commit_story.metadata.quality_message_ratio',
        userMessages.length > 0 ? overTwentyCharMessages.length / userMessages.length : 0);

      logger.complete('Metadata calculation completed', `Generated statistics for ${result.totalMessages} messages with ${overTwentyCharMessages.length} quality user messages`, {
        totalMessages: result.totalMessages,
        qualityMessages: overTwentyCharMessages.length,
        processingTime: calculationDuration,
        statistics: {
          userRatio: messages.length > 0 ? userMessages.length / messages.length : 0,
          qualityRatio: userMessages.length > 0 ? overTwentyCharMessages.length / userMessages.length : 0
        }
      });

      span.setStatus({ code: SpanStatusCode.OK, message: 'Chat metadata calculation completed successfully' });
      return result;
    } catch (error) {
      span.recordException(error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
      logger.error('Metadata calculation failed', 'Error during chat metadata calculation', error, {
        inputMessageCount: messages.length
      });
      throw error;
    } finally {
      span.end();
    }
  });
}

/**
 * Gathers all context for a commit: git data and time-correlated chat messages
 *
 * @param {string} commitRef - Git commit reference (HEAD, HEAD~1, hash, etc.)
 * @returns {Promise<Object>} Combined context object; each top-level key holds
 *   `{ data, description }`:
 *   - commit: filtered commit data returned by filterContext
 *   - previousCommit: `{ hash, timestamp }` of the parent commit, or null data
 *   - chatMessages: filtered, flattened chat messages
 *   - chatSessions: session-grouped messages reduced to those kept by the filter
 *   - chatMetadata: statistics computed from the unfiltered cleaned messages
 * @throws {Error} When the current commit data cannot be obtained, or when any
 *   collaborator throws (the error is recorded on the span and re-thrown)
 */
export async function gatherContextForCommit(commitRef = 'HEAD') {
  return await tracer.startActiveSpan(OTEL.span.context.gather(), {
    attributes: {
      ...OTEL.attrs.repository({ path: process.cwd() }),
      [`${OTEL.NAMESPACE}.commit.ref`]: commitRef,
      'code.function': 'gatherContextForCommit'
    }
  }, async (span) => {
    try {
      // Get current commit data (returns Date object for timestamp)
      const currentCommit = await getLatestCommitData(commitRef);
      if (!currentCommit) {
        throw new Error('❌ Failed to get current commit data');
      }

      // Add commit data to span
      const commitAttrs = OTEL.attrs.commit(currentCommit);
      span.setAttributes(commitAttrs);

      // Emit commit metrics for context integration analysis
      Object.entries(commitAttrs).forEach(([name, value]) => {
        if (typeof value === 'number') {
          OTEL.metrics.gauge(name, value);
        }
      });

      // Get previous commit data for time window
      const previousCommit = await getPreviousCommitData(commitRef);

      if (previousCommit) {
        const prevCommitAttrs = {
          [`${OTEL.NAMESPACE}.previous_commit.hash`]: previousCommit.hash,
          [`${OTEL.NAMESPACE}.previous_commit.timestamp`]: previousCommit.timestamp.toISOString()
        };
        span.setAttributes(prevCommitAttrs);

        // Emit previous commit metrics for context window analysis
        Object.entries(prevCommitAttrs).forEach(([name, value]) => {
          if (typeof value === 'string' && name.includes('timestamp')) {
            // Convert timestamp to numeric metric (epoch milliseconds)
            const timestampMs = new Date(value).getTime();
            OTEL.metrics.gauge(name.replace('timestamp', 'timestamp_ms'), timestampMs);
          }
        });
      }

      // Extract chat messages using existing claude-collector API
      // Signature: extractChatForCommit(commitTime, previousCommitTime, repoPath)
      const rawChatMessages = await extractChatForCommit(
        currentCommit.timestamp, // Date object - current commit time
        previousCommit?.timestamp || null, // Date object or null - previous commit time
        process.cwd() // string - repo path for cwd filtering
      );

      // Extract clean text content from messages (now returns session groups)
      const cleanChatSessions = extractTextFromMessages(rawChatMessages || []);

      // Flatten session groups to get all messages for metadata calculation and filtering
      const flattenedMessages = cleanChatSessions.flatMap(session => session.messages);

      // Add raw message data to span
      const rawSessionCount = rawChatMessages?.length || 0;
      const totalMessageCount = flattenedMessages.length;
      const rawChatData = {
        raw_sessions: rawSessionCount,
        sessions: cleanChatSessions.length,
        raw_messages: rawChatMessages?.reduce((sum, session) => sum + (session.messages?.length || 0), 0) || 0,
        count: totalMessageCount
      };
      span.setAttributes(OTEL.attrs.chat(rawChatData));

      // Dual emission: emit metrics alongside span attributes
      OTEL.metrics.gauge('commit_story.chat.raw_sessions_count', rawChatData.raw_sessions);
      OTEL.metrics.gauge('commit_story.chat.sessions_count', rawChatData.sessions);
      OTEL.metrics.gauge('commit_story.chat.raw_messages_count', rawChatData.raw_messages);
      OTEL.metrics.gauge('commit_story.chat.messages_count', rawChatData.count);

      // Apply complete context preparation (consolidate all filtering and token management)
      // Use flattened messages for filtering to maintain existing filter logic
      const rawContext = {
        commit: currentCommit,
        chatMessages: flattenedMessages
      };
      const filteredContext = filterContext(rawContext);

      // Apply the same filtering to session groups to maintain consistency
      // Filter messages within each session and remove empty sessions
      //
      // NOTE(review): the cleaned messages built by extractTextFromMessages keep
      // their text under `message.content`, not a top-level `content`, so for
      // those objects `msg.content` is undefined. Unless filterContext adds a
      // top-level `content` field, the second comparison below is
      // `undefined === undefined` and the match is effectively timestamp-only.
      // Left unchanged because filterContext's output shape is not visible
      // here - confirm before tightening.
      const filteredChatSessions = cleanChatSessions
        .map(session => {
          const filteredMessages = session.messages.filter(msg =>
            filteredContext.chatMessages.some(filtered =>
              filtered.timestamp === msg.timestamp &&
              filtered.content === msg.content
            )
          );
          return {
            ...session,
            messages: filteredMessages,
            messageCount: filteredMessages.length
          };
        })
        .filter(session => session.messages.length > 0);

      // Calculate metadata from cleaned messages (before filtering for richer data)
      const metadata = calculateChatMetadata(flattenedMessages);

      // Add final metadata to span
      const finalChatData = {
        total: metadata.totalMessages,
        userMessages: metadata.userMessageCount,
        assistantMessages: metadata.assistantMessageCount,
        userMessagesOverTwenty: metadata.userMessages.overTwentyCharacters,
        filtered: filteredContext.chatMessages.length
      };
      span.setAttributes(OTEL.attrs.chat(finalChatData));

      // Dual emission: emit key business metrics
      OTEL.metrics.gauge('commit_story.chat.total_messages', finalChatData.total);
      OTEL.metrics.gauge('commit_story.chat.user_messages', finalChatData.userMessages);
      OTEL.metrics.gauge('commit_story.chat.assistant_messages', finalChatData.assistantMessages);
      OTEL.metrics.gauge('commit_story.chat.user_messages_over_twenty', finalChatData.userMessagesOverTwenty);

      // Return self-documenting context object for journal generation
      const result = {
        commit: {
          data: filteredContext.commit, // Filtered git data (hash, message, author, timestamp, diff)
          description: `Git commit with fields: - hash: Commit hash string - message: Commit message (may be null) - author: Object with {name, email} - timestamp: ISO 8601 timestamp - diff: Full unified diff showing file changes - File paths in headers: diff --git a/path/to/file b/path/to/file - Lines added (+) and removed (-)`
        },
        previousCommit: {
          data: previousCommit, // Previous commit data for time window calculation
          description: "Previous commit data used for calculating development time window"
        },
        chatMessages: {
          data: filteredContext.chatMessages, // Filtered chat messages with token optimization (flattened)
          description: "Chat messages where type:'user' = HUMAN DEVELOPER input, type:'assistant' = AI ASSISTANT responses"
        },
        chatSessions: {
          data: filteredChatSessions, // Session-grouped chat messages (filtered to match chatMessages)
          description: `Chat sessions - array of session objects, each containing: - session_id: "Session 1", "Session 2", etc. - session_start: ISO 8601 timestamp when session began - message_count: Total messages in this session - messages: Array of message objects, each with: - type: "user" (human developer) or "assistant" (AI) - content: The message text - timestamp: ISO 8601 timestamp when message was sent`
        },
        chatMetadata: {
          data: metadata,
          description: "Chat statistics: Message counts, lengths, and quality metrics for decision-making"
        }
      };

      span.setStatus({ code: SpanStatusCode.OK, message: 'Context gathered successfully' });
      return result;
    } catch (error) {
      span.recordException(error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: error.message });
      console.error('Error gathering context for commit:', error.message);
      throw error;
    } finally {
      span.end();
    }
  });
}

/**
 * Records the telemetry shared by every "no previous commit" outcome of
 * getPreviousCommitData: span attributes, gauges, one outcome counter, the
 * completion log entry, and an OK span status.
 *
 * @param {Object} span - Active span to annotate
 * @param {Object} logger - Narrative logger for the current operation
 * @param {Object} details
 * @param {string} details.commitRef - Commit reference that was queried
 * @param {string} details.command - Human-readable git command for telemetry
 * @param {number} details.executionDuration - Elapsed milliseconds
 * @param {string} details.counterName - Counter metric to increment by 1
 * @param {string} details.completionMessage - Message for logger.complete
 * @param {string} details.reason - Reason code for the completion log entry
 * @param {string} details.statusMessage - Message for the span status
 * @returns {null} Always null, so callers can `return` the call directly
 */
function recordMissingPreviousCommit(span, logger, {
  commitRef,
  command,
  executionDuration,
  counterName,
  completionMessage,
  reason,
  statusMessage
}) {
  const gitAttrs = OTEL.attrs.gitCollection({
    commitRef,
    command,
    previousCommitFound: false,
    previousCommitHash: null,
    previousCommitTimestamp: null,
    executionDuration
  });
  span.setAttributes(gitAttrs);

  OTEL.metrics.gauge('commit_story.git.execution_duration_ms', executionDuration);
  OTEL.metrics.gauge('commit_story.git.previous_commit_found', 0); // 0 for false
  OTEL.metrics.counter(counterName, 1);

  logger.complete('Previous commit data retrieval completed', completionMessage, {
    result: null,
    executionTime: executionDuration,
    reason
  });

  span.setStatus({ code: SpanStatusCode.OK, message: statusMessage });
  return null;
}

/**
 * Gets the previous commit data for time window calculation
 *
 * Runs `git log -1 --format=%H|%ct <commitRef>~1` in the current working
 * directory. Any failure (no parent commit, git error, unparsable output) is
 * treated as "no previous commit" and yields null rather than an exception.
 *
 * @param {string} commitRef - Git commit reference to calculate previous from
 * @returns {Promise<Object|null>} `{ hash, timestamp }` (timestamp is a Date) or
 *   null if no previous commit
 */
async function getPreviousCommitData(commitRef = 'HEAD') {
  // Display form of the command, used only for telemetry and logging.
  const command = `git log -1 --format="%H|%ct" ${commitRef}~1`;

  return await tracer.startActiveSpan(OTEL.span.collectors.git(), {
    attributes: {
      'code.function': 'getPreviousCommitData',
      [`${OTEL.NAMESPACE}.git.commit_ref`]: commitRef,
      [`${OTEL.NAMESPACE}.git.command`]: command
    }
  }, async (span) => {
    const logger = createNarrativeLogger('git.collect_previous_commit_data');
    const startTime = Date.now();

    try {
      logger.start('Previous commit data retrieval', `Retrieving previous commit data for reference: ${commitRef}`, {
        commitRef,
        command
      });

      // Get previous commit hash and timestamp (one commit before the specified commit)
      let previousCommitInfo;
      try {
        // execFileSync passes arguments straight to git without a shell, so
        // shell metacharacters in commitRef cannot be interpreted as commands.
        previousCommitInfo = execFileSync(
          'git',
          ['log', '-1', '--format=%H|%ct', `${commitRef}~1`],
          { encoding: 'utf8', cwd: process.cwd() }
        ).trim();
      } catch (gitError) {
        // No parent commit exists (first commit in repository)
        const executionDuration = Date.now() - startTime;

        logger.decision('No previous commit found', 'Git command failed - this is the first commit in repository', {
          commitRef,
          executionTime: executionDuration,
          result: 'no_previous_commit',
          error: gitError.message
        });

        return recordMissingPreviousCommit(span, logger, {
          commitRef,
          command,
          executionDuration,
          counterName: 'commit_story.git.no_previous_commit_total',
          completionMessage: 'No previous commit found - returning null for first commit',
          reason: 'first_commit_in_repo',
          statusMessage: 'No previous commit found (first commit)'
        });
      }

      const executionDuration = Date.now() - startTime;

      if (!previousCommitInfo) {
        logger.decision('No previous commit found', 'Git command returned empty result - likely first commit in repository', {
          commitRef,
          executionTime: executionDuration,
          result: 'no_previous_commit'
        });

        return recordMissingPreviousCommit(span, logger, {
          commitRef,
          command,
          executionDuration,
          counterName: 'commit_story.git.no_previous_commit_total',
          completionMessage: 'No previous commit found - returning null for first commit',
          reason: 'first_commit_in_repo',
          statusMessage: 'No previous commit found (first commit)'
        });
      }

      const [hash, timestamp] = previousCommitInfo.split('|');
      // %ct is epoch seconds; convert to a Date object like git-collector.
      // An unparsable value makes toISOString() below throw, which the outer
      // catch turns into a null result.
      const previousCommitTimestamp = new Date(Number.parseInt(timestamp, 10) * 1000);

      logger.progress('Previous commit data parsed', `Found previous commit: ${hash} at ${previousCommitTimestamp.toISOString()}`, {
        hash,
        timestamp: previousCommitTimestamp.toISOString(),
        rawTimestamp: timestamp
      });

      const result = {
        hash: hash,
        timestamp: previousCommitTimestamp
      };

      // Add comprehensive attributes to span
      const gitAttrs = OTEL.attrs.gitCollection({
        commitRef,
        command,
        previousCommitFound: true,
        previousCommitHash: hash,
        previousCommitTimestamp: previousCommitTimestamp.toISOString(),
        executionDuration
      });
      span.setAttributes(gitAttrs);

      // Emit correlated metrics for dashboard analysis
      Object.entries(gitAttrs).forEach(([name, value]) => {
        if (typeof value === 'number') {
          OTEL.metrics.gauge(name, value);
        } else if (typeof value === 'boolean') {
          OTEL.metrics.gauge(name, value ? 1 : 0);
        }
      });

      // Additional key business metrics
      OTEL.metrics.gauge('commit_story.git.execution_duration_ms', executionDuration);
      OTEL.metrics.gauge('commit_story.git.previous_commit_found', 1); // 1 for true
      OTEL.metrics.counter('commit_story.git.previous_commit_success_total', 1);

      logger.complete('Previous commit data retrieval completed', `Successfully retrieved previous commit ${hash} from ${previousCommitTimestamp.toISOString()}`, {
        result: result,
        executionTime: executionDuration,
        hash: hash,
        timestamp: previousCommitTimestamp.toISOString()
      });

      span.setStatus({ code: SpanStatusCode.OK, message: 'Previous commit data retrieved successfully' });
      return result;
    } catch (error) {
      const executionDuration = Date.now() - startTime;

      logger.decision('Git command failed', 'Git log command failed - treating as no previous commit available', {
        error: error.message,
        commitRef,
        executionTime: executionDuration,
        errorType: 'git_command_error'
      });

      // Don't throw error - return null as this is expected behavior for first commit
      return recordMissingPreviousCommit(span, logger, {
        commitRef,
        command,
        executionDuration,
        counterName: 'commit_story.git.command_error_total',
        completionMessage: 'Git command error treated as no previous commit - returning null',
        reason: 'git_command_error',
        statusMessage: 'No previous commit exists (git error handled)'
      });
    } finally {
      span.end();
    }
  });
}