@vfarcic/dot-ai
Version:
AI-powered development productivity platform that enhances software development workflows through intelligent automation and AI-driven assistance
260 lines (256 loc) • 11.5 kB
JavaScript
;
/**
* Shared debugging utilities for AI providers
*
* Common functions for logging metrics and debugging AI interactions
* when DEBUG_DOT_AI=true
*/
// Module-interop helper (appears to be compiler-emitted — TODO confirm the build tool).
// Re-exposes property `k` of module object `m` on target object `o` under the name `k2`.
// An already-defined `this.__createBinding` is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
// The target name defaults to the source name.
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with a forwarding getter when there is none, when it is an
// accessor on a non-__esModule object, or when it is a writable/configurable data
// property; otherwise the source descriptor is reused as-is.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback used when Object.create is unavailable: copy the current value by assignment.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: attaches value `v` to object `o` as its "default" property.
// An already-defined `this.__setModuleDefault` is reused when present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
// Defined as an enumerable data property holding `v`.
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
// Fallback used when Object.create is unavailable: plain assignment.
o["default"] = v;
});
// Module-interop helper: wraps the result of require() in a namespace-like object.
// Objects flagged with __esModule are returned unchanged; anything else is wrapped in a
// new object that re-exposes its own properties (except "default") and carries the
// original value as "default". An already-defined `this.__importStar` is reused when present.
var __importStar = (this && this.__importStar) || (function () {
// ownKeys replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in/hasOwnProperty fallback when that function is missing.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Already flagged as an ES module: hand it back untouched.
if (mod && mod.__esModule) return mod;
var result = {};
// Re-expose every own property except "default" through __createBinding.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
// The original value itself becomes the "default" property of the wrapper.
__setModuleDefault(result, mod);
return result;
};
})();
// Flag this CommonJS module with __esModule and publish its public functions.
// The functions assigned here are declared further down in this file; function
// declarations are hoisted, so the names are already bound at this point.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ensureDebugDirectory = ensureDebugDirectory;
exports.generateDebugId = generateDebugId;
exports.shouldSkipDatasetGeneration = shouldSkipDatasetGeneration;
exports.logEvaluationDataset = logEvaluationDataset;
exports.createAndLogAgenticResult = createAndLogAgenticResult;
exports.debugLogPromptOnly = debugLogPromptOnly;
exports.debugLogInteraction = debugLogInteraction;
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const crypto = __importStar(require("crypto"));
/**
 * Resolve the AI debug output directory and create it when it is missing.
 *
 * The directory is `tmp/debug-ai` under the current working directory;
 * missing parent folders are created along with it.
 *
 * @returns {string} Path of the debug directory.
 */
function ensureDebugDirectory() {
    const target = path.join(process.cwd(), 'tmp', 'debug-ai');
    const alreadyPresent = fs.existsSync(target);
    if (alreadyPresent) {
        return target;
    }
    fs.mkdirSync(target, { recursive: true });
    return target;
}
/**
 * Build an identifier for a set of debug files.
 *
 * Format: `<date>T<HHMMSS>_<8 hex chars>_<operation>`, where the date/time come from the
 * current ISO timestamp (colons and dots removed, time cut to six digits) and the hex
 * part comes from four random bytes.
 *
 * @param {string} operation - Operation label appended to the identifier.
 * @returns {string} The generated identifier.
 */
function generateDebugId(operation) {
    const isoCompact = new Date().toISOString().replace(/[:.]/g, '');
    const [datePart, timePart] = isoCompact.split('T');
    const stamp = `${datePart}T${timePart.slice(0, 6)}`;
    const suffix = crypto.randomBytes(4).toString('hex');
    return `${stamp}_${suffix}_${operation}`;
}
/**
 * Tell whether an operation is excluded from eval dataset generation.
 *
 * @param {string} operation - Operation name to check.
 * @returns {boolean} true for 'version-connectivity-check' and 'generic', false otherwise.
 */
function shouldSkipDatasetGeneration(operation) {
    switch (operation) {
        case 'version-connectivity-check':
        case 'generic':
            return true;
        default:
            return false;
    }
}
/**
 * Write one evaluation dataset entry (OpenAI Evals-style JSON line) for an AI interaction.
 *
 * Nothing is written unless `debugMode` is truthy, and operations for which
 * `shouldSkipDatasetGeneration(metrics.test_scenario)` returns true are skipped.
 * Comparative evaluations (operation contains "-comparative-") are written under
 * `eval/results` in the current working directory; all other entries go to `eval/datasets`.
 * Any failure while building or writing the entry is reported with console.error and
 * is not propagated to the caller.
 *
 * @param {object} metrics - Evaluation metrics. Fields read here: operation, test_scenario,
 *   interaction_id, sdk, modelVersion, user_intent, ai_response_summary, durationMs,
 *   inputTokens, outputTokens, iterationCount, toolCallCount, cacheReadTokens,
 *   cacheCreationTokens, failure_analysis.
 * @param {boolean} [debugMode=false] - Dataset files are only produced when this is truthy.
 * @returns {void}
 */
function logEvaluationDataset(metrics, debugMode = false) {
    if (!debugMode)
        return;
    // Skip dataset generation for non-evaluable operations
    if (shouldSkipDatasetGeneration(metrics.test_scenario))
        return;
    try {
        // File name segments come from caller-supplied values. A path separator in one of
        // them (model identifiers such as "vendor/model") would make path.join point into a
        // sub-directory that does not exist, so the write would fail and the entry be lost.
        // Separators and characters that are reserved in Windows file names become "-".
        const toFileNameSegment = (value) => String(value).replace(/[\\/:*?"<>|]/g, '-');
        // The tool name is the part of the operation before the first dash, e.g. "remediate"
        const toolName = metrics.operation.split('-')[0];
        const isComparativeEvaluation = metrics.operation.includes('-comparative-');
        // Comparative evaluation results and raw test datasets live in different directories
        const baseDir = path.join(process.cwd(), 'eval', isComparativeEvaluation ? 'results' : 'datasets');
        if (!fs.existsSync(baseDir)) {
            fs.mkdirSync(baseDir, { recursive: true });
        }
        const timestamp = new Date().toISOString().replace(/[:.]/g, '').split('T').join('_');
        let datasetFile;
        if (isComparativeEvaluation) {
            datasetFile = path.join(baseDir, `${toFileNameSegment(toolName)}_comparative_evaluation_${timestamp}.jsonl`);
        }
        else {
            // Use modelVersion directly for accurate model identification
            const modelName = metrics.modelVersion || 'unknown';
            // File name carries tool, interaction ID, SDK, model and timestamp
            const segments = [toolName, metrics.interaction_id, metrics.sdk, modelName, timestamp]
                .map((segment) => toFileNameSegment(segment));
            datasetFile = path.join(baseDir, `${segments.join('_')}.jsonl`);
        }
        // Transform metrics into OpenAI Evals format (no ideal field - using model-graded evaluation)
        const evalEntry = {
            input: {
                issue: metrics.user_intent || "Tool execution scenario"
            },
            output: metrics.ai_response_summary || "",
            performance: {
                duration_ms: metrics.durationMs,
                input_tokens: metrics.inputTokens,
                output_tokens: metrics.outputTokens,
                total_tokens: metrics.inputTokens + metrics.outputTokens,
                sdk: metrics.sdk,
                // The unsanitised model identifier is kept in the entry itself
                model_version: metrics.modelVersion,
                iterations: metrics.iterationCount,
                tool_calls_executed: metrics.toolCallCount,
                cache_read_tokens: metrics.cacheReadTokens || 0,
                cache_creation_tokens: metrics.cacheCreationTokens || 0
            },
            metadata: {
                timestamp: new Date().toISOString(),
                complexity: "medium",
                tags: ["troubleshooting"],
                source: "integration_test",
                tool: toolName,
                test_scenario: metrics.test_scenario || `${toolName}_test`,
                failure_analysis: metrics.failure_analysis || ""
            }
        };
        fs.writeFileSync(datasetFile, JSON.stringify(evalEntry) + '\n');
        console.log(`📊 Generated eval dataset: ${path.basename(datasetFile)} (${metrics.interaction_id}, ${metrics.durationMs}ms, ${metrics.inputTokens}+${metrics.outputTokens} tokens)`);
    }
    catch (error) {
        console.error(`❌ Failed to generate eval dataset for ${metrics.interaction_id} (${metrics.test_scenario}):`, error);
    }
}
/**
 * Assemble the AgenticResult for a finished agentic run and, in debug mode, hand the
 * matching evaluation metrics to logEvaluationDataset.
 *
 * PRD #154: uses the unified evaluation metrics shape.
 *
 * @param {object} config - Run description. Always read: finalMessage, iterations,
 *   toolCallsExecuted, totalTokens, status, completionReason, modelVersion, startTime,
 *   debugMode. Read only in debug mode: operation, sdk, interaction_id,
 *   evaluationContext, debugFiles.
 * @returns {object} Result with finalMessage, iterations, toolCallsExecuted, totalTokens,
 *   status, completionReason and modelVersion copied from config.
 */
function createAndLogAgenticResult(config) {
    const agenticResult = {
        finalMessage: config.finalMessage,
        iterations: config.iterations,
        toolCallsExecuted: config.toolCallsExecuted,
        totalTokens: config.totalTokens,
        status: config.status,
        completionReason: config.completionReason,
        modelVersion: config.modelVersion
    };
    const elapsedMs = Date.now() - config.startTime;
    // Outside debug mode there is nothing to log
    if (!config.debugMode) {
        return agenticResult;
    }
    const tokens = config.totalTokens;
    const toolCalls = config.toolCallsExecuted;
    const evalContext = config.evaluationContext;
    const metrics = {
        // Core execution data
        operation: config.operation,
        sdk: config.sdk,
        inputTokens: tokens.input,
        outputTokens: tokens.output,
        durationMs: elapsedMs,
        // Required fields
        iterationCount: config.iterations,
        toolCallCount: toolCalls.length,
        status: config.status,
        completionReason: config.completionReason,
        modelVersion: config.modelVersion,
        // Evaluation context; empty strings stand in for values that were not supplied
        test_scenario: config.operation,
        ai_response_summary: config.finalMessage,
        user_intent: evalContext?.user_intent || '',
        interaction_id: config.interaction_id || ''
    };
    // Optional fields are only added when the run actually produced them
    if (tokens.cacheCreation !== undefined) {
        metrics.cacheCreationTokens = tokens.cacheCreation;
    }
    if (tokens.cacheRead !== undefined) {
        metrics.cacheReadTokens = tokens.cacheRead;
    }
    if (toolCalls.length > 0) {
        metrics.uniqueToolsUsed = Array.from(new Set(toolCalls.map((call) => call.tool)));
    }
    if (config.debugFiles) {
        metrics.debug_files = {
            full_prompt: config.debugFiles.promptFile,
            full_response: config.debugFiles.responseFile
        };
    }
    if (evalContext?.failure_analysis) {
        metrics.failure_analysis = evalContext.failure_analysis;
    }
    // Cache hit rate: cache-read tokens as a rounded percentage of input tokens
    if (tokens.cacheRead !== undefined && tokens.input > 0) {
        metrics.cacheHitRate = Math.round((tokens.cacheRead / tokens.input) * 100);
    }
    logEvaluationDataset(metrics, config.debugMode);
    return agenticResult;
}
/**
 * Write the prompt of an AI call to `<debugId>_prompt.md` inside the debug directory.
 *
 * The file starts with a small header (operation, timestamp, provider, model), followed
 * by a `---` separator and the prompt text.
 *
 * @param {string} debugDir - Directory the file is written to.
 * @param {string} debugId - Identifier used as the file name prefix.
 * @param {string} prompt - Prompt text to store.
 * @param {string} operation - Operation label shown in the header.
 * @param {string} provider - Provider label shown in the header.
 * @param {string} model - Model label shown in the header.
 * @returns {void}
 */
function writePromptFile(debugDir, debugId, prompt, operation, provider, model) {
    const target = path.join(debugDir, `${debugId}_prompt.md`);
    const lines = [
        `# AI Prompt - ${operation}`,
        '',
        `Timestamp: ${new Date().toISOString()}`,
        `Provider: ${provider}`,
        `Model: ${model}`,
        `Operation: ${operation}`,
        '',
        '---',
        '',
        `${prompt}`
    ];
    fs.writeFileSync(target, lines.join('\n'));
}
/**
 * Store only the prompt of an AI call (for calls that failed before a response arrived).
 *
 * Does nothing unless `debugMode` is truthy. Errors while writing are reported with
 * console.warn and are not propagated.
 *
 * @param {string} debugId - Identifier used as the file name prefix.
 * @param {string} prompt - Prompt text to store.
 * @param {string} operation - Operation label.
 * @param {string} provider - Provider label.
 * @param {string} model - Model label.
 * @param {boolean} debugMode - Enables the logging.
 * @returns {void}
 */
function debugLogPromptOnly(debugId, prompt, operation, provider, model, debugMode) {
    if (debugMode) {
        try {
            const targetDir = ensureDebugDirectory();
            writePromptFile(targetDir, debugId, prompt, operation, provider, model);
            console.log(`🐛 DEBUG: AI prompt logged to tmp/debug-ai/${debugId}_prompt.md (call failed before response)`);
        }
        catch (failure) {
            console.warn('Failed to log AI debug prompt:', failure);
        }
    }
}
/**
 * Save an AI interaction (prompt and response) for debugging when DEBUG_DOT_AI=true.
 *
 * Does nothing unless `debugMode` is truthy. The prompt goes to `<debugId>_prompt.md`
 * via writePromptFile and the response to `<debugId>_response.md`, both inside the
 * directory returned by ensureDebugDirectory. Errors are reported with console.warn
 * and are not propagated.
 *
 * @param {string} debugId - Identifier used as the file name prefix.
 * @param {string} prompt - Prompt text that was sent.
 * @param {object} response - Response object; `content` is stored as the body and
 *   `usage.input_tokens` / `usage.output_tokens` are shown in the header when present.
 * @param {string} operation - Operation label.
 * @param {string} provider - Provider label.
 * @param {string} model - Model label.
 * @param {boolean} debugMode - Enables the logging.
 * @returns {void}
 */
function debugLogInteraction(debugId, prompt, response, operation, provider, model, debugMode) {
    if (!debugMode)
        return;
    try {
        const debugDir = ensureDebugDirectory();
        // Save prompt using shared helper
        writePromptFile(debugDir, debugId, prompt, operation, provider, model);
        // Save response with matching naming
        const responseFile = path.join(debugDir, `${debugId}_response.md`);
        // A response without usage metadata must not prevent the response from being saved
        const inputTokens = response.usage?.input_tokens ?? 'unknown';
        const outputTokens = response.usage?.output_tokens ?? 'unknown';
        // Same layout as the prompt file. The blank lines around '---' matter: directly
        // under a text line, Markdown reads '---' as a heading underline, not a separator.
        const responseContent = [
            `# AI Response - ${operation}`,
            '',
            `Timestamp: ${new Date().toISOString()}`,
            `Provider: ${provider}`,
            `Model: ${model}`,
            `Operation: ${operation}`,
            `Input Tokens: ${inputTokens}`,
            `Output Tokens: ${outputTokens}`,
            '',
            '---',
            '',
            `${response.content}`
        ].join('\n');
        fs.writeFileSync(responseFile, responseContent);
        console.log(`🐛 DEBUG: AI interaction logged to tmp/debug-ai/${debugId}_*.md`);
    }
    catch (error) {
        console.warn('Failed to log AI debug interaction:', error);
    }
}