UNPKG

erosolar-cli

Version:

Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning

482 lines 17.6 kB
/**
 * Result Verification Framework
 *
 * Prevents erosolar-cli from hallucinating success by requiring explicit
 * verification of tool operation outcomes. All critical operations must
 * return structured results with verification status.
 *
 * CRITICAL: LLMs can misinterpret ambiguous output as success.
 * This module ensures clear, unambiguous success/failure signals.
 *
 * @license MIT
 * @author Bo Shang
 */

/**
 * Format a verified result into a clear, unambiguous string for LLM consumption.
 * The format is designed to prevent misinterpretation.
 *
 * For SUCCESS: keep it simple - just show the output (details, else summary).
 * For every other status: a banner, the summary, failed checks, details and
 * suggested actions, joined with newlines.
 *
 * @param {object} result - Result record with `status`, `summary`, and optional
 *   `details`, `verificationChecks` (`{check, passed, details}` items) and
 *   `suggestedActions` (strings).
 * @returns {string} Text representation of the result.
 */
export function formatVerifiedResult(result) {
    // For success, keep output clean and simple
    if (result.status === 'VERIFIED_SUCCESS') {
        return result.details || result.summary;
    }
    // For non-success states, show detailed diagnostic info
    const lines = [];
    switch (result.status) {
        case 'VERIFIED_FAILURE':
            lines.push('═══ FAILED ═══');
            break;
        case 'UNVERIFIED':
            lines.push('═══ UNVERIFIED ═══');
            break;
        case 'PARTIAL_SUCCESS':
            lines.push('═══ PARTIAL SUCCESS ═══');
            break;
        case 'REQUIRES_USER_ACTION':
            lines.push('═══ ACTION REQUIRED ═══');
            break;
        default:
            // An unrecognized status must never render without a banner:
            // a bare summary could be misread as success.
            lines.push(`═══ UNKNOWN STATUS: ${String(result.status)} ═══`);
            break;
    }
    lines.push('', result.summary, '');
    // Show verification checks if any failed
    const failedChecks = (result.verificationChecks ?? []).filter((c) => !c.passed);
    if (failedChecks.length > 0) {
        lines.push('Failed checks:');
        for (const check of failedChecks) {
            lines.push(` ✗ ${check.check}${check.details ? `: ${check.details}` : ''}`);
        }
        lines.push('');
    }
    // Show details
    if (result.details) {
        lines.push(result.details, '');
    }
    // Show suggested actions
    if (result.suggestedActions && result.suggestedActions.length > 0) {
        lines.push('Suggested actions:');
        for (const action of result.suggestedActions) {
            lines.push(` → ${action}`);
        }
    }
    return lines.join('\n');
}

/**
 * Create a verified success result.
 *
 * @param {string} summary - Short description of the outcome.
 * @param {string} [details] - Output to show; takes precedence over `summary`.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {number} [durationMs] - Duration of the operation.
 * @returns {string} Formatted result text.
 */
export function verifiedSuccess(summary, details, checks, durationMs) {
    return formatVerifiedResult({
        status: 'VERIFIED_SUCCESS',
        summary,
        details,
        verificationChecks: checks,
        verifiedAt: new Date().toISOString(),
        durationMs,
    });
}

/**
 * Create a verified failure result.
 *
 * @param {string} summary - Short description of the failure.
 * @param {string} [details] - Diagnostic output.
 * @param {string[]} [suggestedActions] - Follow-up actions to list.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {number} [durationMs] - Duration of the operation.
 * @returns {string} Formatted result text.
 */
export function verifiedFailure(summary, details, suggestedActions, checks, durationMs) {
    return formatVerifiedResult({
        status: 'VERIFIED_FAILURE',
        summary,
        details,
        verificationChecks: checks,
        suggestedActions,
        verifiedAt: new Date().toISOString(),
        durationMs,
    });
}

/**
 * Create an unverified result (when we can't confirm success).
 * The summary is suffixed with an explicit "do not assume success" notice and
 * default manual-verification actions are used when none are supplied.
 *
 * @param {string} summary - Short description of the outcome.
 * @param {string} [details] - Diagnostic output.
 * @param {string[]} [suggestedActions] - Follow-up actions to list.
 * @param {number} [durationMs] - Duration of the operation.
 * @returns {string} Formatted result text.
 */
export function unverifiedResult(summary, details, suggestedActions, durationMs) {
    return formatVerifiedResult({
        status: 'UNVERIFIED',
        summary: `${summary} (VERIFICATION NOT POSSIBLE - DO NOT ASSUME SUCCESS)`,
        details,
        suggestedActions: suggestedActions || [
            'Manually verify the operation completed as expected',
            'Run verification commands to confirm state',
        ],
        verifiedAt: new Date().toISOString(),
        durationMs,
    });
}

/**
 * Create a partial success result.
 *
 * @param {string} summary - Short description of the outcome.
 * @param {string} [details] - Diagnostic output.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {string[]} [suggestedActions] - Follow-up actions to list.
 * @param {number} [durationMs] - Duration of the operation.
 * @returns {string} Formatted result text.
 */
export function partialSuccess(summary, details, checks, suggestedActions, durationMs) {
    return formatVerifiedResult({
        status: 'PARTIAL_SUCCESS',
        summary,
        details,
        verificationChecks: checks,
        suggestedActions,
        verifiedAt: new Date().toISOString(),
        durationMs,
    });
}

/**
 * Create a result requiring user action.
 *
 * @param {string} summary - Short description of what is blocked.
 * @param {string} [details] - Diagnostic output.
 * @param {string[]} [requiredActions] - Actions the user must take.
 * @param {number} [durationMs] - Duration of the operation.
 * @returns {string} Formatted result text.
 */
export function requiresUserAction(summary, details, requiredActions, durationMs) {
    return formatVerifiedResult({
        status: 'REQUIRES_USER_ACTION',
        summary,
        details,
        suggestedActions: requiredActions,
        verifiedAt: new Date().toISOString(),
        durationMs,
    });
}

/**
 * Verification patterns for common outputs.
 * Each tool entry carries `success` and `failure` regex lists (firebase also
 * has `authRequired`) in the shape consumed by `analyzeOutput`.
 */
export const OutputPatterns = {
    // Firebase deployment patterns
    firebase: {
        success: [
            /Deploy complete!/i,
            /✔\s+Deploy complete/i,
            /Hosting URL:/i,
            /Function URL/i,
        ],
        failure: [
            /Error:/i,
            /deploy failed/i,
            /Authentication Error/i,
            /not logged in/i,
            /permission denied/i,
            /PERMISSION_DENIED/i,
            /quota exceeded/i,
            /build error/i,
        ],
        authRequired: [
            /firebase login/i,
            /not logged in/i,
            /authenticate/i,
            /FIREBASE_TOKEN/i,
        ],
    },
    // Generic command patterns
    command: {
        success: [
            /^(success|completed|done|finished)/im,
            /successfully/i,
        ],
        failure: [
            /^error/im,
            /^fatal/im,
            /failed/i,
            /command not found/i,
            /permission denied/i,
            /access denied/i,
            /ENOENT/i,
            /EACCES/i,
            /EPERM/i,
        ],
    },
    // NPM patterns
    npm: {
        success: [
            /npm notice/i,
            /\+ .+@\d+\.\d+\.\d+/, // Package published pattern
            /published/i,
        ],
        failure: [
            /npm ERR!/i,
            /ERESOLVE/i,
            /E404/i,
            /EINTEGRITY/i,
        ],
    },
    // Git patterns
    git: {
        success: [
            /\[.+\s+\w+\]/, // Commit hash pattern like [main abc1234]
            /pushed/i,
            /merged/i,
            /On branch/i, // git status output
            /nothing to commit/i, // git status clean
            /Changes to be committed/i, // git status staged
            /Changes not staged/i, // git status modified
            /Untracked files/i, // git status untracked
            /Your branch is/i, // git status branch info
            /HEAD detached/i, // git status detached HEAD
            /diff --git/i, // git diff output
            /create mode/i, // git commit output
            /delete mode/i, // git commit output
            /^\s*\d+ files? changed/im, // commit summary
            /^\s*\d+ insertions?/im, // commit summary
            /^\s*\d+ deletions?/im, // commit summary
            /Already up to date/i, // git pull
            /Fast-forward/i, // git pull/merge
            /Switched to/i, // git checkout
            /Already on/i, // git checkout
        ],
        failure: [
            /fatal:/i,
            /error:/i,
            /conflict/i,
            /rejected/i,
            /CONFLICT/,
            /Aborting/i,
            /not a git repository/i,
        ],
    },
};

/**
 * Lookalike code points mapped to their ASCII equivalents.
 * Written as escapes on purpose: the literal glyphs are visually identical to
 * ASCII and are easily corrupted by editors or text pipelines.
 */
const CONFUSABLES = new Map([
    // Cyrillic lowercase lookalikes
    ['\u0430', 'a'], ['\u0435', 'e'], ['\u043E', 'o'], ['\u0440', 'p'],
    ['\u0441', 'c'], ['\u0443', 'y'], ['\u0445', 'x'],
    // Cyrillic uppercase lookalikes
    ['\u0410', 'A'], ['\u0412', 'B'], ['\u0415', 'E'], ['\u041A', 'K'],
    ['\u041C', 'M'], ['\u041D', 'H'], ['\u041E', 'O'], ['\u0420', 'P'],
    ['\u0421', 'C'], ['\u0422', 'T'], ['\u0425', 'X'],
    // Greek uppercase lookalikes
    ['\u0391', 'A'], ['\u0392', 'B'], ['\u0395', 'E'], ['\u0397', 'H'],
    ['\u0399', 'I'], ['\u039A', 'K'], ['\u039C', 'M'], ['\u039D', 'N'],
    ['\u039F', 'O'], ['\u03A1', 'P'], ['\u03A4', 'T'], ['\u03A7', 'X'],
    ['\u03A5', 'Y'], ['\u0396', 'Z'],
    // Greek lowercase lookalikes
    ['\u03BF', 'o'], ['\u03BD', 'v'], ['\u03C1', 'p'],
    // Other confusables: small roman numerals, script l, double-struck a/b/c
    ['\u2170', 'i'], ['\u217C', 'l'], ['\u2113', 'l'],
    ['\u{1D552}', 'a'], ['\u{1D553}', 'b'], ['\u{1D554}', 'c'],
]);

/** Offset between a fullwidth form (U+FF01..U+FF5E) and its ASCII counterpart. */
const FULLWIDTH_OFFSET = 0xFEE0;

/** Matches every mapped confusable plus the whole fullwidth ASCII block. */
const CONFUSABLE_PATTERN = new RegExp(`[${[...CONFUSABLES.keys()].join('')}\\uFF01-\\uFF5E]`, 'gu');

/** Zero-width characters that could hide content inside a keyword. */
const ZERO_WIDTH_PATTERN = /[\u200B-\u200D\uFEFF\u2060\u180E]/g;

/** Replace one matched confusable with its ASCII equivalent. */
function foldConfusable(ch) {
    // Anything matched but not in the table belongs to the fullwidth block.
    return CONFUSABLES.get(ch) ?? String.fromCharCode(ch.charCodeAt(0) - FULLWIDTH_OFFSET);
}

/**
 * Normalize text to prevent Unicode lookalike attacks.
 * Converts confusable characters (e.g., Cyrillic 'а' -> Latin 'a', fullwidth
 * forms -> ASCII) and strips zero-width characters, so visually similar text
 * cannot bypass pattern detection.
 *
 * @param {string} text - Text to normalize; null/undefined is treated as ''.
 * @returns {string} NFC-normalized text with lookalikes folded to ASCII.
 */
export function normalizeForPatternMatch(text) {
    const input = String(text ?? '');
    return input
        .normalize('NFC')
        .replace(CONFUSABLE_PATTERN, foldConfusable)
        .replace(ZERO_WIDTH_PATTERN, '');
}

/**
 * Phrases showing that a success word appears in a negative context,
 * e.g. "not successful", "failed to succeed", "success was not achieved".
 */
const NEGATIVE_SUCCESS_CONTEXT = [
    /not\s+(been\s+)?success/i,
    /fail(ed|ure|s)?\s+to\s+(be\s+)?success/i,
    /success\s+(was|is)\s+not/i,
    /no\s+success/i,
    /unsuccess/i,
    /!success/i,
];

/**
 * Test a regex against text independent of any previous use.
 * Global/sticky regexes remember `lastIndex` between `.test()` calls, which
 * would otherwise make repeated checks with the same pattern alternate.
 */
function matchesPattern(pattern, text) {
    pattern.lastIndex = 0;
    return pattern.test(text);
}

/**
 * Analyze output against patterns to determine success/failure.
 *
 * CRITICAL: This function is defensive against:
 * 1. Unicode lookalike attacks (Cyrillic/Greek characters that look like Latin)
 * 2. Exit code 0 with hidden failures in output
 * 3. False positives from success words appearing in error messages
 *
 * @param {string} output - Raw command output.
 * @param {{success?: RegExp[], failure?: RegExp[]}} patterns - Pattern lists;
 *   a missing list is treated as empty.
 * @param {number} [exitCode] - Process exit code, if known.
 * @returns {{isSuccess: boolean, isFailure: boolean, matchedPattern?: string, confidence: string}}
 *   Both flags false means the outcome is indeterminate (unverified).
 */
export function analyzeOutput(output, patterns, exitCode) {
    // CRITICAL: Normalize output to prevent Unicode bypass attacks
    const normalizedOutput = normalizeForPatternMatch(output);
    const { failure = [], success = [] } = patterns ?? {};
    // Check explicit failure patterns first (highest priority)
    const failureHit = failure.find((pattern) => matchesPattern(pattern, normalizedOutput));
    if (failureHit) {
        return { isSuccess: false, isFailure: true, matchedPattern: failureHit.source, confidence: 'high' };
    }
    // Check success patterns
    const successHit = success.find((pattern) => matchesPattern(pattern, normalizedOutput));
    if (successHit) {
        // Additional validation: ensure the success pattern isn't in a negative context
        const hasNegativeContext = NEGATIVE_SUCCESS_CONTEXT.some((p) => p.test(normalizedOutput));
        if (hasNegativeContext) {
            // Success word appears but in negative context - treat as failure
            return { isSuccess: false, isFailure: true, matchedPattern: 'success in negative context', confidence: 'medium' };
        }
        return { isSuccess: true, isFailure: false, matchedPattern: successHit.source, confidence: 'high' };
    }
    // IMPORTANT: Exit code 0 alone is NOT sufficient to declare success.
    // A non-zero exit is a clear failure; exit code 0 (or no exit code) with
    // no matching pattern is INDETERMINATE and must be treated as unverified.
    if (exitCode !== undefined && exitCode !== 0) {
        return { isSuccess: false, isFailure: true, confidence: 'high' };
    }
    return { isSuccess: false, isFailure: false, confidence: 'low' };
}

/**
 * Create a verification check from a command result.
 * The check passes only when the exit code is 0 and, if `expectedPatterns`
 * is given, at least one pattern matches the (normalized) output.
 *
 * @param {string} checkName - Label for the check.
 * @param {number} exitCode - Process exit code.
 * @param {string} output - Command output.
 * @param {RegExp[]} [expectedPatterns] - Patterns of which one must match.
 * @returns {{check: string, passed: boolean, details: string}} Check record.
 */
export function createCommandCheck(checkName, exitCode, output, expectedPatterns) {
    if (exitCode !== 0) {
        return { check: checkName, passed: false, details: `Exit code ${exitCode}` };
    }
    // If we have expected patterns, check for them
    if (expectedPatterns) {
        // Match on normalized text, consistent with analyzeOutput.
        const normalizedOutput = normalizeForPatternMatch(output);
        const foundPattern = expectedPatterns.some((p) => matchesPattern(p, normalizedOutput));
        if (!foundPattern) {
            return {
                check: checkName,
                passed: false,
                details: 'Exit code 0 but expected output pattern not found',
            };
        }
    }
    return { check: checkName, passed: true, details: 'Exit code 0' };
}

/**
 * Verify that a file exists and optionally contains expected content.
 * Never throws: read errors are reported as a failed check.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {string|RegExp} [expectedContent] - Substring or pattern the file
 *   content (decoded as UTF-8) must contain.
 * @returns {Promise<{check: string, passed: boolean, details: string}>} Check record.
 */
export async function verifyFileExists(filePath, expectedContent) {
    const fs = await import('fs/promises');
    try {
        // Read raw bytes so the reported size is the real byte count rather
        // than the number of UTF-16 code units in the decoded string.
        const raw = await fs.readFile(filePath);
        if (expectedContent) {
            const content = raw.toString('utf-8');
            const matches = typeof expectedContent === 'string'
                ? content.includes(expectedContent)
                : matchesPattern(expectedContent, content);
            return {
                check: `File ${filePath} exists with expected content`,
                passed: matches,
                details: matches ? 'Content verified' : 'Content does not match expected',
            };
        }
        return {
            check: `File ${filePath} exists`,
            passed: true,
            details: `File size: ${raw.length} bytes`,
        };
    }
    catch (error) {
        return {
            check: `File ${filePath} exists`,
            passed: false,
            details: error instanceof Error ? error.message : 'File not found',
        };
    }
}

/**
 * Verify URL is accessible (for deployment verification).
 * Issues a HEAD request and compares the response status with `expectedStatus`.
 * Never throws: network errors and timeouts are reported as a failed check.
 *
 * @param {string} url - URL to probe.
 * @param {number} [expectedStatus=200] - Status code that counts as passing.
 * @param {number} [timeoutMs=10000] - Abort the request after this many ms.
 * @returns {Promise<{check: string, passed: boolean, details: string}>} Check record.
 */
export async function verifyUrlAccessible(url, expectedStatus = 200, timeoutMs = 10000) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const response = await fetch(url, {
            method: 'HEAD',
            signal: controller.signal,
        });
        return {
            check: `URL ${url} accessible`,
            passed: response.status === expectedStatus,
            details: `Status ${response.status} (expected ${expectedStatus})`,
        };
    }
    catch (error) {
        return {
            check: `URL ${url} accessible`,
            passed: false,
            details: error instanceof Error ? error.message : 'Request failed',
        };
    }
    finally {
        // Always release the timer, including when fetch rejects.
        clearTimeout(timeout);
    }
}

/**
 * Verify a JSON response matches expected structure.
 * Passes when the response is OK and every name in `expectedFields` is an own
 * property of the parsed top-level JSON value. Never throws.
 *
 * @param {string} url - Endpoint to fetch.
 * @param {string[]} expectedFields - Top-level field names that must be present.
 * @param {number} [timeoutMs=10000] - Abort the request (including the body
 *   read) after this many ms.
 * @returns {Promise<{check: string, passed: boolean, details: string}>} Check record.
 */
export async function verifyJsonEndpoint(url, expectedFields, timeoutMs = 10000) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const response = await fetch(url, { signal: controller.signal });
        if (!response.ok) {
            return {
                check: `JSON endpoint ${url}`,
                passed: false,
                details: `HTTP ${response.status}`,
            };
        }
        const json = await response.json();
        // null and primitive JSON values have no fields; report every expected
        // field as missing instead of throwing on the property check.
        const isObject = json !== null && typeof json === 'object';
        const missingFields = expectedFields.filter((field) => !isObject || !Object.hasOwn(json, field));
        return {
            check: `JSON endpoint ${url}`,
            passed: missingFields.length === 0,
            details: missingFields.length === 0
                ? `All expected fields present: ${expectedFields.join(', ')}`
                : `Missing fields: ${missingFields.join(', ')}`,
        };
    }
    catch (error) {
        return {
            check: `JSON endpoint ${url}`,
            passed: false,
            details: error instanceof Error ? error.message : 'Request failed',
        };
    }
    finally {
        // Cleared only after the body has been read (or the request failed),
        // so the timeout covers the whole exchange and the timer never leaks.
        clearTimeout(timeout);
    }
}

/**
 * Common error indicators that might be hidden in truncated content
 */
const ERROR_INDICATORS = [
    'error',
    'failed',
    'failure',
    'exception',
    'denied',
    'unauthorized',
    'forbidden',
    'not found',
    '404',
    '500',
    '502',
    '503',
    'timeout',
    'refused',
    'rejected',
    'invalid',
    'expired',
    'abort',
    'crash',
    'fatal',
    'critical',
    'panic',
    'segfault',
    'killed',
    'terminated',
];

/**
 * Check if content that would be truncated might contain error indicators.
 * An indicator counts when at least one of its characters lies at or beyond
 * `truncateAt`, so a word split by the cut (e.g. "er|ror") is also detected.
 *
 * @param {string} fullContent - Complete, untruncated content.
 * @param {number} truncateAt - Index at which the content will be cut.
 * @returns {{shouldWarn: boolean, warning: string}} Warning text, or an empty
 *   warning when nothing suspicious is hidden.
 */
export function checkTruncationForErrors(fullContent, truncateAt) {
    if (fullContent.length <= truncateAt) {
        return { shouldWarn: false, warning: '' };
    }
    const truncatedPortion = fullContent.slice(truncateAt).toLowerCase();
    const foundIndicators = ERROR_INDICATORS.filter((indicator) => {
        if (truncatedPortion.includes(indicator)) {
            return true;
        }
        // Window around the cut in which any occurrence must cross the cut:
        // it starts at most (length - 1) characters before `truncateAt`.
        const reach = indicator.length - 1;
        const boundary = fullContent
            .slice(Math.max(0, truncateAt - reach), truncateAt + reach)
            .toLowerCase();
        return boundary.includes(indicator);
    });
    if (foundIndicators.length > 0) {
        return {
            shouldWarn: true,
            warning: `⚠️ TRUNCATION WARNING: The hidden portion (${fullContent.length - truncateAt} chars) may contain error indicators: ${foundIndicators.join(', ')}. If the operation appears to have failed, review full output.`,
        };
    }
    return { shouldWarn: false, warning: '' };
}

/**
 * Safely truncate content with error detection.
 * Returns the content unchanged when it fits; otherwise the leading part, a
 * "characters truncated" marker, and a warning if the hidden part may contain
 * error indicators. The cut never splits a surrogate pair.
 *
 * @param {string} content - Content to truncate.
 * @param {number} maxLength - Maximum number of UTF-16 code units to keep.
 * @param {string} [_label='Content'] - Unused; kept for call compatibility.
 * @returns {string} Original or truncated content with annotations.
 */
export function safeTruncate(content, maxLength, _label = 'Content') {
    if (content.length <= maxLength) {
        return content;
    }
    let cutAt = maxLength;
    if (Number.isInteger(maxLength) && maxLength > 0) {
        const before = content.charCodeAt(maxLength - 1);
        const after = content.charCodeAt(maxLength);
        const splitsPair = before >= 0xD800 && before <= 0xDBFF && after >= 0xDC00 && after <= 0xDFFF;
        if (splitsPair) {
            // Keep the whole character in the hidden part rather than
            // emitting a lone high surrogate.
            cutAt = maxLength - 1;
        }
    }
    const truncation = checkTruncationForErrors(content, cutAt);
    const truncatedContent = content.slice(0, cutAt);
    const hiddenChars = content.length - cutAt;
    let result = `${truncatedContent}\n\n[... ${hiddenChars} characters truncated]`;
    if (truncation.shouldWarn) {
        result += `\n\n${truncation.warning}`;
    }
    return result;
}
//# sourceMappingURL=resultVerification.js.map