erosolar-cli
Version:
Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning
482 lines • 17.6 kB
JavaScript
/**
* Result Verification Framework
*
* Prevents erosolar-cli from hallucinating success by requiring explicit
* verification of tool operation outcomes. All critical operations must
* return structured results with verification status.
*
* CRITICAL: LLMs can misinterpret ambiguous output as success.
* This module ensures clear, unambiguous success/failure signals.
*
* @license MIT
* @author Bo Shang
*/
/**
 * Render a verification result as plain text for LLM consumption.
 *
 * A `VERIFIED_SUCCESS` result collapses to its details (or, when details are
 * empty, its summary) with no decoration. Every other status yields a
 * multi-line report: status banner, summary, failed checks, details and
 * suggested actions, in that order.
 *
 * @param {object} result - Record with `status` and `summary`, plus optional
 *   `details`, `verificationChecks` and `suggestedActions`.
 * @returns {string} The text to hand back to the model.
 */
export function formatVerifiedResult(result) {
  const { status, summary, details, verificationChecks, suggestedActions } = result;

  // Success stays terse so the model sees nothing but the real output.
  if (status === 'VERIFIED_SUCCESS') {
    return details || summary;
  }

  // Banner per non-success status; an unrecognised status gets no banner.
  const banners = new Map([
    ['VERIFIED_FAILURE', '═══ FAILED ═══'],
    ['UNVERIFIED', '═══ UNVERIFIED ═══'],
    ['PARTIAL_SUCCESS', '═══ PARTIAL SUCCESS ═══'],
    ['REQUIRES_USER_ACTION', '═══ ACTION REQUIRED ═══'],
  ]);

  const report = [];
  if (banners.has(status)) {
    report.push(banners.get(status));
  }
  report.push('', summary, '');

  // Only checks that did not pass are worth the model's attention.
  const hasChecks = verificationChecks && verificationChecks.length > 0;
  const unmet = hasChecks ? verificationChecks.filter((entry) => !entry.passed) : [];
  if (unmet.length > 0) {
    report.push('Failed checks:');
    for (const entry of unmet) {
      const suffix = entry.details ? `: ${entry.details}` : '';
      report.push(` ✗ ${entry.check}${suffix}`);
    }
    report.push('');
  }

  if (details) {
    report.push(details, '');
  }

  if (suggestedActions && suggestedActions.length > 0) {
    report.push('Suggested actions:');
    for (const step of suggestedActions) {
      report.push(` → ${step}`);
    }
  }

  return report.join('\n');
}
/**
 * Build a `VERIFIED_SUCCESS` result, stamp it with the current time and
 * return it already rendered by `formatVerifiedResult`.
 *
 * @param {string} summary - Short description of what succeeded.
 * @param {string} [details] - Full output to surface.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {number} [durationMs] - How long the operation took.
 * @returns {string} The formatted result text (not the result object).
 */
export function verifiedSuccess(summary, details, checks, durationMs) {
  const outcome = {
    status: 'VERIFIED_SUCCESS',
    summary,
    details,
    verificationChecks: checks,
    verifiedAt: new Date().toISOString(),
    durationMs,
  };
  return formatVerifiedResult(outcome);
}
/**
 * Build a `VERIFIED_FAILURE` result, stamp it with the current time and
 * return it already rendered by `formatVerifiedResult`.
 *
 * @param {string} summary - Short description of what failed.
 * @param {string} [details] - Diagnostic output.
 * @param {string[]} [suggestedActions] - Next steps to offer.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {number} [durationMs] - How long the operation took.
 * @returns {string} The formatted result text (not the result object).
 */
export function verifiedFailure(summary, details, suggestedActions, checks, durationMs) {
  const outcome = {
    status: 'VERIFIED_FAILURE',
    summary,
    details,
    verificationChecks: checks,
    suggestedActions,
    verifiedAt: new Date().toISOString(),
    durationMs,
  };
  return formatVerifiedResult(outcome);
}
/**
 * Build an `UNVERIFIED` result for operations whose outcome could not be
 * confirmed, and return it already rendered by `formatVerifiedResult`.
 *
 * The summary is suffixed with an explicit "do not assume success" marker,
 * and two generic manual-verification steps are offered when the caller
 * supplies no suggested actions.
 *
 * @param {string} summary - Short description of the operation.
 * @param {string} [details] - Raw output, if any.
 * @param {string[]} [suggestedActions] - Overrides the default next steps.
 * @param {number} [durationMs] - How long the operation took.
 * @returns {string} The formatted result text (not the result object).
 */
export function unverifiedResult(summary, details, suggestedActions, durationMs) {
  const fallbackActions = [
    'Manually verify the operation completed as expected',
    'Run verification commands to confirm state',
  ];
  const flaggedSummary = `${summary} (VERIFICATION NOT POSSIBLE - DO NOT ASSUME SUCCESS)`;
  const outcome = {
    status: 'UNVERIFIED',
    summary: flaggedSummary,
    details,
    suggestedActions: suggestedActions ? suggestedActions : fallbackActions,
    verifiedAt: new Date().toISOString(),
    durationMs,
  };
  return formatVerifiedResult(outcome);
}
/**
 * Build a `PARTIAL_SUCCESS` result, stamp it with the current time and
 * return it already rendered by `formatVerifiedResult`.
 *
 * @param {string} summary - Short description of what partly succeeded.
 * @param {string} [details] - Diagnostic output.
 * @param {Array<object>} [checks] - Verification checks that were run.
 * @param {string[]} [suggestedActions] - Next steps to offer.
 * @param {number} [durationMs] - How long the operation took.
 * @returns {string} The formatted result text (not the result object).
 */
export function partialSuccess(summary, details, checks, suggestedActions, durationMs) {
  const outcome = {
    status: 'PARTIAL_SUCCESS',
    summary,
    details,
    verificationChecks: checks,
    suggestedActions,
    verifiedAt: new Date().toISOString(),
    durationMs,
  };
  return formatVerifiedResult(outcome);
}
/**
 * Build a `REQUIRES_USER_ACTION` result, stamp it with the current time and
 * return it already rendered by `formatVerifiedResult`.
 *
 * The required actions are carried in the result's `suggestedActions` field.
 *
 * @param {string} summary - Short description of what is blocked.
 * @param {string} [details] - Diagnostic output.
 * @param {string[]} [requiredActions] - Steps the user has to take.
 * @param {number} [durationMs] - How long the operation took.
 * @returns {string} The formatted result text (not the result object).
 */
export function requiresUserAction(summary, details, requiredActions, durationMs) {
  const outcome = {
    status: 'REQUIRES_USER_ACTION',
    summary,
    details,
    suggestedActions: requiredActions,
    verifiedAt: new Date().toISOString(),
    durationMs,
  };
  return formatVerifiedResult(outcome);
}
/**
* Verification patterns for common outputs
*
* Keyed by tool (`firebase`, `command`, `npm`, `git`). Every entry carries a
* `success` and a `failure` array of regular expressions, which is the shape
* `analyzeOutput` in this file reads (`patterns.failure` first, then
* `patterns.success`). `firebase` additionally carries `authRequired`.
*
* None of the expressions use the `g` or `y` flag, so calling `.test()` on
* them repeatedly is stateless. Expressions with the `m` flag anchor `^` at
* the start of any line, not only at the start of the whole output.
*/
export const OutputPatterns = {
// Firebase deployment patterns
firebase: {
success: [
/Deploy complete!/i,
/✔\s+Deploy complete/i,
/Hosting URL:/i,
/Function URL/i,
],
failure: [
/Error:/i,
/deploy failed/i,
/Authentication Error/i,
/not logged in/i,
/permission denied/i,
/PERMISSION_DENIED/i,
/quota exceeded/i,
/build error/i,
],
// Signals that the user has to authenticate.
// `/not logged in/i` is deliberately or accidentally listed in `failure` as well.
authRequired: [
/firebase login/i,
/not logged in/i,
/authenticate/i,
/FIREBASE_TOKEN/i,
],
},
// Generic command patterns
command: {
success: [
// Line-anchored: a line has to START with one of these words.
/^(success|completed|done|finished)/im,
/successfully/i,
],
failure: [
// Line-anchored: a line has to START with "error" / "fatal".
/^error/im,
/^fatal/im,
// Unanchored: matches anywhere, including inside longer words.
/failed/i,
/command not found/i,
/permission denied/i,
/access denied/i,
// Node.js / POSIX style error codes.
/ENOENT/i,
/EACCES/i,
/EPERM/i,
],
},
// NPM patterns
npm: {
success: [
/npm notice/i,
/\+ .+@\d+\.\d+\.\d+/, // Package published pattern
/published/i,
],
failure: [
/npm ERR!/i,
/ERESOLVE/i,
/E404/i,
/EINTEGRITY/i,
],
},
// Git patterns
git: {
success: [
/\[.+\s+\w+\]/, // Commit hash pattern like [main abc1234]
/pushed/i,
/merged/i,
/On branch/i, // git status output
/nothing to commit/i, // git status clean
/Changes to be committed/i, // git status staged
/Changes not staged/i, // git status modified
/Untracked files/i, // git status untracked
/Your branch is/i, // git status branch info
/HEAD detached/i, // git status detached HEAD
/diff --git/i, // git diff output
/create mode/i, // git commit output
/delete mode/i, // git commit output
/^\s*\d+ files? changed/im, // commit summary
/^\s*\d+ insertions?/im, // commit summary
/^\s*\d+ deletions?/im, // commit summary
/Already up to date/i, // git pull
/Fast-forward/i, // git pull/merge
/Switched to/i, // git checkout
/Already on/i, // git checkout
],
failure: [
/fatal:/i,
/error:/i,
/conflict/i,
/rejected/i,
// NOTE(review): redundant - everything this matches is already matched by
// the case-insensitive `/conflict/i` two entries above.
/CONFLICT/,
/Aborting/i,
/not a git repository/i,
],
},
};
/**
 * Lookalike code points that Unicode compatibility folding (NFKC) leaves
 * untouched, mapped to the ASCII letter they imitate.
 *
 * Keys are written as `\u` escapes on purpose: the characters are visually
 * indistinguishable from ASCII, so literal keys are impossible to review and
 * are easily corrupted into identity mappings by editors or tooling.
 */
const CONFUSABLE_TO_ASCII = new Map([
  // Cyrillic lowercase (U+0430 .. U+0458)
  ['\u0430', 'a'], ['\u0435', 'e'], ['\u043E', 'o'], ['\u0440', 'p'],
  ['\u0441', 'c'], ['\u0443', 'y'], ['\u0445', 'x'],
  ['\u0455', 's'], ['\u0456', 'i'], ['\u0458', 'j'],
  // Cyrillic uppercase (U+0405 .. U+0425)
  ['\u0410', 'A'], ['\u0412', 'B'], ['\u0415', 'E'], ['\u041A', 'K'],
  ['\u041C', 'M'], ['\u041D', 'H'], ['\u041E', 'O'], ['\u0420', 'P'],
  ['\u0421', 'C'], ['\u0422', 'T'], ['\u0425', 'X'],
  ['\u0405', 'S'], ['\u0406', 'I'], ['\u0408', 'J'],
  // Greek uppercase (U+0391 .. U+03A7)
  ['\u0391', 'A'], ['\u0392', 'B'], ['\u0395', 'E'], ['\u0397', 'H'],
  ['\u0399', 'I'], ['\u039A', 'K'], ['\u039C', 'M'], ['\u039D', 'N'],
  ['\u039F', 'O'], ['\u03A1', 'P'], ['\u03A4', 'T'], ['\u03A7', 'X'],
  ['\u03A5', 'Y'], ['\u0396', 'Z'],
  // Greek lowercase
  ['\u03BF', 'o'], ['\u03BD', 'v'], ['\u03C1', 'p'],
]);

/** Zero-width / invisible characters that can be used to break up a keyword. */
const INVISIBLE_CHARACTERS = /[\u200B-\u200D\uFEFF\u2060\u180E]/g;

/**
 * Normalize text to prevent Unicode lookalike attacks.
 *
 * Steps, in order:
 *  1. NFKC normalization. Unlike NFC this folds compatibility forms to their
 *     plain equivalents: fullwidth letters and digits (both cases),
 *     mathematical alphanumerics, small roman numerals, script small l, etc.
 *  2. Cyrillic and Greek lookalikes, which NFKC does not touch, are mapped to
 *     ASCII through `CONFUSABLE_TO_ASCII` in a single pass over the code
 *     points (so astral characters are never split).
 *  3. Zero-width characters are removed.
 *
 * The result is meant for pattern matching only; it is not a faithful copy
 * of the input.
 *
 * @param {string} text - Raw tool output. `null`/`undefined` yield `''`;
 *   other non-string values are converted with `String()`.
 * @returns {string} The folded text.
 */
export function normalizeForPatternMatch(text) {
  if (text == null) {
    return '';
  }
  const folded = String(text).normalize('NFKC');
  let mapped = '';
  // String iteration walks code points, not UTF-16 units.
  for (const ch of folded) {
    mapped += CONFUSABLE_TO_ASCII.get(ch) ?? ch;
  }
  return mapped.replace(INVISIBLE_CHARACTERS, '');
}
/**
 * Phrases in which a success word is negated, e.g. "not successful",
 * "failed to succeed", "success was not achieved".
 */
const NEGATED_SUCCESS_PATTERNS = [
  /not\s+(been\s+)?success/i,
  /fail(ed|ure|s)?\s+to\s+(be\s+)?success/i,
  /success\s+(was|is)\s+not/i,
  /no\s+success/i,
  /unsuccess/i,
  /!success/i,
];

/**
 * Analyze output against patterns to determine success/failure.
 *
 * Signals are evaluated strictly in this order; the first that applies wins:
 *  1. Any failure pattern matches            -> failure, high confidence.
 *  2. An exit code is given and is not 0     -> failure, high confidence.
 *     This is checked BEFORE success patterns so that a success word in the
 *     output can never outvote a failing exit status.
 *  3. A success pattern matches              -> success, high confidence,
 *     unless the output also contains a negated-success phrase, in which
 *     case it is a failure with medium confidence.
 *  4. Otherwise                              -> indeterminate, low
 *     confidence. Exit code 0 alone is NOT treated as success.
 *
 * The output is passed through `normalizeForPatternMatch` first so Unicode
 * lookalikes cannot hide a keyword.
 *
 * @param {string} output - Raw tool output; nullish is treated as empty.
 * @param {{success?: RegExp[], failure?: RegExp[]}} patterns - Pattern
 *   lists; a missing list is treated as empty.
 * @param {number} [exitCode] - Process exit status, if known. Any value
 *   other than `undefined` and `0` counts as a failing exit.
 * @returns {{isSuccess: boolean, isFailure: boolean, matchedPattern?: string,
 *   confidence: 'high'|'medium'|'low'}} Classification of the output.
 */
export function analyzeOutput(output, patterns, exitCode) {
  const normalizedOutput = normalizeForPatternMatch(String(output ?? ''));

  // Reset lastIndex so caller-supplied /g or /y patterns behave statelessly.
  const matchesOutput = (pattern) => {
    pattern.lastIndex = 0;
    return pattern.test(normalizedOutput);
  };

  // 1. Explicit failure text has the highest priority.
  const failureHit = (patterns.failure ?? []).find(matchesOutput);
  if (failureHit) {
    return { isSuccess: false, isFailure: true, matchedPattern: failureHit.source, confidence: 'high' };
  }

  // 2. A failing exit status overrides any success wording.
  if (exitCode !== undefined && exitCode !== 0) {
    return { isSuccess: false, isFailure: true, confidence: 'high' };
  }

  // 3. Success text, guarded against negated phrasing.
  const successHit = (patterns.success ?? []).find(matchesOutput);
  if (successHit) {
    if (NEGATED_SUCCESS_PATTERNS.some(matchesOutput)) {
      return { isSuccess: false, isFailure: true, matchedPattern: 'success in negative context', confidence: 'medium' };
    }
    return { isSuccess: true, isFailure: false, matchedPattern: successHit.source, confidence: 'high' };
  }

  // 4. Nothing matched: indeterminate, the caller should treat it as unverified.
  return { isSuccess: false, isFailure: false, confidence: 'low' };
}
/**
 * Create a verification check from a command result.
 *
 * The check passes when the exit code is exactly `0` and, if any expected
 * patterns are supplied, at least one of them matches the output. An empty
 * or missing pattern list means "nothing further to check", so the exit
 * code alone decides.
 *
 * @param {string} checkName - Label stored in the returned `check` field.
 * @param {number} exitCode - Process exit status.
 * @param {string} output - Command output; nullish is treated as empty
 *   rather than being matched as the text "undefined"/"null".
 * @param {RegExp[]} [expectedPatterns] - Patterns of which one must match.
 * @returns {{check: string, passed: boolean, details: string}} The check.
 */
export function createCommandCheck(checkName, exitCode, output, expectedPatterns) {
  const exitedCleanly = exitCode === 0;
  const hasExpectations = Boolean(expectedPatterns) && expectedPatterns.length > 0;

  if (exitedCleanly && hasExpectations) {
    const text = String(output ?? '');
    const foundPattern = expectedPatterns.some((pattern) => {
      // Reset so caller-supplied /g or /y patterns behave statelessly.
      pattern.lastIndex = 0;
      return pattern.test(text);
    });
    if (!foundPattern) {
      return {
        check: checkName,
        passed: false,
        details: 'Exit code 0 but expected output pattern not found',
      };
    }
  }

  return {
    check: checkName,
    passed: exitedCleanly,
    details: `Exit code ${exitCode}`,
  };
}
/**
 * Verify that a file exists and optionally contains expected content.
 *
 * The file is read in full, so "exists" here also means "is readable and is
 * not a directory". Any read error is reported as a failed check carrying
 * the error message; nothing is thrown.
 *
 * @param {string} filePath - Path of the file to check.
 * @param {string|RegExp} [expectedContent] - Substring that must occur in,
 *   or pattern that must match, the file decoded as UTF-8. A falsy value
 *   (including `''`) skips the content check.
 * @returns {Promise<{check: string, passed: boolean, details: string}>}
 */
export async function verifyFileExists(filePath, expectedContent) {
  const fs = await import('fs/promises');
  try {
    // Read raw bytes so the reported size is a true byte count rather than
    // the UTF-16 length of the decoded string.
    const raw = await fs.readFile(filePath);
    if (expectedContent) {
      const content = raw.toString('utf-8');
      let matches;
      if (typeof expectedContent === 'string') {
        matches = content.includes(expectedContent);
      } else {
        // Reset so a reused /g or /y pattern does not start mid-string.
        expectedContent.lastIndex = 0;
        matches = expectedContent.test(content);
      }
      return {
        check: `File ${filePath} exists with expected content`,
        passed: matches,
        details: matches ? 'Content verified' : 'Content does not match expected',
      };
    }
    return {
      check: `File ${filePath} exists`,
      passed: true,
      details: `File size: ${raw.length} bytes`,
    };
  } catch (error) {
    return {
      check: `File ${filePath} exists`,
      passed: false,
      details: error instanceof Error ? error.message : 'File not found',
    };
  }
}
/**
 * Verify URL is accessible (for deployment verification).
 *
 * Sends a `HEAD` request and passes when the response status equals
 * `expectedStatus`. Network errors and timeouts are reported as a failed
 * check; nothing is thrown.
 *
 * @param {string} url - URL to probe.
 * @param {number} [expectedStatus=200] - Status code that counts as success.
 * @param {number} [timeoutMs=10000] - Abort the request after this long.
 * @returns {Promise<{check: string, passed: boolean, details: string}>}
 */
export async function verifyUrlAccessible(url, expectedStatus = 200, timeoutMs = 10000) {
  const check = `URL ${url} accessible`;
  let controller;
  let timer;
  try {
    controller = new AbortController();
    timer = setTimeout(() => controller.abort(), timeoutMs);
    const response = await fetch(url, {
      method: 'HEAD',
      signal: controller.signal,
    });
    return {
      check,
      passed: response.status === expectedStatus,
      details: `Status ${response.status} (expected ${expectedStatus})`,
    };
  } catch (error) {
    // Only our own timer aborts this controller, so an aborted signal means
    // the request timed out; say so instead of a generic abort message.
    if (controller && controller.signal.aborted) {
      return { check, passed: false, details: `Request timed out after ${timeoutMs}ms` };
    }
    return {
      check,
      passed: false,
      details: error instanceof Error ? error.message : 'Request failed',
    };
  } finally {
    // Always release the timer; a pending one keeps the process alive for up
    // to timeoutMs after a failed request.
    clearTimeout(timer);
  }
}
/**
 * Verify a JSON response matches expected structure.
 *
 * Fetches `url`, requires an OK response whose body parses to a JSON object,
 * and passes when every name in `expectedFields` is an own top-level key of
 * that object. HTTP errors, network errors, invalid JSON and timeouts are
 * reported as a failed check; nothing is thrown.
 *
 * @param {string} url - Endpoint to fetch.
 * @param {string[]} expectedFields - Top-level keys that must be present.
 * @param {number} [timeoutMs=10000] - Abort after this long; the limit
 *   covers reading and parsing the body as well as the initial response.
 * @returns {Promise<{check: string, passed: boolean, details: string}>}
 */
export async function verifyJsonEndpoint(url, expectedFields, timeoutMs = 10000) {
  const check = `JSON endpoint ${url}`;
  let controller;
  let timer;
  try {
    controller = new AbortController();
    timer = setTimeout(() => controller.abort(), timeoutMs);
    const response = await fetch(url, { signal: controller.signal });
    if (!response.ok) {
      return { check, passed: false, details: `HTTP ${response.status}` };
    }
    // The timer is still armed here so a stalled body download is aborted too.
    const json = await response.json();
    if (json === null || typeof json !== 'object') {
      return { check, passed: false, details: 'Response body is not a JSON object' };
    }
    // Own keys only: `in` would also accept inherited names such as
    // "toString" or "constructor".
    const missingFields = expectedFields.filter((field) => !Object.hasOwn(json, field));
    const allPresent = missingFields.length === 0;
    return {
      check,
      passed: allPresent,
      details: allPresent
        ? `All expected fields present: ${expectedFields.join(', ')}`
        : `Missing fields: ${missingFields.join(', ')}`,
    };
  } catch (error) {
    // Only our own timer aborts this controller, so an aborted signal means
    // the request timed out.
    if (controller && controller.signal.aborted) {
      return { check, passed: false, details: `Request timed out after ${timeoutMs}ms` };
    }
    return {
      check,
      passed: false,
      details: error instanceof Error ? error.message : 'Request failed',
    };
  } finally {
    // Always release the timer, on success and on every failure path.
    clearTimeout(timer);
  }
}
/**
 * Lower-case substrings whose presence in truncated-away output suggests
 * that an error is being hidden. Order is significant: it is the order in
 * which matches are listed in the truncation warning.
 */
const ERROR_INDICATORS = [
  // Generic failure wording
  'error',
  'failed',
  'failure',
  'exception',
  // Access problems
  'denied',
  'unauthorized',
  'forbidden',
  // HTTP-style status signals
  'not found',
  '404',
  '500',
  '502',
  '503',
  // Connection / validation problems
  'timeout',
  'refused',
  'rejected',
  'invalid',
  'expired',
  // Abnormal termination
  'abort',
  'crash',
  'fatal',
  'critical',
  'panic',
  'segfault',
  'killed',
  'terminated',
];
/**
 * Check if content that would be truncated might contain error indicators.
 *
 * An indicator counts when at least one of its characters lies at or after
 * `truncateAt`. That includes an indicator split by the cut itself (for
 * example "fai" visible, "led" hidden), which the reader could otherwise not
 * recognise. Indicators that lie entirely in the visible part are ignored.
 * Matching is case-insensitive and substring-based.
 *
 * @param {string} fullContent - The complete, untruncated text.
 * @param {number} truncateAt - Index at which the text will be cut.
 * @returns {{shouldWarn: boolean, warning: string}} `warning` is empty when
 *   `shouldWarn` is false.
 */
export function checkTruncationForErrors(fullContent, truncateAt) {
  if (fullContent.length <= truncateAt) {
    return { shouldWarn: false, warning: '' };
  }

  // Lower-case only the tail that can matter: the hidden part plus enough
  // visible characters in front of the cut to hold a split indicator.
  const longest = ERROR_INDICATORS.reduce((max, indicator) => Math.max(max, indicator.length), 0);
  const windowStart = Math.max(0, truncateAt - Math.max(0, longest - 1));
  const tail = fullContent.slice(windowStart).toLowerCase();
  const cutInTail = truncateAt - windowStart;

  const foundIndicators = ERROR_INDICATORS.filter((indicator) => {
    // Earliest start at which the match still reaches past the cut.
    const earliestStart = Math.max(0, cutInTail - (indicator.length - 1));
    return tail.includes(indicator, earliestStart);
  });

  if (foundIndicators.length === 0) {
    return { shouldWarn: false, warning: '' };
  }
  return {
    shouldWarn: true,
    warning: `⚠️ TRUNCATION WARNING: The hidden portion (${fullContent.length - truncateAt} chars) may contain error indicators: ${foundIndicators.join(', ')}. If the operation appears to have failed, review full output.`,
  };
}
/**
 * Safely truncate content with error detection.
 *
 * Content no longer than `maxLength` is returned unchanged. Otherwise the
 * first `maxLength` UTF-16 units are kept, followed by a note stating how
 * many characters were dropped and, when `checkTruncationForErrors` flags
 * the dropped part, its warning.
 *
 * If the cut would fall between the two halves of a surrogate pair, it is
 * moved one unit earlier so the output never ends in a lone surrogate; the
 * dropped-character count reflects the adjusted cut.
 *
 * @param {string} content - Text to truncate.
 * @param {number} maxLength - Maximum number of UTF-16 units to keep.
 * @param {string} [_label='Content'] - Currently unused.
 * @returns {string} The original or truncated-and-annotated text.
 */
export function safeTruncate(content, maxLength, _label = 'Content') {
  if (content.length <= maxLength) {
    return content;
  }

  let cutAt = maxLength;
  if (cutAt > 0) {
    const lastKept = content.charCodeAt(cutAt - 1);
    const firstDropped = content.charCodeAt(cutAt);
    const endsOnHighSurrogate = lastKept >= 0xD800 && lastKept <= 0xDBFF;
    const resumesOnLowSurrogate = firstDropped >= 0xDC00 && firstDropped <= 0xDFFF;
    if (endsOnHighSurrogate && resumesOnLowSurrogate) {
      // Drop the whole pair rather than emitting half a character.
      cutAt -= 1;
    }
  }

  const truncation = checkTruncationForErrors(content, cutAt);
  const hiddenChars = content.length - cutAt;
  let result = `${content.slice(0, cutAt)}\n\n[... ${hiddenChars} characters truncated]`;
  if (truncation.shouldWarn) {
    result += `\n\n${truncation.warning}`;
  }
  return result;
}
//# sourceMappingURL=resultVerification.js.map