UNPKG

@ai2070/l0

Version:

L0: The Missing Reliability Substrate for AI

270 lines 9.32 kB
import {
  compareStrings,
  compareValues,
  deepEqual,
  calculateSimilarityScore,
  countFields,
} from "./utils/comparison";

/**
 * Compare an actual value against an expected value or a Zod-style schema.
 *
 * Dispatch order:
 *  - `expected` looks like a Zod schema -> "schema" comparison via safeParse.
 *  - deep-equal values                  -> "exact".
 *  - two strings                        -> "fuzzy" (string similarity).
 *  - two numbers                        -> "numeric" (absolute tolerance).
 *  - two objects / arrays               -> "structural" (compareValues).
 *  - anything else                      -> "mixed" (reported as a type mismatch).
 *
 * @param {object} options
 * @param {*} options.expected - Expected value, or a Zod-style schema.
 * @param {*} options.actual - Value under test.
 * @param {string} [options.style="strict"] - "strict" requires an exact match
 *   with no differences; any other style passes when `score >= threshold`.
 * @param {number} [options.threshold=0.8] - Minimum score for non-strict styles;
 *   also decides the severity of string differences.
 * @param {number} [options.numericTolerance=0.001] - Maximum absolute
 *   difference for two numbers to be treated as matching.
 * @param {boolean} [options.ignoreArrayOrder=false] - Forwarded to compareValues.
 * @param {boolean} [options.ignoreExtraFields] - Forwarded to compareValues;
 *   defaults to `true` only when `style` is "lenient".
 * @param {*} [options.customComparisons] - Forwarded to compareValues.
 * @param {*} [options.metadata] - Returned untouched on the result.
 * @returns {{match: boolean, score: number, differences: Array, details: object, metadata: *}}
 */
export function evaluate(options) {
  const {
    expected,
    actual,
    style = "strict",
    threshold = 0.8,
    numericTolerance = 0.001,
    ignoreArrayOrder = false,
    ignoreExtraFields = style === "lenient",
    customComparisons,
    metadata,
  } = options;
  const isSchema = isZodSchema(expected);
  let differences = [];
  let comparisonType = "exact";
  let schemaValid = false;
  let exactMatch = false;
  let structureMatch = false;
  let contentSimilarity = 0;
  let fieldsCompared = 0;
  let fieldsMatched = 0;
  if (isSchema) {
    // Schema validation is all-or-nothing: a valid parse counts as a full match.
    comparisonType = "schema";
    const schemaResult = validateSchema(expected, actual);
    schemaValid = schemaResult.valid;
    differences = schemaResult.differences;
    fieldsCompared = countFields(actual);
    fieldsMatched = schemaValid ? fieldsCompared : 0;
    contentSimilarity = schemaValid ? 1.0 : 0;
    exactMatch = schemaValid;
    structureMatch = schemaValid;
  } else {
    exactMatch = deepEqual(expected, actual);
    if (exactMatch) {
      comparisonType = "exact";
      schemaValid = true;
      structureMatch = true;
      contentSimilarity = 1.0;
      fieldsCompared = countFields(expected);
      fieldsMatched = fieldsCompared;
    } else {
      const expectedType = typeof expected;
      const actualType = typeof actual;
      if (expectedType === "string" && actualType === "string") {
        comparisonType = "fuzzy";
        contentSimilarity = compareStrings(expected, actual, {
          caseSensitive: true,
          normalizeWhitespace: true,
          algorithm: "levenshtein",
        });
        fieldsCompared = 1;
        fieldsMatched = contentSimilarity >= threshold ? 1 : 0;
        if (contentSimilarity < 1.0) {
          // Close-enough strings are downgraded to a warning instead of an error.
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            severity: contentSimilarity >= threshold ? "warning" : "error",
            message: `Strings differ (${(contentSimilarity * 100).toFixed(0)}% similar)`,
            similarity: contentSimilarity,
          });
        }
      } else if (expectedType === "number" && actualType === "number") {
        comparisonType = "numeric";
        const numericMatch = Math.abs(expected - actual) <= numericTolerance;
        contentSimilarity = numericMatch ? 1.0 : 0;
        fieldsCompared = 1;
        fieldsMatched = numericMatch ? 1 : 0;
        if (!numericMatch) {
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            severity: "error",
            message: `Numbers differ: ${expected} vs ${actual}`,
          });
        }
      } else if (
        (expectedType === "object" && actualType === "object") ||
        (Array.isArray(expected) && Array.isArray(actual))
      ) {
        comparisonType = "structural";
        const compOptions = {
          style,
          ignoreExtraFields,
          ignoreArrayOrder,
          numericTolerance,
          customComparisons,
        };
        differences = compareValues(expected, actual, compOptions, "");
        fieldsCompared = countFields(expected);
        const errorDiffs = differences.filter((d) => d.severity === "error");
        // There can be more error differences than counted expected fields;
        // clamp so the matched-field count never goes negative.
        fieldsMatched = Math.max(0, fieldsCompared - errorDiffs.length);
        structureMatch = errorDiffs.length === 0;
        contentSimilarity = calculateSimilarityScore(differences, fieldsCompared);
      } else {
        comparisonType = "mixed";
        differences.push({
          path: "",
          expected,
          actual,
          type: "type-mismatch",
          severity: "error",
          message: `Type mismatch: expected ${expectedType}, got ${actualType}`,
        });
        fieldsCompared = 1;
        fieldsMatched = 0;
        contentSimilarity = 0;
      }
    }
  }
  const score =
    differences.length === 0
      ? 1.0
      : calculateSimilarityScore(differences, fieldsCompared);
  const match =
    style === "strict"
      ? exactMatch && differences.length === 0
      : score >= threshold;
  const details = {
    exactMatch,
    schemaValid,
    structureMatch,
    contentSimilarity,
    fieldsCompared,
    fieldsMatched,
    comparisonType,
  };
  return {
    match,
    score,
    differences,
    details,
    metadata,
  };
}

/**
 * Run `evaluate` over a list of test cases and aggregate the outcomes.
 *
 * Only `expected`, `actual`, `style`, `threshold` and `metadata` are forwarded
 * from each test case to `evaluate`.
 *
 * @param {Array<{name: *, expected: *, actual: *, style?: string, threshold?: number, metadata?: *}>} tests
 * @returns {{passed: boolean, passCount: number, failCount: number, total: number, averageScore: number, results: Array, summary: object}}
 *   `passed` is true when no test failed (including the empty batch);
 *   `averageScore` is 0 for an empty batch.
 */
export function evaluateBatch(tests) {
  const results = [];
  for (const test of tests) {
    const result = evaluate({
      expected: test.expected,
      actual: test.actual,
      style: test.style,
      threshold: test.threshold,
      metadata: test.metadata,
    });
    results.push({
      name: test.name,
      passed: result.match,
      result,
      metadata: test.metadata,
    });
  }
  const passCount = results.filter((r) => r.passed).length;
  const failCount = results.length - passCount;
  const passed = failCount === 0;
  const totalScore = results.reduce((sum, r) => sum + r.result.score, 0);
  // Guard the division: an empty batch would otherwise yield NaN (0 / 0).
  const averageScore = results.length > 0 ? totalScore / results.length : 0;
  const summary = {
    exactMatches: results.filter((r) => r.result.details.exactMatch).length,
    schemaValid: results.filter((r) => r.result.details.schemaValid).length,
    fuzzyMatches: results.filter(
      (r) =>
        r.passed &&
        !r.result.details.exactMatch &&
        r.result.details.comparisonType === "fuzzy",
    ).length,
    totalDifferences: results.reduce(
      (sum, r) => sum + r.result.differences.length,
      0,
    ),
  };
  return {
    passed,
    passCount,
    failCount,
    total: results.length,
    averageScore,
    results,
    summary,
  };
}

/**
 * Validate a value with a schema exposing `safeParse` and convert any
 * validation issues into difference records.
 *
 * @param {{safeParse: Function}} schema - Zod-style schema.
 * @param {*} value - Value to validate.
 * @returns {{valid: true, data: *, differences: Array} | {valid: false, errors: *, differences: Array}}
 */
export function validateSchema(schema, value) {
  const result = schema.safeParse(value);
  if (result.success) {
    return {
      valid: true,
      data: result.data,
      differences: [],
    };
  }
  // Prefer the canonical `issues` list and fall back to the `errors` alias.
  // NOTE(review): newer Zod releases reportedly expose only `issues` — confirm
  // against the installed Zod version.
  const issues = Array.isArray(result.error.issues)
    ? result.error.issues
    : result.error.errors;
  const differences = issues.map((err) => ({
    path: err.path.join("."),
    expected: err.message,
    actual: value,
    type: "schema-violation",
    severity: "error",
    message: `${err.path.join(".")}: ${err.message}`,
  }));
  return {
    valid: false,
    errors: result.error,
    differences,
  };
}

/**
 * Duck-type check for a Zod-style schema: a non-null object exposing
 * `_def`, `parse` and `safeParse`.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isZodSchema(value) {
  return (
    value !== null &&
    typeof value === "object" &&
    "_def" in value &&
    "parse" in value &&
    "safeParse" in value
  );
}

/**
 * Evaluate and throw when the result does not match.
 *
 * @param {object} options - Same options as `evaluate`.
 * @throws {Error} With the score and a line per difference when `match` is false.
 */
export function assertMatch(options) {
  const result = evaluate(options);
  if (!result.match) {
    const errorMsg = [
      "Evaluation failed:",
      `Score: ${(result.score * 100).toFixed(1)}%`,
      `Differences (${result.differences.length}):`,
      ...result.differences.map((d) => ` - ${d.path}: ${d.message}`),
    ].join("\n");
    throw new Error(errorMsg);
  }
}

/**
 * Score of `b` against `a`. Uses the "lenient" style unless `options`
 * overrides it.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options.
 * @returns {number} The `score` field of the evaluation result.
 */
export function similarity(a, b, options = {}) {
  const result = evaluate({
    expected: a,
    actual: b,
    style: "lenient",
    ...options,
  });
  return result.score;
}

/**
 * Whether `b` matches `a` under the given `evaluate` options.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options.
 * @returns {boolean} The `match` field of the evaluation result.
 */
export function matches(a, b, options = {}) {
  const result = evaluate({
    expected: a,
    actual: b,
    ...options,
  });
  return result.match;
}

/**
 * List the differences found between `expected` and `actual`.
 *
 * @param {*} expected
 * @param {*} actual
 * @param {object} [options={}] - Extra `evaluate` options.
 * @returns {Array} The `differences` field of the evaluation result.
 */
export function getDifferences(expected, actual, options = {}) {
  const result = evaluate({
    expected,
    actual,
    ...options,
  });
  return result.differences;
}

/**
 * Build a reusable predicate bound to `expected` and `options`.
 *
 * @param {*} expected
 * @param {object} [options={}] - Extra `evaluate` options.
 * @returns {(actual: *) => boolean}
 */
export function createMatcher(expected, options = {}) {
  return (actual) => {
    return matches(expected, actual, options);
  };
}

/**
 * Snapshot-style comparison backed by a caller-supplied Map-like store.
 *
 * On first sight of `name`, `actual` is stored in `snapshots` and a passing
 * result is returned. Afterwards `actual` is evaluated against the stored value.
 *
 * @param {*} name - Snapshot key.
 * @param {*} actual - Value under test.
 * @param {{has: Function, get: Function, set: Function}} snapshots - Store;
 *   mutated when the snapshot is created.
 * @param {object} [options={}] - Extra `evaluate` options.
 * @returns {object} An evaluation result whose `metadata.snapshot` is
 *   "created" or "compared".
 */
export function snapshot(name, actual, snapshots, options = {}) {
  if (!snapshots.has(name)) {
    snapshots.set(name, actual);
    const fieldCount = countFields(actual);
    return {
      match: true,
      score: 1.0,
      differences: [],
      details: {
        exactMatch: true,
        schemaValid: true,
        structureMatch: true,
        contentSimilarity: 1.0,
        fieldsCompared: fieldCount,
        fieldsMatched: fieldCount,
        comparisonType: "exact",
      },
      metadata: { snapshot: "created", name },
    };
  }
  const expected = snapshots.get(name);
  return evaluate({
    expected,
    actual,
    ...options,
    metadata: { ...options.metadata, snapshot: "compared", name },
  });
}
//# sourceMappingURL=evaluate.js.map