UNPKG

@ai2070/l0

Version:

L0: The Missing Reliability Substrate for AI

293 lines (292 loc) 8.11 kB
import {
  compareStrings,
  compareValues,
  deepEqual,
  calculateSimilarityScore,
  countFields
} from "./utils/comparison";

/**
 * Compare an actual value against an expected value or a Zod-like schema.
 *
 * Dispatch order:
 *  1. `expected` looks like a Zod schema -> "schema" (validated via `safeParse`).
 *  2. `deepEqual(expected, actual)` holds -> "exact".
 *  3. Both are strings                    -> "fuzzy" (via `compareStrings`).
 *  4. Both are numbers                    -> "numeric" (absolute tolerance).
 *  5. Both report `typeof "object"`       -> "structural" (via `compareValues`).
 *  6. Anything else                       -> "mixed" (recorded as a type mismatch).
 *
 * @param {object} options
 * @param {*} options.expected - Expected value, or a Zod-like schema.
 * @param {*} options.actual - Value under test.
 * @param {string} [options.style="strict"] - "strict" requires an exact match
 *   with no differences; any other style passes when `score >= threshold`.
 * @param {number} [options.threshold=0.8] - Minimum score for a non-strict match.
 * @param {number} [options.numericTolerance=1e-3] - Maximum absolute difference
 *   for two numbers to be treated as matching.
 * @param {boolean} [options.ignoreArrayOrder=false] - Forwarded to `compareValues`.
 * @param {boolean} [options.ignoreExtraFields] - Forwarded to `compareValues`;
 *   defaults to `true` only when `style === "lenient"`.
 * @param {*} [options.customComparisons] - Forwarded to `compareValues` untouched.
 * @param {*} [options.metadata] - Echoed back on the result unchanged.
 * @returns {{match: boolean, score: number, differences: Array, details: object, metadata: *}}
 */
function evaluate(options) {
  const {
    expected,
    actual,
    style = "strict",
    threshold = 0.8,
    numericTolerance = 1e-3,
    ignoreArrayOrder = false,
    ignoreExtraFields = style === "lenient",
    customComparisons,
    metadata
  } = options;

  let differences = [];
  let comparisonType = "exact";
  let schemaValid = false;
  let exactMatch = false;
  let structureMatch = false;
  let contentSimilarity = 0;
  let fieldsCompared = 0;
  let fieldsMatched = 0;

  if (isZodSchema(expected)) {
    // Schema validation is all-or-nothing: every field counts as matched
    // when the schema accepts the value, none otherwise.
    comparisonType = "schema";
    const schemaResult = validateSchema(expected, actual);
    schemaValid = schemaResult.valid;
    differences = schemaResult.differences;
    fieldsCompared = countFields(actual);
    fieldsMatched = schemaValid ? fieldsCompared : 0;
    contentSimilarity = schemaValid ? 1 : 0;
    exactMatch = schemaValid;
    structureMatch = schemaValid;
  } else {
    exactMatch = deepEqual(expected, actual);
    if (exactMatch) {
      comparisonType = "exact";
      schemaValid = true;
      structureMatch = true;
      contentSimilarity = 1;
      fieldsCompared = countFields(expected);
      fieldsMatched = fieldsCompared;
    } else {
      const expectedType = typeof expected;
      const actualType = typeof actual;
      if (expectedType === "string" && actualType === "string") {
        comparisonType = "fuzzy";
        contentSimilarity = compareStrings(expected, actual, {
          caseSensitive: true,
          normalizeWhitespace: true,
          algorithm: "levenshtein"
        });
        const withinThreshold = contentSimilarity >= threshold;
        fieldsCompared = 1;
        fieldsMatched = withinThreshold ? 1 : 0;
        if (contentSimilarity < 1) {
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            // Close-enough strings are only a warning; below threshold is an error.
            severity: withinThreshold ? "warning" : "error",
            message: `Strings differ (${(contentSimilarity * 100).toFixed(0)}% similar)`,
            similarity: contentSimilarity
          });
        }
      } else if (expectedType === "number" && actualType === "number") {
        comparisonType = "numeric";
        const numericMatch = Math.abs(expected - actual) <= numericTolerance;
        contentSimilarity = numericMatch ? 1 : 0;
        fieldsCompared = 1;
        fieldsMatched = numericMatch ? 1 : 0;
        if (!numericMatch) {
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            severity: "error",
            message: `Numbers differ: ${expected} vs ${actual}`
          });
        }
      } else if (expectedType === "object" && actualType === "object") {
        // Arrays (and null) also report typeof "object", so this single test
        // covers the array/array case as well.
        // NOTE(review): null operands are handed to compareValues/countFields
        // as-is - confirm those helpers tolerate null.
        comparisonType = "structural";
        const compOptions = {
          style,
          ignoreExtraFields,
          ignoreArrayOrder,
          numericTolerance,
          customComparisons
        };
        differences = compareValues(expected, actual, compOptions, "");
        fieldsCompared = countFields(expected);
        const errorDiffs = differences.filter((d) => d.severity === "error");
        // There can be more error differences than counted fields (e.g. many
        // unexpected extras); a matched-field count must never go negative.
        fieldsMatched = Math.max(0, fieldsCompared - errorDiffs.length);
        structureMatch = errorDiffs.length === 0;
        contentSimilarity = calculateSimilarityScore(differences, fieldsCompared);
      } else {
        comparisonType = "mixed";
        differences.push({
          path: "",
          expected,
          actual,
          type: "type-mismatch",
          severity: "error",
          message: `Type mismatch: expected ${expectedType}, got ${actualType}`
        });
        fieldsCompared = 1;
        fieldsMatched = 0;
        contentSimilarity = 0;
      }
    }
  }

  // The overall score is derived from the difference list, independently of
  // the per-branch contentSimilarity reported in `details`.
  const score =
    differences.length === 0
      ? 1
      : calculateSimilarityScore(differences, fieldsCompared);
  const match =
    style === "strict"
      ? exactMatch && differences.length === 0
      : score >= threshold;
  const details = {
    exactMatch,
    schemaValid,
    structureMatch,
    contentSimilarity,
    fieldsCompared,
    fieldsMatched,
    comparisonType
  };
  return { match, score, differences, details, metadata };
}

/**
 * Run `evaluate` over a list of named test cases and aggregate the outcome.
 *
 * Only `expected`, `actual`, `style`, `threshold` and `metadata` are forwarded
 * from each test to `evaluate`.
 *
 * @param {Array<{name: *, expected: *, actual: *, style?: string, threshold?: number, metadata?: *}>} tests
 * @returns {{passed: boolean, passCount: number, failCount: number, total: number, averageScore: number, results: Array, summary: object}}
 *   `passed` is true when no test failed (including for an empty list);
 *   `averageScore` is 0 for an empty list.
 */
function evaluateBatch(tests) {
  const results = [];
  for (const test of tests) {
    const result = evaluate({
      expected: test.expected,
      actual: test.actual,
      style: test.style,
      threshold: test.threshold,
      metadata: test.metadata
    });
    results.push({
      name: test.name,
      passed: result.match,
      result,
      metadata: test.metadata
    });
  }

  const passCount = results.filter((r) => r.passed).length;
  const failCount = results.length - passCount;
  const passed = failCount === 0;
  // Guard the division: an empty batch would otherwise yield 0 / 0 = NaN.
  const averageScore =
    results.length === 0
      ? 0
      : results.reduce((sum, r) => sum + r.result.score, 0) / results.length;
  const summary = {
    exactMatches: results.filter((r) => r.result.details.exactMatch).length,
    schemaValid: results.filter((r) => r.result.details.schemaValid).length,
    fuzzyMatches: results.filter(
      (r) =>
        r.passed &&
        !r.result.details.exactMatch &&
        r.result.details.comparisonType === "fuzzy"
    ).length,
    totalDifferences: results.reduce(
      (sum, r) => sum + r.result.differences.length,
      0
    )
  };
  return {
    passed,
    passCount,
    failCount,
    total: results.length,
    averageScore,
    results,
    summary
  };
}

/**
 * Validate a value with a Zod-like schema (anything exposing `safeParse`).
 *
 * @param {{safeParse: Function}} schema
 * @param {*} value
 * @returns {{valid: true, data: *, differences: Array} | {valid: false, errors: *, differences: Array}}
 *   On failure, `errors` is the raw error object from `safeParse` and
 *   `differences` has one "schema-violation" entry per reported issue.
 */
function validateSchema(schema, value) {
  const result = schema.safeParse(value);
  if (result.success) {
    return { valid: true, data: result.data, differences: [] };
  }
  const error = result.error;
  // `issues` is the canonical issue list on a ZodError; `errors` is kept as a
  // fallback for error objects that only expose the older alias.
  const issues = Array.isArray(error.issues) ? error.issues : error.errors || [];
  const differences = issues.map((issue) => {
    const path = issue.path.join(".");
    return {
      path,
      expected: issue.message,
      actual: value,
      type: "schema-violation",
      severity: "error",
      message: `${path}: ${issue.message}`
    };
  });
  return { valid: false, errors: error, differences };
}

/**
 * Duck-type check for a Zod schema: a non-null object carrying `_def`,
 * `parse` and `safeParse`.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isZodSchema(value) {
  return (
    Boolean(value) &&
    typeof value === "object" &&
    "_def" in value &&
    "parse" in value &&
    "safeParse" in value
  );
}

/**
 * Evaluate and throw when the result is not a match.
 *
 * @param {object} options - Same options as `evaluate`.
 * @throws {Error} With the score and one line per difference in the message.
 */
function assertMatch(options) {
  const result = evaluate(options);
  if (!result.match) {
    const errorMsg = [
      "Evaluation failed:",
      `Score: ${(result.score * 100).toFixed(1)}%`,
      `Differences (${result.differences.length}):`,
      ...result.differences.map((d) => ` - ${d.path}: ${d.message}`)
    ].join("\n");
    throw new Error(errorMsg);
  }
}

/**
 * Score of `b` against `a`. Defaults to the "lenient" style; `options` may
 * override it.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options] - Extra `evaluate` options.
 * @returns {number}
 */
function similarity(a, b, options = {}) {
  const result = evaluate({
    expected: a,
    actual: b,
    style: "lenient",
    ...options
  });
  return result.score;
}

/**
 * Whether `b` matches `a` under the given `evaluate` options.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options] - Extra `evaluate` options.
 * @returns {boolean}
 */
function matches(a, b, options = {}) {
  const result = evaluate({ expected: a, actual: b, ...options });
  return result.match;
}

/**
 * Difference list between `expected` and `actual`.
 *
 * @param {*} expected
 * @param {*} actual
 * @param {object} [options] - Extra `evaluate` options.
 * @returns {Array}
 */
function getDifferences(expected, actual, options = {}) {
  const result = evaluate({ expected, actual, ...options });
  return result.differences;
}

/**
 * Build a predicate bound to `expected` and `options`.
 *
 * @param {*} expected
 * @param {object} [options] - Extra `evaluate` options.
 * @returns {function(*): boolean}
 */
function createMatcher(expected, options = {}) {
  return (actual) => {
    return matches(expected, actual, options);
  };
}

/**
 * Snapshot-style comparison backed by a caller-owned store.
 *
 * When `name` is not yet in `snapshots`, `actual` is stored and a synthetic
 * passing result is returned. Otherwise `actual` is evaluated against the
 * stored value.
 *
 * @param {*} name - Snapshot key.
 * @param {*} actual - Value under test.
 * @param {{has: Function, get: Function, set: Function}} snapshots - Map-like store.
 * @param {object} [options] - Extra `evaluate` options; `options.metadata` is
 *   merged into the result metadata.
 * @returns {object} An `evaluate`-shaped result.
 */
function snapshot(name, actual, snapshots, options = {}) {
  if (!snapshots.has(name)) {
    snapshots.set(name, actual);
    const fieldCount = countFields(actual);
    return {
      match: true,
      score: 1,
      differences: [],
      details: {
        exactMatch: true,
        schemaValid: true,
        structureMatch: true,
        contentSimilarity: 1,
        fieldsCompared: fieldCount,
        fieldsMatched: fieldCount,
        comparisonType: "exact"
      },
      metadata: { snapshot: "created", name }
    };
  }
  const expected = snapshots.get(name);
  return evaluate({
    expected,
    actual,
    ...options,
    metadata: { ...options.metadata, snapshot: "compared", name }
  });
}

export {
  assertMatch,
  createMatcher,
  evaluate,
  evaluateBatch,
  getDifferences,
  matches,
  similarity,
  snapshot,
  validateSchema
};
//# sourceMappingURL=evaluate.js.map