@ai2070/l0
Version:
L0: The Missing Reliability Substrate for AI
270 lines • 9.32 kB
JavaScript
import { compareStrings, compareValues, deepEqual, calculateSimilarityScore, countFields, } from "./utils/comparison";
/**
 * Compare `actual` against `expected` and describe how well they agree.
 *
 * `expected` is either a concrete value or a Zod-style schema (as detected by
 * `isZodSchema`). A schema is validated with `validateSchema`. A concrete value
 * is first tested with `deepEqual`; if that fails, the strategy is picked from
 * the runtime types of both sides:
 *   - string vs string  -> similarity from `compareStrings` ("fuzzy")
 *   - number vs number  -> absolute difference vs `numericTolerance` ("numeric")
 *   - object vs object  -> diff from `compareValues` ("structural")
 *   - anything else     -> a single type-mismatch error ("mixed")
 *
 * @param {object} options
 * @param {*} options.expected - Expected value, or a schema to validate against.
 * @param {*} options.actual - Value under test.
 * @param {string} [options.style="strict"] - With "strict", a match requires
 *   `exactMatch` and no differences; any other style matches when the score
 *   reaches `threshold`.
 * @param {number} [options.threshold=0.8] - Minimum score for a non-strict
 *   match; also decides whether a string difference is a warning or an error.
 * @param {number} [options.numericTolerance=0.001] - Largest absolute
 *   difference at which two numbers still count as equal.
 * @param {boolean} [options.ignoreArrayOrder=false] - Forwarded to `compareValues`.
 * @param {boolean} [options.ignoreExtraFields] - Forwarded to `compareValues`;
 *   defaults to `true` only when `style` is "lenient".
 * @param {*} [options.customComparisons] - Forwarded to `compareValues`.
 * @param {*} [options.metadata] - Returned unchanged on the result.
 * @returns {{match: *, score: number, differences: Array, details: object, metadata: *}}
 *   The verdict, the score, the list of differences, per-comparison details
 *   and the caller's metadata.
 */
export function evaluate(options) {
    const {
        expected,
        actual,
        style = "strict",
        threshold = 0.8,
        numericTolerance = 0.001,
        ignoreArrayOrder = false,
        ignoreExtraFields = style === "lenient",
        customComparisons,
        metadata,
    } = options;
    const isSchema = isZodSchema(expected);
    let differences = [];
    let comparisonType = "exact";
    let schemaValid = false;
    let exactMatch = false;
    let structureMatch = false;
    let contentSimilarity = 0;
    let fieldsCompared = 0;
    let fieldsMatched = 0;
    if (isSchema) {
        // Schema validation is all-or-nothing: every detail flag follows `valid`.
        comparisonType = "schema";
        const schemaResult = validateSchema(expected, actual);
        schemaValid = schemaResult.valid;
        differences = schemaResult.differences;
        fieldsCompared = countFields(actual);
        fieldsMatched = schemaValid ? fieldsCompared : 0;
        contentSimilarity = schemaValid ? 1.0 : 0;
        exactMatch = schemaValid;
        structureMatch = schemaValid;
    }
    else {
        exactMatch = deepEqual(expected, actual);
        if (exactMatch) {
            comparisonType = "exact";
            schemaValid = true;
            structureMatch = true;
            contentSimilarity = 1.0;
            fieldsCompared = countFields(expected);
            fieldsMatched = fieldsCompared;
        }
        else {
            const expectedType = typeof expected;
            const actualType = typeof actual;
            if (expectedType === "string" && actualType === "string") {
                comparisonType = "fuzzy";
                contentSimilarity = compareStrings(expected, actual, {
                    caseSensitive: true,
                    normalizeWhitespace: true,
                    algorithm: "levenshtein",
                });
                fieldsCompared = 1;
                fieldsMatched = contentSimilarity >= threshold ? 1 : 0;
                if (contentSimilarity < 1.0) {
                    differences.push({
                        path: "",
                        expected,
                        actual,
                        type: "different",
                        // Close enough to pass the threshold is only a warning.
                        severity: contentSimilarity >= threshold ? "warning" : "error",
                        message: `Strings differ (${(contentSimilarity * 100).toFixed(0)}% similar)`,
                        similarity: contentSimilarity,
                    });
                }
            }
            else if (expectedType === "number" && actualType === "number") {
                comparisonType = "numeric";
                const numericMatch = Math.abs(expected - actual) <= numericTolerance;
                contentSimilarity = numericMatch ? 1.0 : 0;
                fieldsCompared = 1;
                fieldsMatched = numericMatch ? 1 : 0;
                if (!numericMatch) {
                    differences.push({
                        path: "",
                        expected,
                        actual,
                        type: "different",
                        severity: "error",
                        message: `Numbers differ: ${expected} vs ${actual}`,
                    });
                }
            }
            else if (expectedType === "object" && actualType === "object") {
                // Arrays report typeof "object" too, so this branch covers
                // array-vs-array without a separate Array.isArray check.
                comparisonType = "structural";
                const compOptions = {
                    style,
                    ignoreExtraFields,
                    ignoreArrayOrder,
                    numericTolerance,
                    customComparisons,
                };
                differences = compareValues(expected, actual, compOptions, "");
                fieldsCompared = countFields(expected);
                const errorDiffs = differences.filter((d) => d.severity === "error");
                // The diff can report more errors than `expected` has fields,
                // so clamp to keep the matched count from going negative.
                fieldsMatched = Math.max(0, fieldsCompared - errorDiffs.length);
                structureMatch = errorDiffs.length === 0;
                contentSimilarity = calculateSimilarityScore(differences, fieldsCompared);
            }
            else {
                comparisonType = "mixed";
                differences.push({
                    path: "",
                    expected,
                    actual,
                    type: "type-mismatch",
                    severity: "error",
                    message: `Type mismatch: expected ${expectedType}, got ${actualType}`,
                });
                fieldsCompared = 1;
                fieldsMatched = 0;
                contentSimilarity = 0;
            }
        }
    }
    // The overall score is always derived from the difference list, for every
    // comparison type; it is 1.0 when there is nothing to report.
    const score = differences.length === 0
        ? 1.0
        : calculateSimilarityScore(differences, fieldsCompared);
    const match = style === "strict"
        ? exactMatch && differences.length === 0
        : score >= threshold;
    const details = {
        exactMatch,
        schemaValid,
        structureMatch,
        contentSimilarity,
        fieldsCompared,
        fieldsMatched,
        comparisonType,
    };
    return {
        match,
        score,
        differences,
        details,
        metadata,
    };
}
/**
 * Run `evaluate` over a list of test cases and aggregate the outcomes.
 *
 * Only `expected`, `actual`, `style`, `threshold` and `metadata` are forwarded
 * from each test case; every other `evaluate` option keeps its default.
 *
 * @param {Iterable<object>} tests - Test cases, each with `name`, `expected`,
 *   `actual` and optionally `style`, `threshold`, `metadata`.
 * @returns {{passed: boolean, passCount: number, failCount: number,
 *   total: number, averageScore: number, results: Array, summary: object}}
 *   Per-test results plus totals. `passed` is true when no test failed, which
 *   includes the empty batch. `averageScore` is 0 for an empty batch.
 */
export function evaluateBatch(tests) {
    const results = [];
    for (const test of tests) {
        const result = evaluate({
            expected: test.expected,
            actual: test.actual,
            style: test.style,
            threshold: test.threshold,
            metadata: test.metadata,
        });
        results.push({
            name: test.name,
            passed: result.match,
            result,
            metadata: test.metadata,
        });
    }
    const total = results.length;
    const passCount = results.filter((r) => r.passed).length;
    const failCount = total - passCount;
    const passed = failCount === 0;
    const scoreSum = results.reduce((sum, r) => sum + r.result.score, 0);
    // Guard the division: an empty batch would otherwise produce NaN (0 / 0).
    const averageScore = total === 0 ? 0 : scoreSum / total;
    const summary = {
        exactMatches: results.filter((r) => r.result.details.exactMatch).length,
        schemaValid: results.filter((r) => r.result.details.schemaValid).length,
        // Passed without being exact, via the string-similarity path.
        fuzzyMatches: results.filter((r) => r.passed &&
            !r.result.details.exactMatch &&
            r.result.details.comparisonType === "fuzzy").length,
        totalDifferences: results.reduce((sum, r) => sum + r.result.differences.length, 0),
    };
    return {
        passed,
        passCount,
        failCount,
        total,
        averageScore,
        results,
        summary,
    };
}
/**
 * Validate `value` against a schema exposing a Zod-style `safeParse`.
 *
 * @param {{safeParse: Function}} schema - Object whose `safeParse(value)`
 *   returns `{ success, data }` or `{ success: false, error }`.
 * @param {*} value - Value to validate.
 * @returns {{valid: true, data: *, differences: Array} |
 *   {valid: false, errors: *, differences: Array}}
 *   On success, the parsed data and no differences. On failure, the raw error
 *   object plus one "schema-violation" difference per reported issue.
 */
export function validateSchema(schema, value) {
    const result = schema.safeParse(value);
    if (result.success) {
        return {
            valid: true,
            data: result.data,
            differences: [],
        };
    }
    const { error } = result;
    // Prefer `issues` and fall back to `errors`, so an error object that
    // exposes only one of the two lists no longer crashes the mapping below.
    let issues = [];
    if (Array.isArray(error.issues)) {
        issues = error.issues;
    }
    else if (Array.isArray(error.errors)) {
        issues = error.errors;
    }
    const differences = issues.map((issue) => {
        const path = issue.path.join(".");
        return {
            path,
            expected: issue.message,
            actual: value,
            type: "schema-violation",
            severity: "error",
            message: `${path}: ${issue.message}`,
        };
    });
    return {
        valid: false,
        errors: error,
        differences,
    };
}
/**
 * Duck-type check for a Zod-style schema: a non-null object that carries
 * `_def`, `parse` and `safeParse` (own or inherited).
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} Always a strict boolean, including for falsy inputs
 *   such as `null`, `undefined`, `0` or `""`.
 */
function isZodSchema(value) {
    // `typeof null` is "object", so null is excluded explicitly; this also
    // keeps the `in` operator from being applied to a non-object.
    return (value !== null &&
        typeof value === "object" &&
        "_def" in value &&
        "parse" in value &&
        "safeParse" in value);
}
/**
 * Evaluate `options` and throw if the result is not a match.
 *
 * @param {object} options - Passed straight to `evaluate`.
 * @throws {Error} When the evaluation does not match; the message lists the
 *   score as a percentage and one line per difference.
 */
export function assertMatch(options) {
    const outcome = evaluate(options);
    if (outcome.match) {
        return;
    }
    const lines = [
        "Evaluation failed:",
        `Score: ${(outcome.score * 100).toFixed(1)}%`,
        `Differences (${outcome.differences.length}):`,
    ];
    for (const diff of outcome.differences) {
        lines.push(` - ${diff.path}: ${diff.message}`);
    }
    throw new Error(lines.join("\n"));
}
/**
 * Score how closely `b` agrees with `a`.
 *
 * Uses the "lenient" style unless `options` supplies its own `style`.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; these override the
 *   values set here.
 * @returns {number} The `score` reported by `evaluate`.
 */
export function similarity(a, b, options = {}) {
    const evaluateOptions = {
        expected: a,
        actual: b,
        style: "lenient",
        ...options,
    };
    const { score } = evaluate(evaluateOptions);
    return score;
}
/**
 * Report whether `b` matches `a` according to `evaluate`.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; these override the
 *   values set here.
 * @returns {*} The `match` field of the evaluation result.
 */
export function matches(a, b, options = {}) {
    const evaluateOptions = { expected: a, actual: b, ...options };
    const { match } = evaluate(evaluateOptions);
    return match;
}
/**
 * List the differences `evaluate` finds between `expected` and `actual`.
 *
 * @param {*} expected - Expected value.
 * @param {*} actual - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; these override the
 *   values set here.
 * @returns {Array} The `differences` array of the evaluation result.
 */
export function getDifferences(expected, actual, options = {}) {
    const evaluateOptions = { expected, actual, ...options };
    const { differences } = evaluate(evaluateOptions);
    return differences;
}
/**
 * Build a reusable predicate bound to one expected value.
 *
 * @param {*} expected - Value every candidate is compared against.
 * @param {object} [options={}] - Options handed to `matches` on each call.
 * @returns {function(*): *} Function taking `actual` and returning the
 *   result of `matches(expected, actual, options)`.
 */
export function createMatcher(expected, options = {}) {
    return (actual) => matches(expected, actual, options);
}
/**
 * Compare `actual` with a stored snapshot, recording it on first sight.
 *
 * If `snapshots` has no entry for `name`, `actual` is stored (by reference)
 * and a synthetic perfect-match result is returned. Otherwise the stored
 * value becomes `expected` and the pair is run through `evaluate`.
 *
 * @param {string} name - Snapshot key.
 * @param {*} actual - Value to record or compare.
 * @param {Map} snapshots - Store providing `has`, `get` and `set`.
 * @param {object} [options={}] - Extra `evaluate` options for the comparison;
 *   `options.metadata` is merged into the result metadata.
 * @returns {object} An evaluation result whose `metadata.snapshot` is
 *   "created" or "compared".
 */
export function snapshot(name, actual, snapshots, options = {}) {
    if (!snapshots.has(name)) {
        snapshots.set(name, actual);
        // Count once: both detail fields describe the same freshly stored value.
        const fieldCount = countFields(actual);
        return {
            match: true,
            score: 1.0,
            differences: [],
            details: {
                exactMatch: true,
                schemaValid: true,
                structureMatch: true,
                contentSimilarity: 1.0,
                fieldsCompared: fieldCount,
                fieldsMatched: fieldCount,
                comparisonType: "exact",
            },
            metadata: { snapshot: "created", name },
        };
    }
    const expected = snapshots.get(name);
    return evaluate({
        expected,
        actual,
        ...options,
        // Placed after the spread so the snapshot markers always win.
        metadata: { ...options.metadata, snapshot: "compared", name },
    });
}
//# sourceMappingURL=evaluate.js.map