/*
 * @ai2070/l0
 * Version: (not recorded in this listing)
 * L0: The Missing Reliability Substrate for AI
 * 293 lines (292 loc) • 8.11 kB
 * JavaScript
 */
import {
compareStrings,
compareValues,
deepEqual,
calculateSimilarityScore,
countFields
} from "./utils/comparison";
/**
 * Compare `actual` against `expected` and report whether they match, a
 * similarity score, and the individual differences found.
 *
 * The strategy that was applied is reported in `details.comparisonType`:
 * - "schema":     `expected` is a Zod-style schema (per `isZodSchema`) and
 *                 `actual` was validated against it.
 * - "exact":      `deepEqual(expected, actual)` held.
 * - "fuzzy":      both values are strings, scored with `compareStrings`.
 * - "numeric":    both values are numbers, equal when within `numericTolerance`.
 * - "structural": both values have typeof "object"; delegated to `compareValues`.
 * - "mixed":      none of the above applied (reported as a type mismatch).
 *
 * @param {object} options
 * @param {*} options.expected - Expected value, or a Zod-style schema.
 * @param {*} options.actual - Value under test.
 * @param {string} [options.style="strict"] - With "strict", a match requires
 *   an exact match and no differences; with any other value a match only
 *   requires `score >= threshold`.
 * @param {number} [options.threshold=0.8] - Minimum score for a non-strict
 *   match; also the cutoff between "warning" and "error" for string diffs.
 * @param {number} [options.numericTolerance=1e-3] - Maximum absolute
 *   difference for two numbers to count as equal.
 * @param {boolean} [options.ignoreArrayOrder=false] - Forwarded to `compareValues`.
 * @param {boolean} [options.ignoreExtraFields] - Forwarded to `compareValues`;
 *   defaults to `true` only when `style` is "lenient".
 * @param {*} [options.customComparisons] - Forwarded to `compareValues`.
 * @param {*} [options.metadata] - Returned unchanged on the result.
 * @returns {{match: boolean, score: number, differences: object[], details: object, metadata: *}}
 */
function evaluate(options) {
  const {
    expected,
    actual,
    style = "strict",
    threshold = 0.8,
    numericTolerance = 1e-3,
    ignoreArrayOrder = false,
    ignoreExtraFields = style === "lenient",
    customComparisons,
    metadata
  } = options;
  const isSchema = isZodSchema(expected);
  let differences = [];
  let comparisonType = "exact";
  let schemaValid = false;
  let exactMatch = false;
  let structureMatch = false;
  let contentSimilarity = 0;
  let fieldsCompared = 0;
  let fieldsMatched = 0;

  if (isSchema) {
    // Schema validation is all-or-nothing: every derived metric follows `valid`.
    comparisonType = "schema";
    const schemaResult = validateSchema(expected, actual);
    schemaValid = schemaResult.valid;
    differences = schemaResult.differences;
    fieldsCompared = countFields(actual);
    fieldsMatched = schemaValid ? fieldsCompared : 0;
    contentSimilarity = schemaValid ? 1 : 0;
    exactMatch = schemaValid;
    structureMatch = schemaValid;
  } else {
    exactMatch = deepEqual(expected, actual);
    if (exactMatch) {
      comparisonType = "exact";
      schemaValid = true;
      structureMatch = true;
      contentSimilarity = 1;
      fieldsCompared = countFields(expected);
      fieldsMatched = fieldsCompared;
    } else {
      const expectedType = typeof expected;
      const actualType = typeof actual;
      if (expectedType === "string" && actualType === "string") {
        comparisonType = "fuzzy";
        contentSimilarity = compareStrings(expected, actual, {
          caseSensitive: true,
          normalizeWhitespace: true,
          algorithm: "levenshtein"
        });
        const withinThreshold = contentSimilarity >= threshold;
        fieldsCompared = 1;
        fieldsMatched = withinThreshold ? 1 : 0;
        if (contentSimilarity < 1) {
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            severity: withinThreshold ? "warning" : "error",
            message: `Strings differ (${(contentSimilarity * 100).toFixed(0)}% similar)`,
            similarity: contentSimilarity
          });
        }
      } else if (expectedType === "number" && actualType === "number") {
        comparisonType = "numeric";
        // Within tolerance yields no differences, but `exactMatch` stays false,
        // so the "strict" style still reports no match for such values.
        const numericMatch = Math.abs(expected - actual) <= numericTolerance;
        contentSimilarity = numericMatch ? 1 : 0;
        fieldsCompared = 1;
        fieldsMatched = numericMatch ? 1 : 0;
        if (!numericMatch) {
          differences.push({
            path: "",
            expected,
            actual,
            type: "different",
            severity: "error",
            message: `Numbers differ: ${expected} vs ${actual}`
          });
        }
      } else if (expectedType === "object" && actualType === "object") {
        // Arrays also report typeof "object", so this one test covers
        // array/array pairs as well as plain objects.
        comparisonType = "structural";
        const compOptions = {
          style,
          ignoreExtraFields,
          ignoreArrayOrder,
          numericTolerance,
          customComparisons
        };
        differences = compareValues(expected, actual, compOptions, "");
        fieldsCompared = countFields(expected);
        const errorDiffs = differences.filter((d) => d.severity === "error");
        // `fieldsCompared` only counts fields of `expected`, while the number
        // of error-level differences can exceed it; never report a negative
        // number of matched fields.
        fieldsMatched = Math.max(0, fieldsCompared - errorDiffs.length);
        structureMatch = errorDiffs.length === 0;
        contentSimilarity = calculateSimilarityScore(differences, fieldsCompared);
      } else {
        comparisonType = "mixed";
        differences.push({
          path: "",
          expected,
          actual,
          type: "type-mismatch",
          severity: "error",
          message: `Type mismatch: expected ${expectedType}, got ${actualType}`
        });
        fieldsCompared = 1;
        fieldsMatched = 0;
        contentSimilarity = 0;
      }
    }
  }

  const score = differences.length === 0 ? 1 : calculateSimilarityScore(differences, fieldsCompared);
  const match = style === "strict" ? exactMatch && differences.length === 0 : score >= threshold;
  const details = {
    exactMatch,
    schemaValid,
    structureMatch,
    contentSimilarity,
    fieldsCompared,
    fieldsMatched,
    comparisonType
  };
  return {
    match,
    score,
    differences,
    details,
    metadata
  };
}
/**
 * Run `evaluate` over a list of test cases and aggregate the outcomes.
 *
 * Only `expected`, `actual`, `style`, `threshold` and `metadata` are forwarded
 * from each test case to `evaluate`.
 *
 * @param {Iterable<{name: *, expected: *, actual: *, style?: string, threshold?: number, metadata?: *}>} tests
 * @returns {{passed: boolean, passCount: number, failCount: number, total: number,
 *   averageScore: number, results: object[], summary: object}}
 *   `passed` is true when no test failed (including when the batch is empty);
 *   `averageScore` is the mean score, or 0 for an empty batch.
 */
function evaluateBatch(tests) {
  const results = [];
  for (const test of tests) {
    const result = evaluate({
      expected: test.expected,
      actual: test.actual,
      style: test.style,
      threshold: test.threshold,
      metadata: test.metadata
    });
    results.push({
      name: test.name,
      passed: result.match,
      result,
      metadata: test.metadata
    });
  }

  const total = results.length;
  const passCount = results.filter((r) => r.passed).length;
  const failCount = total - passCount;
  const passed = failCount === 0;
  const scoreSum = results.reduce((sum, r) => sum + r.result.score, 0);
  // Guard the division: an empty batch would otherwise produce 0 / 0 === NaN.
  const averageScore = total === 0 ? 0 : scoreSum / total;

  const summary = {
    exactMatches: results.filter((r) => r.result.details.exactMatch).length,
    schemaValid: results.filter((r) => r.result.details.schemaValid).length,
    // Passed without being an exact match, via the string-similarity path.
    fuzzyMatches: results.filter(
      (r) => r.passed && !r.result.details.exactMatch && r.result.details.comparisonType === "fuzzy"
    ).length,
    totalDifferences: results.reduce((sum, r) => sum + r.result.differences.length, 0)
  };

  return {
    passed,
    passCount,
    failCount,
    total,
    averageScore,
    results,
    summary
  };
}
/**
 * Validate `value` against a schema exposing `safeParse` and translate any
 * validation issues into difference records.
 *
 * @param {{safeParse: Function}} schema - Schema object; only `safeParse(value)` is used.
 * @param {*} value - Value to validate.
 * @returns {{valid: true, data: *, differences: []} |
 *   {valid: false, errors: *, differences: object[]}}
 *   On success, the parsed data; on failure, the raw error object plus one
 *   "schema-violation" difference per reported issue.
 */
function validateSchema(schema, value) {
  const result = schema.safeParse(value);
  if (result.success) {
    return {
      valid: true,
      data: result.data,
      differences: []
    };
  }

  // `issues` is the canonical issue list on a Zod error; `errors` is an alias
  // that not every Zod version provides, so it is only used as a fallback.
  const issues = result.error.issues || result.error.errors;
  const differences = issues.map((issue) => {
    const path = issue.path.join(".");
    return {
      path,
      expected: issue.message,
      actual: value,
      type: "schema-violation",
      severity: "error",
      message: `${path}: ${issue.message}`
    };
  });

  return {
    valid: false,
    errors: result.error,
    differences
  };
}
/**
 * Duck-type check for a Zod-style schema: a non-null object that has `_def`,
 * `parse` and `safeParse` properties (own or inherited).
 *
 * @param {*} value - Candidate value.
 * @returns {boolean} Always a strict boolean, including for `null`, `0`, `""`
 *   and `undefined` inputs.
 */
function isZodSchema(value) {
  // typeof null is "object", so null must be excluded before using `in`.
  if (value === null || typeof value !== "object") {
    return false;
  }
  return "_def" in value && "parse" in value && "safeParse" in value;
}
/**
 * Evaluate `options` and throw when the result is not a match.
 *
 * @param {object} options - Passed to `evaluate` unchanged.
 * @returns {void}
 * @throws {Error} When `evaluate(options).match` is falsy. The message lists
 *   the score as a percentage followed by one line per difference.
 */
function assertMatch(options) {
  const { match, score, differences } = evaluate(options);
  if (match) {
    return;
  }

  const header = [
    "Evaluation failed:",
    `Score: ${(score * 100).toFixed(1)}%`,
    `Differences (${differences.length}):`
  ];
  const bullets = differences.map((diff) => ` - ${diff.path}: ${diff.message}`);
  throw new Error(header.concat(bullets).join("\n"));
}
/**
 * Score how similar two values are.
 *
 * Calls `evaluate` with `a` as expected and `b` as actual, using the
 * "lenient" style unless `options` overrides it.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; its keys win over
 *   the defaults set here.
 * @returns {number} The `score` reported by `evaluate`.
 */
function similarity(a, b, options = {}) {
  const request = { expected: a, actual: b, style: "lenient", ...options };
  const { score } = evaluate(request);
  return score;
}
/**
 * Report whether two values match according to `evaluate`.
 *
 * @param {*} a - Expected value.
 * @param {*} b - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; its keys win over
 *   `expected`/`actual` set here.
 * @returns {*} The `match` field reported by `evaluate`.
 */
function matches(a, b, options = {}) {
  const request = { expected: a, actual: b, ...options };
  const { match } = evaluate(request);
  return match;
}
/**
 * List the differences `evaluate` finds between two values.
 *
 * @param {*} expected - Expected value.
 * @param {*} actual - Actual value.
 * @param {object} [options={}] - Extra `evaluate` options; its keys win over
 *   `expected`/`actual` set here.
 * @returns {*} The `differences` field reported by `evaluate`.
 */
function getDifferences(expected, actual, options = {}) {
  const request = { expected, actual, ...options };
  const { differences } = evaluate(request);
  return differences;
}
/**
 * Build a reusable predicate bound to one expected value.
 *
 * @param {*} expected - Expected value every call is compared against.
 * @param {object} [options={}] - Options handed to `matches` on every call.
 * @returns {function(*): *} Function taking `actual` and returning
 *   `matches(expected, actual, options)`.
 */
function createMatcher(expected, options = {}) {
  return (actual) => matches(expected, actual, options);
}
/**
 * Compare `actual` against a stored snapshot, creating the snapshot on first use.
 *
 * When `snapshots` has no entry for `name`, `actual` is stored under that name
 * and a synthetic perfect-match result is returned with
 * `metadata.snapshot === "created"`. Otherwise the stored value is used as the
 * expected value for `evaluate`, and the result metadata is `options.metadata`
 * extended with `snapshot: "compared"` and `name`.
 *
 * @param {*} name - Snapshot key.
 * @param {*} actual - Value to store or compare.
 * @param {{has: Function, get: Function, set: Function}} snapshots - Map-like
 *   store; it is mutated when a snapshot is created.
 * @param {object} [options={}] - Extra `evaluate` options for the compare path.
 * @returns {object} An evaluation result.
 */
function snapshot(name, actual, snapshots, options = {}) {
  if (snapshots.has(name)) {
    const stored = snapshots.get(name);
    const mergedMetadata = { ...options.metadata, snapshot: "compared", name };
    return evaluate({
      expected: stored,
      actual,
      ...options,
      metadata: mergedMetadata
    });
  }

  // First sighting: record the value and report it as matching itself.
  snapshots.set(name, actual);
  const details = {
    exactMatch: true,
    schemaValid: true,
    structureMatch: true,
    contentSimilarity: 1,
    fieldsCompared: countFields(actual),
    fieldsMatched: countFields(actual),
    comparisonType: "exact"
  };
  return {
    match: true,
    score: 1,
    differences: [],
    details,
    metadata: { snapshot: "created", name }
  };
}
export {
assertMatch,
createMatcher,
evaluate,
evaluateBatch,
getDifferences,
matches,
similarity,
snapshot,
validateSchema
};
//# sourceMappingURL=evaluate.js.map