zon-format
Version:
ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
151 lines (150 loc) • 5.22 kB
JavaScript
;
/**
* Built-in Metrics for ZON LLM Evaluation
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.BUILTIN_METRICS = exports.latency = exports.hallucination = exports.partialMatch = exports.formatCorrectness = exports.structuralValidity = exports.tokenEfficiency = exports.exactMatch = void 0;
exports.registerBuiltinMetrics = registerBuiltinMetrics;
const decoder_1 = require("../core/decoder");
const schema_1 = require("../schema/schema");
/**
* Exact match metric - checks if answer exactly matches expected
*/
exports.exactMatch = {
name: 'exactMatch',
description: 'Percentage of questions answered exactly correctly',
compute: (expected, actual, context) => {
const normalizedExpected = JSON.stringify(expected);
const normalizedActual = JSON.stringify(actual);
return normalizedExpected === normalizedActual ? 1.0 : 0.0;
},
higherIsBetter: true
};
/**
* Token efficiency metric - accuracy per 1000 tokens
*/
exports.tokenEfficiency = {
name: 'tokenEfficiency',
description: 'Accuracy percentage per 1000 tokens used',
compute: (expected, actual, context) => {
const isCorrect = JSON.stringify(expected) === JSON.stringify(actual);
const accuracy = isCorrect ? 1.0 : 0.0;
const tokens = (context === null || context === void 0 ? void 0 : context.tokens) || 1;
return (accuracy / tokens) * 1000;
},
higherIsBetter: true
};
/**
* Structural validity metric - checks if output matches schema
*/
exports.structuralValidity = {
name: 'structuralValidity',
description: 'Percentage of responses that match the expected schema',
compute: (expected, actual, context) => {
if (!(context === null || context === void 0 ? void 0 : context.schema)) {
return 1.0;
}
try {
const validation = (0, schema_1.validate)(actual, context.schema);
return validation.success ? 1.0 : 0.0;
}
catch (_a) {
return 0.0;
}
},
higherIsBetter: true
};
/**
* Format correctness metric - checks if output is valid ZON
*/
exports.formatCorrectness = {
name: 'formatCorrectness',
description: 'Percentage of responses that parse as valid ZON',
compute: (expected, actual, context) => {
if (typeof actual !== 'string') {
return 1.0;
}
try {
(0, decoder_1.decode)(actual);
return 1.0;
}
catch (_a) {
return 0.0;
}
},
higherIsBetter: true
};
/**
* Partial match metric - scores based on field-level correctness
*/
exports.partialMatch = {
name: 'partialMatch',
description: 'Percentage of fields that match between expected and actual',
compute: (expected, actual, context) => {
if (typeof expected !== 'object' || typeof actual !== 'object') {
return expected === actual ? 1.0 : 0.0;
}
const expectedKeys = Object.keys(expected);
if (expectedKeys.length === 0)
return 1.0;
let matchCount = 0;
for (const key of expectedKeys) {
if (key in actual && JSON.stringify(expected[key]) === JSON.stringify(actual[key])) {
matchCount++;
}
}
return matchCount / expectedKeys.length;
},
higherIsBetter: true
};
/**
* Hallucination detection metric (placeholder for LLM-as-judge)
*
* Note: This is a simplified version. In production, this would call
* another LLM to judge if the answer contains hallucinations.
*/
exports.hallucination = {
name: 'hallucination',
description: 'Score indicating likelihood of hallucination (0 = no hallucination, 1 = definite hallucination)',
compute: async (expected, actual, context) => {
// Placeholder implementation
if (!(context === null || context === void 0 ? void 0 : context.sourceData))
return 0.0;
const sourceStr = JSON.stringify(context.sourceData).toLowerCase();
const actualStr = JSON.stringify(actual).toLowerCase();
const containsSourceData = sourceStr.includes(actualStr.substring(0, Math.min(50, actualStr.length)));
return containsSourceData ? 0.0 : 0.3;
},
higherIsBetter: false
};
/**
* Latency metric - measures response time
*/
exports.latency = {
name: 'latency',
description: 'Average response time in milliseconds',
compute: (expected, actual, context) => {
return (context === null || context === void 0 ? void 0 : context.latencyMs) || 0;
},
higherIsBetter: false
};
/**
* All built-in metrics
*/
exports.BUILTIN_METRICS = {
exactMatch: exports.exactMatch,
tokenEfficiency: exports.tokenEfficiency,
structuralValidity: exports.structuralValidity,
formatCorrectness: exports.formatCorrectness,
partialMatch: exports.partialMatch,
hallucination: exports.hallucination,
latency: exports.latency
};
/**
 * Register all built-in metrics with an evaluator.
 *
 * Calls `evaluator.registerMetric(name, metric)` once for every entry of
 * `exports.BUILTIN_METRICS`, in the object's key order. Returns nothing.
 */
function registerBuiltinMetrics(evaluator) {
    Object.entries(exports.BUILTIN_METRICS).forEach(([metricName, metricDef]) => {
        evaluator.registerMetric(metricName, metricDef);
    });
}