UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

151 lines (150 loc) 5.22 kB
"use strict"; /** * Built-in Metrics for ZON LLM Evaluation */ Object.defineProperty(exports, "__esModule", { value: true }); exports.BUILTIN_METRICS = exports.latency = exports.hallucination = exports.partialMatch = exports.formatCorrectness = exports.structuralValidity = exports.tokenEfficiency = exports.exactMatch = void 0; exports.registerBuiltinMetrics = registerBuiltinMetrics; const decoder_1 = require("../core/decoder"); const schema_1 = require("../schema/schema"); /** * Exact match metric - checks if answer exactly matches expected */ exports.exactMatch = { name: 'exactMatch', description: 'Percentage of questions answered exactly correctly', compute: (expected, actual, context) => { const normalizedExpected = JSON.stringify(expected); const normalizedActual = JSON.stringify(actual); return normalizedExpected === normalizedActual ? 1.0 : 0.0; }, higherIsBetter: true }; /** * Token efficiency metric - accuracy per 1000 tokens */ exports.tokenEfficiency = { name: 'tokenEfficiency', description: 'Accuracy percentage per 1000 tokens used', compute: (expected, actual, context) => { const isCorrect = JSON.stringify(expected) === JSON.stringify(actual); const accuracy = isCorrect ? 1.0 : 0.0; const tokens = (context === null || context === void 0 ? void 0 : context.tokens) || 1; return (accuracy / tokens) * 1000; }, higherIsBetter: true }; /** * Structural validity metric - checks if output matches schema */ exports.structuralValidity = { name: 'structuralValidity', description: 'Percentage of responses that match the expected schema', compute: (expected, actual, context) => { if (!(context === null || context === void 0 ? void 0 : context.schema)) { return 1.0; } try { const validation = (0, schema_1.validate)(actual, context.schema); return validation.success ? 1.0 : 0.0; } catch (_a) { return 0.0; } }, higherIsBetter: true }; /** * Format correctness metric - checks if output is valid ZON */ exports.formatCorrectness = { name: 'formatCorrectness', description: 'Percentage of responses that parse as valid ZON', compute: (expected, actual, context) => { if (typeof actual !== 'string') { return 1.0; } try { (0, decoder_1.decode)(actual); return 1.0; } catch (_a) { return 0.0; } }, higherIsBetter: true }; /** * Partial match metric - scores based on field-level correctness */ exports.partialMatch = { name: 'partialMatch', description: 'Percentage of fields that match between expected and actual', compute: (expected, actual, context) => { if (typeof expected !== 'object' || typeof actual !== 'object') { return expected === actual ? 1.0 : 0.0; } const expectedKeys = Object.keys(expected); if (expectedKeys.length === 0) return 1.0; let matchCount = 0; for (const key of expectedKeys) { if (key in actual && JSON.stringify(expected[key]) === JSON.stringify(actual[key])) { matchCount++; } } return matchCount / expectedKeys.length; }, higherIsBetter: true }; /** * Hallucination detection metric (placeholder for LLM-as-judge) * * Note: This is a simplified version. In production, this would call * another LLM to judge if the answer contains hallucinations. */ exports.hallucination = { name: 'hallucination', description: 'Score indicating likelihood of hallucination (0 = no hallucination, 1 = definite hallucination)', compute: async (expected, actual, context) => { // Placeholder implementation if (!(context === null || context === void 0 ? void 0 : context.sourceData)) return 0.0; const sourceStr = JSON.stringify(context.sourceData).toLowerCase(); const actualStr = JSON.stringify(actual).toLowerCase(); const containsSourceData = sourceStr.includes(actualStr.substring(0, Math.min(50, actualStr.length))); return containsSourceData ? 0.0 : 0.3; }, higherIsBetter: false }; /** * Latency metric - measures response time */ exports.latency = { name: 'latency', description: 'Average response time in milliseconds', compute: (expected, actual, context) => { return (context === null || context === void 0 ? void 0 : context.latencyMs) || 0; }, higherIsBetter: false }; /** * All built-in metrics */ exports.BUILTIN_METRICS = { exactMatch: exports.exactMatch, tokenEfficiency: exports.tokenEfficiency, structuralValidity: exports.structuralValidity, formatCorrectness: exports.formatCorrectness, partialMatch: exports.partialMatch, hallucination: exports.hallucination, latency: exports.latency }; /** * Register all built-in metrics with an evaluator */ function registerBuiltinMetrics(evaluator) { for (const [name, metric] of Object.entries(exports.BUILTIN_METRICS)) { evaluator.registerMetric(name, metric); } }