UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

201 lines (200 loc) 6.26 kB
"use strict";
/**
 * Helper Utilities for ZON
 *
 * Useful functions for working with ZON data
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.size = size;
exports.compareFormats = compareFormats;
exports.inferSchema = inferSchema;
exports.compare = compare;
exports.analyze = analyze;
exports.isSafe = isSafe;
const encoder_1 = require("../core/encoder");
const binary_1 = require("../binary");

/**
 * Calculate the encoded size of data in different formats.
 *
 * The result is the `.length` of whatever the selected encoder returns. For
 * 'json' that is the number of UTF-16 code units in the JSON string, which is
 * not the same as a byte count when the text contains non-ASCII characters.
 *
 * @param {*} data - Value to encode.
 * @param {'zon'|'binary'|'json'} [format='zon'] - Target format.
 * @returns {number|undefined} Length of the encoded output, or `undefined`
 *   when `format` is not one of the three recognised names.
 */
function size(data, format = 'zon') {
    // The `(0, fn)(...)` form calls the imported function without binding
    // `this` to the module object; it is kept exactly as the compiler emitted it.
    switch (format) {
        case 'zon':
            return (0, encoder_1.encode)(data).length;
        case 'binary':
            return (0, binary_1.encodeBinary)(data).length;
        case 'json':
            return JSON.stringify(data).length;
    }
}

/**
 * Compare sizes across all formats.
 *
 * @param {*} data - Value to encode in every format.
 * @returns {{zon: number, binary: number, json: number,
 *   savings: {zonVsJson: number, binaryVsJson: number, binaryVsZon: number}}}
 *   Raw sizes plus relative savings expressed as percentages, computed as
 *   `(1 - smaller / baseline) * 100`. A negative value means the first
 *   format is larger than the baseline.
 */
function compareFormats(data) {
    const zonSize = size(data, 'zon');
    const binarySize = size(data, 'binary');
    const jsonSize = size(data, 'json');
    return {
        zon: zonSize,
        binary: binarySize,
        json: jsonSize,
        savings: {
            zonVsJson: ((1 - zonSize / jsonSize) * 100),
            binaryVsJson: ((1 - binarySize / jsonSize) * 100),
            binaryVsZon: ((1 - binarySize / zonSize) * 100)
        }
    };
}

/**
 * Infer a basic schema structure from sample data.
 * Note: Returns a simple object representation, not a full ZonSchema.
 *
 * @param {*} data - Sample value.
 * @returns {{type: string, items?: object, properties?: object}} Schema
 *   description. Arrays are described by their first element only (or
 *   `{ type: 'any' }` when empty); objects get one entry per own enumerable
 *   key. Any value that matches none of the checks is reported as 'string'.
 */
function inferSchema(data) {
    if (data === null || data === undefined) {
        return { type: 'null' };
    }
    if (typeof data === 'boolean') {
        return { type: 'boolean' };
    }
    if (typeof data === 'number') {
        return { type: 'number' };
    }
    if (typeof data === 'string') {
        return { type: 'string' };
    }
    if (Array.isArray(data)) {
        if (data.length === 0) {
            return { type: 'array', items: { type: 'any' } };
        }
        // Infer from first element
        return { type: 'array', items: inferSchema(data[0]) };
    }
    if (typeof data === 'object') {
        const shape = {};
        for (const [key, value] of Object.entries(data)) {
            shape[key] = inferSchema(value);
        }
        return { type: 'object', properties: shape };
    }
    return { type: 'string' }; // Fallback
}

/**
 * Deep comparison of two values.
 *
 * Arrays are compared element by element (a length mismatch is reported as a
 * single difference at the array's path); plain objects are compared by the
 * union of their own enumerable keys. An array is never considered equal to a
 * non-array object, and two NaN values are considered equal.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {{equal: boolean,
 *   differences: Array<{path: string, valueA: *, valueB: *}>|undefined}}
 *   `differences` is `undefined` when the values are equal. The root path is
 *   the empty string; nested paths look like `a.b[2].c`.
 */
function compare(a, b) {
    const differences = [];
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    function compareRecursive(val1, val2, path = '') {
        if (val1 === val2)
            return;
        // NaN is the only value that is not === to itself; identical data
        // containing NaN must not be reported as different.
        if (typeof val1 === 'number' && typeof val2 === 'number' &&
            Number.isNaN(val1) && Number.isNaN(val2)) {
            return;
        }
        if (typeof val1 !== typeof val2) {
            differences.push({ path, valueA: val1, valueB: val2 });
            return;
        }
        const isArray1 = Array.isArray(val1);
        const isArray2 = Array.isArray(val2);
        // Both sides have typeof 'object' here, so an array paired with a
        // plain object would otherwise fall through to the key comparison
        // and e.g. [] would compare equal to {}.
        if (isArray1 !== isArray2) {
            differences.push({ path, valueA: val1, valueB: val2 });
            return;
        }
        if (isArray1) {
            if (val1.length !== val2.length) {
                differences.push({ path, valueA: val1, valueB: val2 });
                return;
            }
            for (let i = 0; i < val1.length; i++) {
                compareRecursive(val1[i], val2[i], `${path}[${i}]`);
            }
            return;
        }
        if (typeof val1 === 'object' && val1 !== null && val2 !== null) {
            const allKeys = new Set([...Object.keys(val1), ...Object.keys(val2)]);
            for (const key of allKeys) {
                const newPath = path ? `${path}.${key}` : key;
                // Own-property test: `in` would also match inherited members
                // such as `toString`, hiding a key that exists on one side only.
                if (!hasOwn(val1, key)) {
                    differences.push({ path: newPath, valueA: undefined, valueB: val2[key] });
                }
                else if (!hasOwn(val2, key)) {
                    differences.push({ path: newPath, valueA: val1[key], valueB: undefined });
                }
                else {
                    compareRecursive(val1[key], val2[key], newPath);
                }
            }
            return;
        }
        // Same typeof but unequal primitives, or null paired with an object.
        differences.push({ path, valueA: val1, valueB: val2 });
    }
    compareRecursive(a, b);
    return {
        equal: differences.length === 0,
        differences: differences.length > 0 ? differences : undefined
    };
}

/**
 * Analyze data structure complexity.
 *
 * Walks the value recursively; the root is at depth 0 and every array element
 * or object property is one level deeper. There is no cycle detection, so the
 * input must not contain circular references.
 *
 * @param {*} data - Value to inspect.
 * @returns {{depth: number, fieldCount: number, arrayCount: number,
 *   objectCount: number, primitiveCount: number, totalNodes: number,
 *   types: Set<string>}} Counters gathered during the walk. `types` holds the
 *   `typeof` result of every visited node, so arrays and `null` both show up
 *   as 'object'. `null` is counted as a primitive.
 */
function analyze(data) {
    const stats = {
        maxDepth: 0,
        fieldCount: 0,
        arrayCount: 0,
        objectCount: 0,
        primitiveCount: 0,
        totalNodes: 0,
        types: new Set()
    };
    function traverse(value, depth) {
        stats.maxDepth = Math.max(stats.maxDepth, depth);
        stats.totalNodes++;
        stats.types.add(typeof value);
        if (Array.isArray(value)) {
            stats.arrayCount++;
            value.forEach(item => traverse(item, depth + 1));
        }
        else if (typeof value === 'object' && value !== null) {
            stats.objectCount++;
            const keys = Object.keys(value);
            stats.fieldCount += keys.length;
            keys.forEach(key => traverse(value[key], depth + 1));
        }
        else {
            stats.primitiveCount++;
        }
    }
    traverse(data, 0);
    return {
        depth: stats.maxDepth,
        fieldCount: stats.fieldCount,
        arrayCount: stats.arrayCount,
        objectCount: stats.objectCount,
        primitiveCount: stats.primitiveCount,
        totalNodes: stats.totalNodes,
        types: stats.types
    };
}

/**
 * Check if data is safe for ZON encoding.
 *
 * Reports `undefined` values, functions, symbols and circular references
 * anywhere in the structure. A circular reference is reported once, at the
 * path where an object that is already being visited is encountered again;
 * the walk does not descend into it a second time.
 *
 * @param {*} data - Value to validate.
 * @returns {{safe: boolean, issues: string[]|undefined}} `issues` is
 *   `undefined` when nothing was found. Each message names the offending
 *   path, or 'root' for the top-level value.
 */
function isSafe(data) {
    const issues = [];
    // Containers on the current recursion path. Tracking ancestors rather than
    // every visited object keeps shared (non-circular) references legal.
    const ancestors = new Set();
    function check(value, path = '') {
        const where = path || 'root';
        if (value === undefined) {
            issues.push(`Undefined value at ${where}`);
        }
        if (typeof value === 'function') {
            issues.push(`Function at ${where} (not serializable)`);
        }
        if (typeof value === 'symbol') {
            issues.push(`Symbol at ${where} (not serializable)`);
        }
        if (typeof value !== 'object' || value === null) {
            return;
        }
        if (ancestors.has(value)) {
            // Without this guard the recursion never terminates and the call
            // dies with a stack overflow instead of returning a report.
            issues.push(`Circular reference at ${where}`);
            return;
        }
        ancestors.add(value);
        if (Array.isArray(value)) {
            value.forEach((item, i) => check(item, `${path}[${i}]`));
        }
        else {
            Object.entries(value).forEach(([key, val]) => {
                const newPath = path ? `${path}.${key}` : key;
                check(val, newPath);
            });
        }
        ancestors.delete(value);
    }
    check(data);
    return {
        safe: issues.length === 0,
        issues: issues.length > 0 ? issues : undefined
    };
}