// zon-format
// Version:
// ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
// 201 lines (200 loc) • 6.26 kB
// JavaScript
;
/**
* Helper Utilities for ZON
*
* Useful functions for working with ZON data
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.size = size;
exports.compareFormats = compareFormats;
exports.inferSchema = inferSchema;
exports.compare = compare;
exports.analyze = analyze;
exports.isSafe = isSafe;
const encoder_1 = require("../core/encoder");
const binary_1 = require("../binary");
/**
 * Calculate the encoded size of data in one of the supported formats.
 *
 * The result is the `length` of whatever the selected encoder returns:
 * `encode` for 'zon', `encodeBinary` for 'binary', and `JSON.stringify`
 * for 'json' (string length, i.e. UTF-16 code units).
 *
 * @param {*} data - Value to measure.
 * @param {'zon'|'binary'|'json'} [format='zon'] - Target format.
 * @returns {number} Encoded length; 0 for 'json' when the value has no JSON
 *   representation (e.g. `undefined`, a function or a symbol).
 * @throws {Error} If `format` is not one of the supported formats.
 */
function size(data, format = 'zon') {
    switch (format) {
        case 'zon':
            return (0, encoder_1.encode)(data).length;
        case 'binary':
            return (0, binary_1.encodeBinary)(data).length;
        case 'json': {
            // JSON.stringify yields undefined (not a string) for values it
            // cannot represent; reading `.length` off that would throw.
            const json = JSON.stringify(data);
            return json === undefined ? 0 : json.length;
        }
        default:
            // Fail loudly instead of returning undefined, which would
            // silently become NaN in any arithmetic on the result.
            throw new Error(`Unknown format: ${String(format)}`);
    }
}
/**
 * Measure the same data in every supported format and report the relative
 * savings between them.
 *
 * Each savings figure is `(1 - candidate / baseline) * 100`, so a positive
 * number means the candidate format is smaller than the baseline.
 *
 * @param {*} data - Value to measure.
 * @returns {{zon: number, binary: number, json: number,
 *   savings: {zonVsJson: number, binaryVsJson: number, binaryVsZon: number}}}
 */
function compareFormats(data) {
    // Sizes are computed in the order zon, binary, json.
    const [zon, binary, json] = ['zon', 'binary', 'json'].map((format) => size(data, format));
    const percentSaved = (candidate, baseline) => (1 - candidate / baseline) * 100;
    return {
        zon,
        binary,
        json,
        savings: {
            zonVsJson: percentSaved(zon, json),
            binaryVsJson: percentSaved(binary, json),
            binaryVsZon: percentSaved(binary, zon)
        }
    };
}
/**
 * Derive a lightweight schema description from a sample value.
 *
 * The result is a plain object (`{ type, items?, properties? }`), not a full
 * ZonSchema. Arrays are described by their first element only; empty arrays
 * get `items: { type: 'any' }`. Anything that is not null/undefined, a
 * boolean, number, string, array or object falls back to `{ type: 'string' }`.
 *
 * @param {*} data - Sample value.
 * @returns {object} Schema-like description of `data`.
 */
function inferSchema(data) {
    if (data === undefined || data === null) {
        return { type: 'null' };
    }
    const kind = typeof data;
    switch (kind) {
        case 'boolean':
        case 'number':
        case 'string':
            return { type: kind };
        case 'object':
            break;
        default:
            // Functions, symbols, bigints, ...: fallback
            return { type: 'string' };
    }
    if (Array.isArray(data)) {
        // Only the first element is sampled.
        const items = data.length > 0 ? inferSchema(data[0]) : { type: 'any' };
        return { type: 'array', items };
    }
    const properties = {};
    Object.entries(data).forEach(([field, fieldValue]) => {
        properties[field] = inferSchema(fieldValue);
    });
    return { type: 'object', properties };
}
/**
 * Deep comparison of two values.
 *
 * Walks arrays and plain objects recursively and records every location
 * where the two values differ. Paths use `a.b` for object keys and `[i]`
 * for array indices; the root path is the empty string.
 *
 * Comparison rules:
 * - Primitives are compared with `===`, except that NaN is considered equal
 *   to NaN.
 * - Arrays of different length are reported as a single difference.
 * - An array never equals a non-array object, even with matching index keys.
 * - Only own enumerable keys are considered for objects; a key missing on
 *   one side is reported with `undefined` for that side.
 *
 * @param {*} a - First value.
 * @param {*} b - Second value.
 * @returns {{equal: boolean,
 *   differences: (Array<{path: string, valueA: *, valueB: *}>|undefined)}}
 *   `differences` is `undefined` when the values are equal.
 */
function compare(a, b) {
    const differences = [];
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const isObject = (value) => typeof value === 'object' && value !== null;
    function compareRecursive(val1, val2, path = '') {
        if (val1 === val2)
            return;
        // NaN !== NaN, but for data comparison two NaNs are the same value.
        if (Number.isNaN(val1) && Number.isNaN(val2))
            return;
        const isArray1 = Array.isArray(val1);
        const isArray2 = Array.isArray(val2);
        if (isArray1 && isArray2) {
            if (val1.length !== val2.length) {
                differences.push({ path, valueA: val1, valueB: val2 });
                return;
            }
            for (let i = 0; i < val1.length; i++) {
                compareRecursive(val1[i], val2[i], `${path}[${i}]`);
            }
            return;
        }
        // Both must be non-array objects; an array vs. a plain object falls
        // through to the generic mismatch below instead of comparing keys.
        if (!isArray1 && !isArray2 && isObject(val1) && isObject(val2)) {
            const allKeys = new Set([...Object.keys(val1), ...Object.keys(val2)]);
            for (const key of allKeys) {
                const newPath = path ? `${path}.${key}` : key;
                // Own-property checks: `in` would also match inherited
                // members such as `constructor` or `toString`.
                if (!hasOwn(val1, key)) {
                    differences.push({ path: newPath, valueA: undefined, valueB: val2[key] });
                }
                else if (!hasOwn(val2, key)) {
                    differences.push({ path: newPath, valueA: val1[key], valueB: undefined });
                }
                else {
                    compareRecursive(val1[key], val2[key], newPath);
                }
            }
            return;
        }
        // Different types, null vs. object, array vs. object, or unequal primitives.
        differences.push({ path, valueA: val1, valueB: val2 });
    }
    compareRecursive(a, b);
    return {
        equal: differences.length === 0,
        differences: differences.length > 0 ? differences : undefined
    };
}
/**
 * Collect structural statistics about a value by walking it recursively.
 *
 * Every visited value (the root, each array item, each object property
 * value) counts as one node. Arrays and non-null objects are containers;
 * everything else, including null, counts as a primitive. `types` holds the
 * `typeof` result of every node, so arrays and null both show up as 'object'.
 *
 * @param {*} data - Value to analyze.
 * @returns {{depth: number, fieldCount: number, arrayCount: number,
 *   objectCount: number, primitiveCount: number, totalNodes: number,
 *   types: Set<string>}} `depth` is 0 for a root with no children;
 *   `fieldCount` is the total number of object keys encountered.
 */
function analyze(data) {
    const types = new Set();
    let depth = 0;
    let fieldCount = 0;
    let arrayCount = 0;
    let objectCount = 0;
    let primitiveCount = 0;
    let totalNodes = 0;
    const visit = (node, level) => {
        if (level > depth) {
            depth = level;
        }
        totalNodes += 1;
        types.add(typeof node);
        if (Array.isArray(node)) {
            arrayCount += 1;
            // forEach skips holes in sparse arrays
            node.forEach((item) => visit(item, level + 1));
            return;
        }
        if (node === null || typeof node !== 'object') {
            primitiveCount += 1;
            return;
        }
        objectCount += 1;
        const fieldNames = Object.keys(node);
        fieldCount += fieldNames.length;
        for (const fieldName of fieldNames) {
            visit(node[fieldName], level + 1);
        }
    };
    visit(data, 0);
    return { depth, fieldCount, arrayCount, objectCount, primitiveCount, totalNodes, types };
}
/**
 * Check whether data contains values that cannot be serialized.
 *
 * Walks arrays and objects recursively and reports:
 * - `undefined` values,
 * - functions,
 * - symbols,
 * - circular references (an object or array that contains itself, directly
 *   or through descendants).
 *
 * Paths use `a.b` for object keys and `[i]` for array indices; the root is
 * reported as 'root'.
 *
 * @param {*} data - Value to inspect.
 * @returns {{safe: boolean, issues: (string[]|undefined)}} `issues` is
 *   `undefined` when nothing was found.
 */
function isSafe(data) {
    const issues = [];
    // Containers on the current root-to-node path. Tracking only ancestors
    // (not everything ever seen) lets the same object appear in several
    // places without being flagged, while still catching real cycles.
    const ancestors = new Set();
    function check(value, path = '') {
        if (value === undefined) {
            issues.push(`Undefined value at ${path || 'root'}`);
        }
        if (typeof value === 'function') {
            issues.push(`Function at ${path || 'root'} (not serializable)`);
        }
        if (typeof value === 'symbol') {
            issues.push(`Symbol at ${path || 'root'} (not serializable)`);
        }
        if (typeof value !== 'object' || value === null) {
            return;
        }
        if (ancestors.has(value)) {
            // Report and stop; descending further would recurse forever.
            issues.push(`Circular reference at ${path || 'root'}`);
            return;
        }
        ancestors.add(value);
        if (Array.isArray(value)) {
            value.forEach((item, i) => check(item, `${path}[${i}]`));
        }
        else {
            Object.entries(value).forEach(([key, val]) => {
                const newPath = path ? `${path}.${key}` : key;
                check(val, newPath);
            });
        }
        ancestors.delete(value);
    }
    check(data);
    return {
        safe: issues.length === 0,
        issues: issues.length > 0 ? issues : undefined
    };
}