// zon-format
// Version:
// ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
// 136 lines (135 loc) • 4.69 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TypeInferrer = void 0;
/**
 * Infers ZON value types for single values and for whole columns, and
 * coerces values to an inferred type.
 */
class TypeInferrer {
    /**
     * Infers the ZON type of a value.
     *
     * Strings are trimmed and probed in a fixed order: boolean token,
     * number, ISO date, JSON object/array. The first match wins, so the
     * strings '1' and '0' are reported as boolean, not number. Everything
     * else (including every non-string) is reported with its `typeof`
     * result and is not coercible; note that `typeof null` is 'object'.
     *
     * @param value - Value to infer type for
     * @returns Inferred ZonType
     */
    infer(value) {
        if (typeof value === 'string') {
            // The helpers trim internally, so the raw string is passed through.
            if (this._isBoolean(value)) {
                return { type: 'boolean', coercible: true, original: 'string' };
            }
            if (this._isNumber(value)) {
                return { type: 'number', coercible: true, original: 'string' };
            }
            if (this._isDate(value)) {
                return { type: 'date', coercible: true, original: 'string' };
            }
            if (this._isJSON(value)) {
                return { type: 'json', coercible: true, original: 'string' };
            }
        }
        return { type: typeof value, coercible: false };
    }
    /**
     * Coerces a value to the target type.
     *
     * The original value is returned untouched whenever it cannot be
     * converted, so a stray cell in a mostly-typed column is never turned
     * into NaN, an Invalid Date, or a silent `false`:
     * - `null` / `undefined` always pass through unchanged.
     * - number: empty or whitespace-only strings and anything `Number()`
     *   maps to NaN are returned as-is.
     * - boolean: surrounding whitespace is ignored (matching `infer`);
     *   true/yes/1 give `true`, false/no/0 give `false`, anything else is
     *   returned as-is.
     * - date: values that produce an Invalid Date are returned as-is.
     * - json: only strings are parsed; non-strings and unparseable strings
     *   are returned as-is.
     *
     * @param value - Value to coerce
     * @param targetType - Target type definition
     * @returns Coerced value or original value
     */
    coerce(value, targetType) {
        if (!targetType.coercible) {
            return value;
        }
        // Column inference ignores absent values, so they must stay absent
        // instead of becoming 0, false, or the epoch date.
        if (value === null || value === undefined) {
            return value;
        }
        // infer() and the _is* helpers trim before matching; do the same here
        // so every value they accept is also converted.
        const input = typeof value === 'string' ? value.trim() : value;
        switch (targetType.type) {
            case 'number': {
                // Number('') is 0, which would invent data for a blank cell.
                if (input === '') {
                    return value;
                }
                const asNumber = Number(input);
                return Number.isNaN(asNumber) ? value : asNumber;
            }
            case 'boolean': {
                if (typeof value === 'boolean') {
                    return value;
                }
                const token = String(input);
                if (/^(true|yes|1)$/i.test(token)) {
                    return true;
                }
                if (/^(false|no|0)$/i.test(token)) {
                    return false;
                }
                return value;
            }
            case 'date': {
                const asDate = new Date(input);
                return Number.isNaN(asDate.getTime()) ? value : asDate;
            }
            case 'json':
                // JSON.parse stringifies its argument first, so an array such
                // as [1] would become "1" and come back as the number 1.
                if (typeof value !== 'string') {
                    return value;
                }
                try {
                    return JSON.parse(value);
                }
                catch (e) {
                    // Not valid JSON: keep the original text.
                    return value;
                }
            default:
                return value;
        }
    }
    /**
     * Infers the dominant type for a column of values.
     *
     * `null` and `undefined` entries are ignored. The candidate types are
     * tried in the order boolean, number, date, json; the first one matched
     * by at least 80% of the remaining values wins and is returned with that
     * share as `confidence`. An empty column (after filtering) yields
     * 'undefined'; a column with no dominant type yields 'mixed'.
     *
     * @param values - Array of values
     * @returns Dominant ZonType
     */
    inferColumnType(values) {
        const present = values.filter(v => v !== null && v !== undefined);
        const total = present.length;
        if (total === 0) {
            return { type: 'undefined', coercible: false };
        }
        const threshold = 0.8;
        // Order matters: '1'/'0' strings satisfy both the boolean and the
        // number test, and boolean is checked first.
        const candidates = [
            ['boolean', v => this._isBoolean(v)],
            ['number', v => this._isNumber(v)],
            ['date', v => this._isDate(v)],
            ['json', v => this._isJSON(v)],
        ];
        for (const [type, matches] of candidates) {
            const confidence = present.filter(matches).length / total;
            if (confidence >= threshold) {
                return { type, coercible: true, confidence };
            }
        }
        return { type: 'mixed', coercible: false };
    }
    /**
     * True for numbers and for strings that, once trimmed, are a plain
     * decimal literal with optional sign, fraction and exponent.
     */
    _isNumber(v) {
        if (typeof v === 'number') {
            return true;
        }
        if (typeof v === 'string') {
            return /^-?\d+(\.\d+)?([eE][+-]?\d+)?$/.test(v.trim());
        }
        return false;
    }
    /**
     * True for booleans and for strings that, once trimmed, are one of
     * true/false/yes/no/1/0 (case-insensitive).
     */
    _isBoolean(v) {
        if (typeof v === 'boolean') {
            return true;
        }
        if (typeof v === 'string') {
            return /^(true|false|yes|no|1|0)$/i.test(v.trim());
        }
        return false;
    }
    /**
     * True for Date instances and for strings that, once trimmed, have the
     * ISO date shape accepted by `_isISODate`.
     */
    _isDate(v) {
        if (v instanceof Date) {
            return true;
        }
        if (typeof v === 'string') {
            return this._isISODate(v.trim());
        }
        return false;
    }
    /**
     * True for any value whose `typeof` is 'object' (this includes `null`
     * and arrays) and for strings that, once trimmed, are wrapped in
     * `{}` or `[]` and parse as JSON.
     */
    _isJSON(v) {
        if (typeof v === 'object') {
            return true;
        }
        if (typeof v === 'string') {
            const trimmed = v.trim();
            const looksLikeContainer = (trimmed.startsWith('{') && trimmed.endsWith('}')) ||
                (trimmed.startsWith('[') && trimmed.endsWith(']'));
            if (looksLikeContainer) {
                try {
                    JSON.parse(trimmed);
                    return true;
                }
                catch (e) {
                    // Bracketed but not parseable: treat as ordinary text.
                    return false;
                }
            }
        }
        return false;
    }
    /**
     * Shape check for `YYYY-MM-DD`, optionally followed by `THH:MM:SS`, an
     * optional `.mmm` fraction, and an optional `Z` or `+HH:MM` / `+HHMM`
     * style offset. Only digit counts are checked; field ranges are not
     * validated (a month of 13 still matches).
     */
    _isISODate(s) {
        return /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{3})?(Z|[-+]\d{2}:?\d{2})?)?$/.test(s);
    }
}
// Publish the class on the CommonJS exports object.
exports.TypeInferrer = TypeInferrer;