UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

136 lines (135 loc) 4.69 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.TypeInferrer = void 0; class TypeInferrer { /** * Infers the ZON type of a value. * * @param value - Value to infer type for * @returns Inferred ZonType */ infer(value) { if (typeof value === 'string') { const trimmed = value.trim(); if (/^(true|false|yes|no|1|0)$/i.test(trimmed)) { return { type: 'boolean', coercible: true, original: 'string' }; } if (/^-?\d+(\.\d+)?([eE][+-]?\d+)?$/.test(trimmed)) { return { type: 'number', coercible: true, original: 'string' }; } if (this._isISODate(trimmed)) { return { type: 'date', coercible: true, original: 'string' }; } if ((trimmed.startsWith('{') && trimmed.endsWith('}')) || (trimmed.startsWith('[') && trimmed.endsWith(']'))) { try { JSON.parse(trimmed); return { type: 'json', coercible: true, original: 'string' }; } catch (e) { } } } return { type: typeof value, coercible: false }; } /** * Coerces a value to the target type. * * @param value - Value to coerce * @param targetType - Target type definition * @returns Coerced value or original value */ coerce(value, targetType) { if (!targetType.coercible) return value; switch (targetType.type) { case 'number': return Number(value); case 'boolean': return /^(true|yes|1)$/i.test(String(value)); case 'date': return new Date(value); case 'json': try { return JSON.parse(value); } catch (e) { return value; } default: return value; } } /** * Infers the dominant type for a column of values. * * @param values - Array of values * @returns Dominant ZonType */ inferColumnType(values) { const nonNullValues = values.filter(v => v !== null && v !== undefined); const total = nonNullValues.length; if (total === 0) return { type: 'undefined', coercible: false }; const booleanCount = nonNullValues.filter(v => this._isBoolean(v)).length; if (booleanCount / total >= 0.8) { return { type: 'boolean', coercible: true, confidence: booleanCount / total }; } const numberCount = nonNullValues.filter(v => this._isNumber(v)).length; if (numberCount / total >= 0.8) { return { type: 'number', coercible: true, confidence: numberCount / total }; } const dateCount = nonNullValues.filter(v => this._isDate(v)).length; if (dateCount / total >= 0.8) { return { type: 'date', coercible: true, confidence: dateCount / total }; } const jsonCount = nonNullValues.filter(v => this._isJSON(v)).length; if (jsonCount / total >= 0.8) { return { type: 'json', coercible: true, confidence: jsonCount / total }; } return { type: 'mixed', coercible: false }; } _isNumber(v) { if (typeof v === 'number') return true; if (typeof v === 'string') return /^-?\d+(\.\d+)?([eE][+-]?\d+)?$/.test(v.trim()); return false; } _isBoolean(v) { if (typeof v === 'boolean') return true; if (typeof v === 'string') return /^(true|false|yes|no|1|0)$/i.test(v.trim()); return false; } _isDate(v) { if (v instanceof Date) return true; if (typeof v === 'string') return this._isISODate(v.trim()); return false; } _isJSON(v) { if (typeof v === 'object') return true; if (typeof v === 'string') { const trimmed = v.trim(); if ((trimmed.startsWith('{') && trimmed.endsWith('}')) || (trimmed.startsWith('[') && trimmed.endsWith(']'))) { try { JSON.parse(trimmed); return true; } catch (e) { return false; } } } return false; } _isISODate(s) { return /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{3})?(Z|[-+]\d{2}:?\d{2})?)?$/.test(s); } } exports.TypeInferrer = TypeInferrer;