UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

164 lines (163 loc) 5.42 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ZonStreamDecoder = exports.ZonStreamEncoder = void 0; const encoder_1 = require("./encoder"); const decoder_1 = require("./decoder"); const utils_1 = require("./utils"); /** * Streaming Encoder for ZON format. * Uses Async Generators to process data chunk by chunk, suitable for large datasets. */ class ZonStreamEncoder { constructor() { this.hasWrittenHeader = false; this.columns = null; this.encoder = new encoder_1.ZonEncoder(); } /** * Encodes a stream of objects into ZON format. * Assumes the stream consists of uniform objects (table format). * * @param source - Iterable or AsyncIterable of objects * @returns AsyncGenerator yielding ZON string chunks * @throws Error if the source contains non-object items */ async *encode(source) { for await (const item of source) { if (!this.hasWrittenHeader) { if (typeof item === 'object' && item !== null && !Array.isArray(item)) { this.columns = Object.keys(item).sort(); const header = `@:${this.columns.join(',')}`; yield header; this.hasWrittenHeader = true; } else { throw new Error("ZonStreamEncoder currently only supports streams of objects (tables)."); } } if (this.columns) { const row = this.columns.map(col => { const val = item[col]; return this._formatValue(val); }); yield "\n" + row.join(','); } } } /** * Formats a single value for ZON output. * * @param val - The value to format * @returns The formatted string */ _formatValue(val) { if (val === true) return 'T'; if (val === false) return 'F'; if (val === null || val === undefined) return 'null'; if (typeof val === 'number') { if (Number.isNaN(val) || !Number.isFinite(val)) return 'null'; return val.toString(); } const s = String(val); return (0, utils_1.quoteString)(s); } } exports.ZonStreamEncoder = ZonStreamEncoder; /** * Streaming Decoder for ZON format. * Processes string chunks and yields parsed objects. */ class ZonStreamDecoder { constructor() { this.buffer = ''; this.columns = null; this.isTable = false; this.decoder = new decoder_1.ZonDecoder(); } /** * Decodes a stream of ZON string chunks into objects. * * @param source - Iterable or AsyncIterable of string chunks * @returns AsyncGenerator yielding parsed objects */ async *decode(source) { for await (const chunk of source) { this.buffer += chunk; let newlineIdx; while ((newlineIdx = this.buffer.indexOf('\n')) !== -1) { const line = this.buffer.slice(0, newlineIdx).trim(); this.buffer = this.buffer.slice(newlineIdx + 1); if (!line) continue; if (!this.columns) { if (line.startsWith('@')) { this.isTable = true; const parts = line.split(':'); const colPart = parts[parts.length - 1]; this.columns = colPart.split(','); } } else { const values = this._parseRow(line); const obj = {}; this.columns.forEach((col, i) => { if (i < values.length) { obj[col] = values[i]; } }); yield obj; } } } if (this.buffer.trim()) { const line = this.buffer.trim(); if (this.columns) { const values = this._parseRow(line); const obj = {}; this.columns.forEach((col, i) => { if (i < values.length) { obj[col] = values[i]; } }); yield obj; } } } /** * Parses a single row of ZON data. * * @param line - The line to parse * @returns Array of parsed values */ _parseRow(line) { const values = []; let current = ''; let inQuotes = false; for (let i = 0; i < line.length; i++) { const char = line[i]; if (char === '"') { if (inQuotes && line[i + 1] === '"') { current += '"'; i++; } else { inQuotes = !inQuotes; } } else if (char === ',' && !inQuotes) { values.push((0, utils_1.parseValue)(current)); current = ''; } else { current += char; } } values.push((0, utils_1.parseValue)(current)); return values; } } exports.ZonStreamDecoder = ZonStreamDecoder;