zon-format
Version:
ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
164 lines (163 loc) • 5.42 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ZonStreamDecoder = exports.ZonStreamEncoder = void 0;
const encoder_1 = require("./encoder");
const decoder_1 = require("./decoder");
const utils_1 = require("./utils");
/**
* Streaming Encoder for ZON format.
* Uses Async Generators to process data chunk by chunk, suitable for large datasets.
*/
class ZonStreamEncoder {
    constructor() {
        // Header/column state lives on the instance, so it persists across
        // successive encode() calls made on the same encoder.
        this.hasWrittenHeader = false;
        this.columns = null;
        this.encoder = new encoder_1.ZonEncoder();
    }
    /**
     * Encodes a stream of objects into ZON table format.
     *
     * The first item fixes the column set (its own keys, sorted) and produces
     * the `@:col1,col2,...` header chunk. Every item, including the first,
     * then produces one chunk of the form "\n" + comma-joined cell values.
     * Keys that a later item lacks are emitted as `null`; keys that were not
     * present on the first item are ignored.
     *
     * @param source - Iterable or AsyncIterable of objects
     * @returns AsyncGenerator yielding ZON string chunks
     * @throws Error if any item in the source is not a non-null, non-array object
     */
    async *encode(source) {
        for await (const item of source) {
            // Validate every item, not only the first: a later `null` would
            // otherwise crash on property access and a later primitive or
            // array would silently be emitted as a row of nulls.
            if (typeof item !== 'object' || item === null || Array.isArray(item)) {
                throw new Error("ZonStreamEncoder currently only supports streams of objects (tables).");
            }
            if (!this.hasWrittenHeader) {
                this.columns = Object.keys(item).sort();
                yield `@:${this.columns.join(',')}`;
                this.hasWrittenHeader = true;
            }
            const cells = this.columns.map((col) => this._formatValue(item[col]));
            yield `\n${cells.join(',')}`;
        }
    }
    /**
     * Formats a single cell value for ZON output.
     *
     * - `true` / `false` become `T` / `F`
     * - `null`, `undefined`, `NaN` and infinite numbers become `null`
     * - finite numbers use their default string form
     * - anything else is converted with `String()` and passed to `quoteString`
     *   from the utils module (so nested objects are NOT serialized
     *   structurally; they go through `String()` like any other value)
     *
     * @param val - The value to format
     * @returns The formatted string
     */
    _formatValue(val) {
        if (val === true) {
            return 'T';
        }
        if (val === false) {
            return 'F';
        }
        if (val === null || val === undefined) {
            return 'null';
        }
        if (typeof val === 'number') {
            // Number.isFinite is false for NaN as well as +/-Infinity.
            return Number.isFinite(val) ? val.toString() : 'null';
        }
        return (0, utils_1.quoteString)(String(val));
    }
}
exports.ZonStreamEncoder = ZonStreamEncoder;
/**
* Streaming Decoder for ZON format.
* Processes string chunks and yields parsed objects.
*/
class ZonStreamDecoder {
    constructor() {
        // Unconsumed text carried over between chunks (a partial last line).
        this.buffer = '';
        // Column names taken from the `@...:` header line; null until seen.
        this.columns = null;
        this.isTable = false;
        this.decoder = new decoder_1.ZonDecoder();
    }
    /**
     * Decodes a stream of ZON string chunks into objects.
     *
     * Input is split on "\n"; each line is trimmed and blank lines are
     * skipped. Until a header has been seen, only a line starting with `@`
     * is acted on: the text after its last `:` is split on `,` to obtain the
     * column names (other lines are ignored). Every later line is parsed as
     * a row and yielded as an object keyed by column name. When the source
     * ends, any remaining unterminated line is decoded as a final row.
     *
     * @param source - Iterable or AsyncIterable of string chunks
     * @returns AsyncGenerator yielding parsed objects
     */
    async *decode(source) {
        for await (const chunk of source) {
            this.buffer += chunk;
            let newlineIdx;
            while ((newlineIdx = this.buffer.indexOf('\n')) !== -1) {
                const line = this.buffer.slice(0, newlineIdx).trim();
                this.buffer = this.buffer.slice(newlineIdx + 1);
                if (!line) {
                    continue;
                }
                if (!this.columns) {
                    if (line.startsWith('@')) {
                        this.isTable = true;
                        // Everything after the last ':' is the column list.
                        const colPart = line.slice(line.lastIndexOf(':') + 1);
                        this.columns = colPart.split(',');
                    }
                }
                else {
                    yield this._rowToObject(line);
                }
            }
        }
        const tail = this.buffer.trim();
        if (tail && this.columns) {
            // Clear the buffer BEFORE yielding: the consumer may stop right
            // after this row, and a stale buffer would make a later decode()
            // call on this instance emit the same row a second time.
            this.buffer = '';
            yield this._rowToObject(tail);
        }
    }
    /**
     * Builds one result object from a row line using the current columns.
     * Columns beyond the number of parsed values are left off the object.
     *
     * @param line - The row text (without its line terminator)
     * @returns Object mapping column names to parsed values
     */
    _rowToObject(line) {
        const values = this._parseRow(line);
        const obj = {};
        const count = Math.min(this.columns.length, values.length);
        for (let i = 0; i < count; i++) {
            // defineProperty instead of `obj[col] = v`: plain assignment to a
            // column named "__proto__" would invoke the prototype setter and
            // drop the cell (or swap the object's prototype) instead of
            // storing it as data.
            Object.defineProperty(obj, this.columns[i], {
                value: values[i],
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        return obj;
    }
    /**
     * Parses a single row of ZON data.
     *
     * Splits on commas that are outside double quotes. A `"` toggles quoted
     * mode and is not copied to the value; inside quotes, `""` yields one
     * literal `"`. Each field's text is then passed to `parseValue` from the
     * utils module.
     *
     * NOTE(review): quotes are removed before `parseValue` runs, so it cannot
     * tell a quoted field from a bare one - confirm that a quoted `"123"` or
     * `"null"` is meant to be parsed like the unquoted token, and that the
     * `""` escape matches what `quoteString` produces.
     *
     * @param line - The line to parse
     * @returns Array of parsed values
     */
    _parseRow(line) {
        const values = [];
        let current = '';
        let inQuotes = false;
        for (let i = 0; i < line.length; i++) {
            const char = line[i];
            if (char === '"') {
                if (inQuotes && line[i + 1] === '"') {
                    current += '"';
                    i++; // skip the second quote of the escaped pair
                }
                else {
                    inQuotes = !inQuotes;
                }
            }
            else if (char === ',' && !inQuotes) {
                values.push((0, utils_1.parseValue)(current));
                current = '';
            }
            else {
                current += char;
            }
        }
        // The last field has no trailing comma.
        values.push((0, utils_1.parseValue)(current));
        return values;
    }
}
exports.ZonStreamDecoder = ZonStreamDecoder;