zon-format
Version:
ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
200 lines (199 loc) • 6.15 kB
TypeScript
/**
* Options bag accepted by `ZonEncoder.encode` and the standalone `encode`
* function declared in this file. Every field is optional.
*
* NOTE(review): the defaults quoted on each field are taken from the existing
* comments; the implementation is not visible in this declaration file, so
* confirm them against the source before relying on them.
*/
export interface EncodeOptions {
/** Anchor interval for streaming (default: 100) */
anchorInterval?: number;
/** Enable dictionary compression (default: true) */
enableDictCompression?: boolean;
/** Enable type coercion (default: false) */
enableTypeCoercion?: boolean;
/** Embed version metadata in output (default: false) */
embedMetadata?: boolean;
/**
* Version string to embed (default: "1.3.0").
* NOTE(review): presumably only has an effect when `embedMetadata` is true — confirm.
*/
version?: string;
/**
* Schema ID to embed. No default is documented.
* NOTE(review): presumably only has an effect when `embedMetadata` is true — confirm.
*/
schemaId?: string;
/** Disable table formatting (default: false) */
disableTables?: boolean;
}
/**
* Encodes data structures into ZON format v1.3.0.
*
* Only the type surface is declared here. The public API is the constructor and
* `encode`; every other member is private. Private members appear without
* parameter or return types because TypeScript declaration output omits them,
* so the `@param`/`@returns` tags on private methods describe the
* implementation's signature rather than anything the compiler checks here.
*
* The constructor's positional parameters carry the same names as the
* `anchorInterval`, `enableDictCompression`, `enableTypeCoercion` and
* `disableTables` fields of `EncodeOptions`.
*/
export declare class ZonEncoder {
/**
* NOTE(review): presumably holds the constructor's `anchorInterval`; the
* snake_case name differs from the camelCase fields below — confirm.
*/
private anchor_interval;
/**
* NOTE(review): the name suggests a regular expression for strings that can be
* written without quoting — confirm against the implementation.
*/
private safe_str_re;
/** NOTE(review): presumably holds the constructor's `enableDictCompression` flag — confirm. */
private enableDictionaryCompression;
/** NOTE(review): presumably holds the constructor's `enableTypeCoercion` flag — confirm. */
private enableTypeCoercion;
/** NOTE(review): presumably holds the constructor's `disableTables` flag — confirm. */
private disableTables;
/** NOTE(review): type-inference helper; its type is not visible in this declaration — confirm. */
private typeInferrer;
/**
* Creates an encoder. All parameters are optional.
*
* NOTE(review): the defaults applied when an argument is omitted are not
* visible here; presumably they match the defaults documented on
* `EncodeOptions` — confirm.
*
* @param anchorInterval - Anchor interval for streaming
* @param enableDictCompression - Whether dictionary compression is enabled
* @param enableTypeCoercion - Whether type coercion is enabled
* @param disableTables - Whether table formatting is disabled
*/
constructor(anchorInterval?: number, enableDictCompression?: boolean, enableTypeCoercion?: boolean, disableTables?: boolean);
/**
* Encodes data to ZON format.
* When disableTables is true, bypasses table generation and formats data directly.
*
* NOTE(review): how per-call `options` interact with the settings passed to the
* constructor is not visible in this declaration — confirm.
*
* @param data - Data to encode
* @param options - Optional encoding options
* @returns ZON formatted string
*/
encode(data: any, options?: EncodeOptions): string;
/**
* Extracts all uniform arrays that should become tables.
*
* @param data - Input data
* @returns Tuple of [streams Map, metadata]
*/
private _extractStreams;
/**
* Writes metadata section in YAML-like format.
*
* @param metadata - Metadata object
* @returns Array of formatted lines
*/
private _writeMetadata;
/**
* Writes table data with adaptive encoding strategy.
*
* NOTE(review): presumably chooses between the standard, sparse and dictionary
* table writers declared below — confirm.
*
* @param stream - Array of data objects
* @param key - Table key name
* @returns Array of formatted lines
*/
private _writeTable;
/**
* Writes standard table format.
*
* @param flatStream - Flattened data rows
* @param cols - Column names
* @param rowCount - Number of rows
* @param key - Table key
* @returns Array of formatted lines
*/
private _writeStandardTable;
/**
* Writes sparse table format for semi-uniform data.
*
* @param flatStream - Flattened data rows
* @param coreColumns - Core column names
* @param optionalColumns - Optional column names
* @param rowCount - Number of rows
* @param key - Table key
* @returns Array of formatted lines
*/
private _writeSparseTable;
/**
* Analyzes column presence across rows.
*
* @param data - Array of data rows
* @param cols - Column names
* @returns Array of column statistics
*/
private _analyzeColumnSparsity;
/**
* Detects dictionary compression opportunities for string columns.
*
* @param data - Array of data rows
* @param cols - Column names
* @returns Map of column names to unique value dictionaries
*/
private _detectDictionaries;
/**
* Writes table with dictionary compression for string columns.
*
* @param flatStream - Flattened data rows
* @param cols - All column names
* @param dictionaries - Map of column names to dictionaries
* @param rowCount - Number of rows
* @param key - Table key name
* @returns Array of formatted lines
*/
private _writeDictionaryTable;
/**
* Analyzes columns for compression opportunities.
*
* @param data - Array of data rows
* @param cols - Column names
* @returns Column analysis results
*/
private _analyzeColumns;
/**
* Calculates schema irregularity score for array of objects.
*
* @param data - Array of objects
* @returns Irregularity score from 0.0 (uniform) to 1.0 (irregular)
*/
private _calculateIrregularity;
/**
* Quotes string for CSV format (RFC 4180).
*
* @param s - String to quote
* @returns Quoted string
*/
private _csvQuote;
/**
* Formats nested structures using ZON syntax.
*
* @param val - Value to format
* @param visited - Set of visited objects for circular reference detection
* @returns Formatted string
*/
private _formatZonNode;
/**
* Formats a value with minimal quoting.
*
* @param val - Value to format
* @returns Formatted string
*/
private _formatValue;
/**
* Checks if string is an ISO 8601 date/datetime.
*
* @param s - String to check
* @returns True if ISO date format
*/
private _isISODate;
/**
* Determines if string needs type protection quoting.
*
* NOTE(review): "type protection" presumably means quoting a string that would
* otherwise be read back as a non-string value — confirm.
*
* @param s - String to check
* @returns True if quoting needed
*/
private _needsTypeProtection;
/**
* Determines if string needs CSV quoting.
*
* @param s - String to check
* @returns True if quoting needed
*/
private _needsQuotes;
/**
* Flattens nested dictionary with depth limit.
*
* @param d - Dictionary to flatten
* @param parent - Parent key prefix
* @param sep - Key separator
* @param maxDepth - Maximum flattening depth
* @param currentDepth - Current depth level
* @param visited - Set of visited objects
* @returns Flattened dictionary
*/
private _flatten;
}
/**
* Encodes data to ZON format.
*
* NOTE(review): this comment gave the format version as v1.1.0, which disagrees
* with the v1.3.0 stated on `ZonEncoder` and the "1.3.0" default documented for
* `EncodeOptions.version`; v1.3.0 is presumably current — confirm.
* NOTE(review): presumably a convenience wrapper around `ZonEncoder` — confirm.
*
* @param data - Data to encode
* @param options - Optional encoding options
* @returns ZON formatted string
*/
export declare function encode(data: any, options?: EncodeOptions): string;
/**
* Describes the LLM use case that `encodeLLM` encodes for.
* Only `task` is required.
*/
export interface LLMContext {
/** Target model family; optional, limited to the four listed literals. */
model?: 'gpt-4' | 'claude' | 'gemini' | 'llama';
/** Kind of work the LLM will perform on the encoded data; required. */
task: 'retrieval' | 'generation' | 'analysis';
/** Optional context window size. NOTE(review): presumably measured in tokens — confirm. */
contextWindow?: number;
}
/**
* Encodes data optimized for LLM consumption.
* Optimizes field order and compression based on LLM task type.
*
* @param data - Data to encode
* @param context - LLM context; `task` is required, `model` and `contextWindow` are optional
* @returns Optimized ZON string
*/
export declare function encodeLLM(data: any, context: LLMContext): string;