UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

200 lines (199 loc) 6.15 kB
/**
 * Per-call options accepted by {@link ZonEncoder.encode}, {@link encode}.
 * Every field is optional; the defaults quoted below are the ones stated by
 * this declaration file's own comments.
 */
export interface EncodeOptions {
  /** Anchor interval for streaming (default: 100) */
  anchorInterval?: number;
  /** Enable dictionary compression (default: true) */
  enableDictCompression?: boolean;
  /** Enable type coercion (default: false) */
  enableTypeCoercion?: boolean;
  /** Embed version metadata in output (default: false) */
  embedMetadata?: boolean;
  /** Version string to embed (default: "1.3.0") */
  version?: string;
  /** Schema ID to embed */
  schemaId?: string;
  /** Disable table formatting (default: false) */
  disableTables?: boolean;
}

/**
 * Encodes data structures into ZON format v1.3.0.
 *
 * Only `encode` and the constructor are public. The private members are
 * listed without types, so what is said about them here comes from their
 * original comments or is marked as an assumption.
 */
export declare class ZonEncoder {
  // NOTE(review): presumably holds the constructor's `anchorInterval` — confirm in the implementation.
  private anchor_interval;
  // NOTE(review): name suggests a RegExp matching strings that are safe to emit unquoted — confirm.
  private safe_str_re;
  // NOTE(review): presumably holds the constructor's `enableDictCompression` flag — confirm.
  private enableDictionaryCompression;
  // NOTE(review): presumably holds the constructor's `enableTypeCoercion` flag — confirm.
  private enableTypeCoercion;
  // NOTE(review): presumably holds the constructor's `disableTables` flag — confirm.
  private disableTables;
  // NOTE(review): helper of a type not visible in this file; likely used when type coercion is enabled — confirm.
  private typeInferrer;

  /**
   * Creates an encoder. All arguments are optional and positional; their
   * names match the like-named fields of `EncodeOptions`.
   * NOTE(review): the defaults are presumably the ones documented on
   * `EncodeOptions` — confirm against the implementation.
   *
   * @param anchorInterval - Anchor interval for streaming
   * @param enableDictCompression - Enable dictionary compression
   * @param enableTypeCoercion - Enable type coercion
   * @param disableTables - Disable table formatting
   */
  constructor(anchorInterval?: number, enableDictCompression?: boolean, enableTypeCoercion?: boolean, disableTables?: boolean);

  /**
   * Encodes data to ZON format.
   * When disableTables is true, bypasses table generation and formats data directly.
   *
   * NOTE(review): how per-call `options` interact with the values passed to
   * the constructor is not visible from this declaration — confirm.
   *
   * @param data - Data to encode
   * @param options - Optional encoding options
   * @returns ZON formatted string
   */
  encode(data: any, options?: EncodeOptions): string;

  /**
   * Extracts all uniform arrays that should become tables.
   *
   * @param data - Input data
   * @returns Tuple of [streams Map, metadata]
   */
  private _extractStreams;

  /**
   * Writes metadata section in YAML-like format.
   *
   * @param metadata - Metadata object
   * @returns Array of formatted lines
   */
  private _writeMetadata;

  /**
   * Writes table data with adaptive encoding strategy.
   *
   * @param stream - Array of data objects
   * @param key - Table key name
   * @returns Array of formatted lines
   */
  private _writeTable;

  /**
   * Writes standard table format.
   *
   * @param flatStream - Flattened data rows
   * @param cols - Column names
   * @param rowCount - Number of rows
   * @param key - Table key
   * @returns Array of formatted lines
   */
  private _writeStandardTable;

  /**
   * Writes sparse table format for semi-uniform data.
   *
   * @param flatStream - Flattened data rows
   * @param coreColumns - Core column names
   * @param optionalColumns - Optional column names
   * @param rowCount - Number of rows
   * @param key - Table key
   * @returns Array of formatted lines
   */
  private _writeSparseTable;

  /**
   * Analyzes column presence across rows.
   *
   * @param data - Array of data rows
   * @param cols - Column names
   * @returns Array of column statistics
   */
  private _analyzeColumnSparsity;

  /**
   * Detects dictionary compression opportunities for string columns.
   *
   * @param data - Array of data rows
   * @param cols - Column names
   * @returns Map of column names to unique value dictionaries
   */
  private _detectDictionaries;

  /**
   * Writes table with dictionary compression for string columns.
   *
   * @param flatStream - Flattened data rows
   * @param cols - All column names
   * @param dictionaries - Map of column names to dictionaries
   * @param rowCount - Number of rows
   * @param key - Table key name
   * @returns Array of formatted lines
   */
  private _writeDictionaryTable;

  /**
   * Analyzes columns for compression opportunities.
   *
   * @param data - Array of data rows
   * @param cols - Column names
   * @returns Column analysis results
   */
  private _analyzeColumns;

  /**
   * Calculates schema irregularity score for array of objects.
   *
   * @param data - Array of objects
   * @returns Irregularity score from 0.0 (uniform) to 1.0 (irregular)
   */
  private _calculateIrregularity;

  /**
   * Quotes string for CSV format (RFC 4180).
   *
   * @param s - String to quote
   * @returns Quoted string
   */
  private _csvQuote;

  /**
   * Formats nested structures using ZON syntax.
   *
   * @param val - Value to format
   * @param visited - Set of visited objects for circular reference detection
   * @returns Formatted string
   */
  private _formatZonNode;

  /**
   * Formats a value with minimal quoting.
   *
   * @param val - Value to format
   * @returns Formatted string
   */
  private _formatValue;

  /**
   * Checks if string is an ISO 8601 date/datetime.
   *
   * @param s - String to check
   * @returns True if ISO date format
   */
  private _isISODate;

  /**
   * Determines if string needs type protection quoting.
   *
   * @param s - String to check
   * @returns True if quoting needed
   */
  private _needsTypeProtection;

  /**
   * Determines if string needs CSV quoting.
   *
   * @param s - String to check
   * @returns True if quoting needed
   */
  private _needsQuotes;

  /**
   * Flattens nested dictionary with depth limit.
   *
   * @param d - Dictionary to flatten
   * @param parent - Parent key prefix
   * @param sep - Key separator
   * @param maxDepth - Maximum flattening depth
   * @param currentDepth - Current depth level
   * @param visited - Set of visited objects
   * @returns Flattened dictionary
   */
  private _flatten;
}

/**
 * Encodes data to ZON format.
 *
 * NOTE(review): this comment previously said "v1.1.0", which disagrees with
 * the `ZonEncoder` class comment (v1.3.0) and the documented default of
 * `EncodeOptions.version` ("1.3.0"). The stale number was most likely left
 * over from an earlier release — confirm and state the version in one place.
 *
 * @param data - Data to encode
 * @param options - Optional encoding options
 * @returns ZON formatted string
 */
export declare function encode(data: any, options?: EncodeOptions): string;

/**
 * Describes the LLM a payload is being prepared for; consumed by
 * {@link encodeLLM}. Only `task` is required.
 */
export interface LLMContext {
  /** Target model family; one of the four listed literals. Optional. */
  model?: 'gpt-4' | 'claude' | 'gemini' | 'llama';
  /** Kind of task the encoded data will be used for. Required. */
  task: 'retrieval' | 'generation' | 'analysis';
  /**
   * Size of the model's context window.
   * NOTE(review): unit is presumably tokens — confirm against the implementation.
   */
  contextWindow?: number;
}

/**
 * Encodes data optimized for LLM consumption.
 * Optimizes field order and compression based on LLM task type.
 *
 * @param data - Data to encode
 * @param context - LLM context including model and task type
 * @returns Optimized ZON string
 */
export declare function encodeLLM(data: any, context: LLMContext): string;