UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

205 lines (204 loc) 7.8 kB
"use strict";
/**
 * Adaptive Encoding API
 *
 * Provides intelligent format selection based on data characteristics.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.AdaptiveEncoder = exports.DataComplexityAnalyzer = void 0;
exports.encodeAdaptive = encodeAdaptive;
exports.recommendMode = recommendMode;
const encoder_1 = require("./encoder");
const analyzer_1 = require("./analyzer");
const constants_1 = require("./constants");
const printer_1 = require("../tools/printer");
const analyzer_2 = require("./analyzer");
Object.defineProperty(exports, "DataComplexityAnalyzer", {
    enumerable: true,
    get: function () { return analyzer_2.DataComplexityAnalyzer; }
});
/**
 * Reads an option value, falling back to a default only when the option is
 * missing (null/undefined) or NaN.
 *
 * Unlike `value || fallback`, an explicit `0` supplied by the caller is kept.
 *
 * @param {object|null|undefined} options - Options bag, may be absent
 * @param {string} key - Name of the option to read
 * @param {*} fallback - Value used when the option is not provided
 * @returns {*} The provided option value or the fallback
 */
function optionOrDefault(options, key, fallback) {
    const value = options == null ? undefined : options[key];
    if (value == null || Number.isNaN(value)) {
        return fallback;
    }
    return value;
}
/**
 * Adaptive encoder that selects optimal encoding strategy.
 */
class AdaptiveEncoder {
    constructor() {
        this.analyzer = new analyzer_1.DataComplexityAnalyzer();
    }
    /**
     * Encodes data using the option preset of the requested mode.
     *
     * The mode defaults to 'compact' when `options.mode` is not set. The data
     * is always analyzed first; the analysis feeds the debug result and the
     * 'llm-optimized' preset. User options are spread over the preset, so any
     * key the caller passes overrides the preset value.
     *
     * @param data - Data to encode
     * @param options - Adaptive encoding options (mode, indent, debug, plus
     *   any encoder options to override)
     * @returns Encoded string, or `{ output, metrics, modeUsed, decisions }`
     *   when `options.debug` is truthy
     */
    encode(data, options) {
        const userOptions = options == null ? {} : options;
        const mode = userOptions.mode || 'compact';
        const decisions = [];
        const analysis = this.analyzer.analyze(data);
        const metrics = analysis;
        decisions.push(`Analyzed data: ${analysis.reason}`);
        const modeUsed = mode;
        // A mode that matches no case leaves the preset undefined; the spread
        // below then yields just the user options.
        let encodeOptions;
        switch (mode) {
            case 'compact':
                encodeOptions = this.getCompactOptions(decisions);
                break;
            case 'readable':
                encodeOptions = this.getReadableOptions(decisions);
                break;
            case 'llm-optimized':
                encodeOptions = this.getLLMOptimizedOptions(analysis, decisions);
                break;
        }
        // Merge with user options (user-supplied keys win over the preset)
        encodeOptions = { ...encodeOptions, ...userOptions };
        let output = (0, encoder_1.encode)(data, encodeOptions);
        // Apply formatting for readable mode
        // Only expand if it's NOT a table (tables are already readable/compact)
        if (modeUsed === 'readable' && !output.startsWith(constants_1.TABLE_MARKER)) {
            output = (0, printer_1.expandPrint)(output, userOptions.indent);
        }
        if (userOptions.debug) {
            return {
                output,
                metrics,
                modeUsed,
                decisions
            };
        }
        return output;
    }
    /**
     * Selects encoding options for auto mode.
     *
     * Not invoked by `encode()` in this file; kept as part of the class API.
     *
     * @param analysis - Analyzer result; `nesting` and `irregularity` are read
     * @param options - Optional `complexityThreshold` (default 0.6) and
     *   `maxNestingForTable` (default 3); an explicit 0 is honored
     * @param decisions - Array that receives a human-readable decision entry
     * @returns Encoder option flags for the detected data shape
     */
    selectAutoOptions(analysis, options, decisions) {
        const threshold = optionOrDefault(options, 'complexityThreshold', 0.6);
        const maxNesting = optionOrDefault(options, 'maxNestingForTable', 3);
        // Deep nesting → disable table format
        if (analysis.nesting > maxNesting) {
            decisions.push(`Deep nesting (${analysis.nesting}) → inline format preferred`);
            return {
                enableDictCompression: false, // Inline format doesn't use dict compression
            };
        }
        // High irregularity → use basic encoding
        if (analysis.irregularity > threshold) {
            decisions.push(`High irregularity (${(analysis.irregularity * 100).toFixed(0)}%) → basic encoding`);
            return {
                enableDictCompression: false,
                enableTypeCoercion: false
            };
        }
        // Uniform data → enable all optimizations
        decisions.push('Uniform data → table format with optimizations');
        return {
            enableDictCompression: true,
            enableTypeCoercion: true
        };
    }
    /**
     * Gets encoding options for compact mode.
     *
     * @param decisions - Array that receives a decision entry
     * @returns Options with dictionary compression on and type coercion off
     */
    getCompactOptions(decisions) {
        decisions.push('Compact mode: maximum compression enabled');
        return {
            enableDictCompression: true,
            enableTypeCoercion: false // Use T/F for max compression
        };
    }
    /**
     * Gets encoding options for readable mode.
     * Tables stay enabled; dictionary compression and type coercion are
     * switched off.
     *
     * @param decisions - Array that receives a decision entry
     * @returns Options for the readable preset
     */
    getReadableOptions(decisions) {
        decisions.push('Readable mode: optimizing for human readability with tables for uniform data');
        return {
            enableDictCompression: false, // Disable dictionary compression for readability
            enableTypeCoercion: false, // No type transformations
            disableTables: false // Enable tables for uniform data patterns
        };
    }
    /**
     * Gets encoding options for LLM-optimized mode.
     * Dictionary compression is disabled so output shows actual values rather
     * than dictionary indices; type coercion is enabled.
     *
     * @param analysis - Analyzer result (accepted but not read here)
     * @param decisions - Array that receives a decision entry
     * @returns Options for the LLM-optimized preset
     */
    getLLMOptimizedOptions(analysis, decisions) {
        decisions.push('LLM-optimized mode: balancing tokens and clarity');
        // For LLMs, prioritize clarity over compression: dictionary indices
        // (like "0" instead of "Engineering") make data harder to read.
        return {
            enableDictCompression: false, // Show actual values, not dictionary indices
            enableTypeCoercion: true // Use true/false for LLM clarity
        };
    }
    /**
     * Determines effective mode based on analysis.
     *
     * Only a confident table recommendation yields 'compact'; every other
     * case, including a 'json' recommendation or nesting above 4, yields
     * 'llm-optimized'. Not invoked by `encode()` in this file.
     *
     * @param analysis - Analyzer result; `recommendation`, `nesting` and
     *   `confidence` are read
     * @returns 'compact' or 'llm-optimized'
     */
    determineEffectiveMode(analysis) {
        if (analysis.recommendation === 'json' || analysis.nesting > 4) {
            return 'llm-optimized'; // Use safe standard encoding instead of readable (YAML-like)
        }
        else if (analysis.recommendation === 'table' && analysis.confidence > 0.8) {
            return 'compact';
        }
        return 'llm-optimized';
    }
}
exports.AdaptiveEncoder = AdaptiveEncoder;
/**
 * Global adaptive encoder instance.
 */
const globalAdaptiveEncoder = new AdaptiveEncoder();
/**
 * Encodes data through the shared AdaptiveEncoder instance.
 *
 * @param data - Data to encode
 * @param options - Adaptive encoding options
 * @returns Encoded ZON string, or a detailed result object
 *   (`output`, `metrics`, `modeUsed`, `decisions`) when `options.debug` is set
 *
 * @example
 * ```typescript
 * // Default mode ('compact')
 * const output = encodeAdaptive(data);
 *
 * // Explicit mode
 * const output = encodeAdaptive(data, { mode: 'readable' });
 *
 * // With debugging
 * const result = encodeAdaptive(data, { debug: true });
 * console.log(result.decisions); // See encoding decisions
 * ```
 */
function encodeAdaptive(data, options) {
    return globalAdaptiveEncoder.encode(data, options);
}
/**
 * Analyzes data and recommends optimal encoding mode.
 *
 * @param data - Data to analyze
 * @returns Recommended mode and reasoning (`mode`, `reason`, `confidence`)
 */
function recommendMode(data) {
    const analyzer = new analyzer_1.DataComplexityAnalyzer();
    const analysis = analyzer.analyze(data);
    if (analysis.irregularity > 0.7) {
        return {
            mode: 'llm-optimized', // Fallback to LLM-optimized (standard ZON) for safety
            reason: 'Highly irregular data benefits from standard structure',
            confidence: 0.9
        };
    }
    else if (analysis.recommendation === 'table' && analysis.confidence > 0.5) {
        // Lowered from 0.8 to 0.5 to better match uniform data detection
        return {
            mode: 'compact',
            reason: 'Uniform tabular data allows maximum compression',
            confidence: analysis.confidence
        };
    }
    else {
        return {
            mode: 'llm-optimized',
            reason: 'Balanced approach for mixed or moderate complexity data',
            confidence: 0.75
        };
    }
}