zon-format
Version:
ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
205 lines (204 loc) • 7.8 kB
JavaScript
;
/**
* Adaptive Encoding API
*
* Provides intelligent format selection based on data characteristics.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.AdaptiveEncoder = exports.DataComplexityAnalyzer = void 0;
exports.encodeAdaptive = encodeAdaptive;
exports.recommendMode = recommendMode;
const encoder_1 = require("./encoder");
const analyzer_1 = require("./analyzer");
const constants_1 = require("./constants");
const printer_1 = require("../tools/printer");
var analyzer_2 = require("./analyzer");
Object.defineProperty(exports, "DataComplexityAnalyzer", { enumerable: true, get: function () { return analyzer_2.DataComplexityAnalyzer; } });
/**
 * Adaptive encoder that selects an encoding strategy per call.
 *
 * Each call to `encode()` runs the data through a `DataComplexityAnalyzer`,
 * picks a set of encoder options for the requested mode, lets the caller's
 * own options override them, and delegates to the core encoder.
 */
class AdaptiveEncoder {
  constructor() {
    this.analyzer = new analyzer_1.DataComplexityAnalyzer();
  }

  /**
   * Encodes data using the option preset of the requested mode.
   *
   * @param {*} data - Data to encode.
   * @param {object} [options] - Adaptive encoding options. Keys read here:
   *   `mode` ('compact' | 'readable' | 'llm-optimized', defaults to 'compact'),
   *   `indent` (forwarded to the pretty-printer in readable mode) and `debug`.
   *   The whole object is also merged over the mode preset and passed to the
   *   core encoder, so any encoder option given here wins over the preset.
   * @returns {string|{output: string, metrics: object, modeUsed: string, decisions: string[]}}
   *   The encoded string, or, when `options.debug` is truthy, an object with
   *   the output, the analyzer result, the mode used and the decision log.
   */
  encode(data, options) {
    // Normalise a missing options bag once so the property reads below are safe.
    const opts = options == null ? {} : options;
    const mode = opts.mode || 'compact';
    const decisions = [];
    const analysis = this.analyzer.analyze(data);
    decisions.push(`Analyzed data: ${analysis.reason}`);

    // A mode that matches no case leaves the preset undefined, which means the
    // core encoder only sees the caller's own options.
    let modePreset;
    switch (mode) {
      case 'compact':
        modePreset = this.getCompactOptions(decisions);
        break;
      case 'readable':
        modePreset = this.getReadableOptions(decisions);
        break;
      case 'llm-optimized':
        modePreset = this.getLLMOptimizedOptions(analysis, decisions);
        break;
    }

    // Merge with user options (user-supplied keys override the preset).
    const encodeOptions = { ...modePreset, ...options };
    let output = (0, encoder_1.encode)(data, encodeOptions);

    // Apply formatting for readable mode.
    // Only expand if it's NOT a table (tables are already readable/compact).
    if (mode === 'readable' && !output.startsWith(constants_1.TABLE_MARKER)) {
      output = (0, printer_1.expandPrint)(output, opts.indent);
    }

    if (opts.debug) {
      return {
        output,
        metrics: analysis,
        modeUsed: mode,
        decisions
      };
    }
    return output;
  }

  /**
   * Selects encoding options from the analysis result ("auto" strategy).
   *
   * Not called by `encode()` in this class; available to callers directly.
   *
   * @param {object} analysis - Analyzer result; `nesting` and `irregularity` are read.
   * @param {object} [options] - May carry `complexityThreshold` (default 0.6)
   *   and `maxNestingForTable` (default 3). An explicit `0` is honoured; only
   *   `null`/`undefined` fall back to the default.
   * @param {string[]} decisions - Decision log; one entry is appended.
   * @returns {object} Encoder option overrides.
   */
  selectAutoOptions(analysis, options, decisions) {
    const opts = options == null ? {} : options;
    // Null checks rather than `||`, so that a caller-supplied 0 is not
    // mistaken for "not provided".
    const threshold = opts.complexityThreshold != null ? opts.complexityThreshold : 0.6;
    const maxNesting = opts.maxNestingForTable != null ? opts.maxNestingForTable : 3;

    // Deep nesting → disable table format
    if (analysis.nesting > maxNesting) {
      decisions.push(`Deep nesting (${analysis.nesting}) → inline format preferred`);
      return {
        enableDictCompression: false, // Inline format doesn't use dict compression
      };
    }

    // High irregularity → use basic encoding
    if (analysis.irregularity > threshold) {
      decisions.push(`High irregularity (${(analysis.irregularity * 100).toFixed(0)}%) → basic encoding`);
      return {
        enableDictCompression: false,
        enableTypeCoercion: false
      };
    }

    // Uniform data → enable all optimizations
    decisions.push('Uniform data → table format with optimizations');
    return {
      enableDictCompression: true,
      enableTypeCoercion: true
    };
  }

  /**
   * Gets encoding options for compact mode.
   *
   * @param {string[]} decisions - Decision log; one entry is appended.
   * @returns {object} Preset with dictionary compression on and type coercion off.
   */
  getCompactOptions(decisions) {
    decisions.push('Compact mode: maximum compression enabled');
    return {
      enableDictCompression: true,
      enableTypeCoercion: false // Use T/F for max compression
    };
  }

  /**
   * Gets encoding options for readable mode.
   *
   * @param {string[]} decisions - Decision log; one entry is appended.
   * @returns {object} Preset with dictionary compression and type coercion
   *   off, and `disableTables` explicitly false so tables stay available.
   */
  getReadableOptions(decisions) {
    decisions.push('Readable mode: optimizing for human readability with tables for uniform data');
    return {
      enableDictCompression: false, // Disable dictionary compression for readability
      enableTypeCoercion: false, // No type transformations
      disableTables: false // Enable tables for uniform data patterns
    };
  }

  /**
   * Gets encoding options for LLM-optimized mode.
   *
   * @param {object} analysis - Analyzer result; currently unused, kept so the
   *   signature stays stable for callers.
   * @param {string[]} decisions - Decision log; one entry is appended.
   * @returns {object} Preset with dictionary compression off and type coercion on.
   */
  getLLMOptimizedOptions(analysis, decisions) {
    decisions.push('LLM-optimized mode: balancing tokens and clarity');
    // For LLMs, prioritize clarity over compression: dictionary compression
    // would show indices like "0" instead of values like "Engineering".
    return {
      enableDictCompression: false, // Show actual values, not dictionary indices
      enableTypeCoercion: true // Use true/false for LLM clarity
    };
  }

  /**
   * Maps an analysis result to a mode name.
   *
   * Returns 'compact' only for a 'table' recommendation with confidence above
   * 0.8 and nesting of at most 4; everything else maps to 'llm-optimized'.
   *
   * @param {object} analysis - Analyzer result; `recommendation`, `nesting`
   *   and `confidence` are read.
   * @returns {string} 'compact' or 'llm-optimized'.
   */
  determineEffectiveMode(analysis) {
    if (analysis.recommendation === 'json' || analysis.nesting > 4) {
      return 'llm-optimized'; // Use safe standard encoding instead of readable (YAML-like)
    }
    if (analysis.recommendation === 'table' && analysis.confidence > 0.8) {
      return 'compact';
    }
    return 'llm-optimized';
  }
}
exports.AdaptiveEncoder = AdaptiveEncoder;
/**
 * Module-level AdaptiveEncoder instance.
 *
 * Created once when this module is loaded and reused by encodeAdaptive(),
 * so repeated calls do not construct a new encoder (and its analyzer) each time.
 */
const globalAdaptiveEncoder = new AdaptiveEncoder();
/**
 * Convenience wrapper: encodes data through the shared module-level
 * AdaptiveEncoder instance.
 *
 * @param data - Data to encode
 * @param options - Adaptive encoding options, forwarded unchanged
 * @returns Whatever the shared encoder's `encode()` returns: the encoded
 * string, or a detailed result object when `options.debug` is truthy
 *
 * @example
 * ```typescript
 * // No mode given - the encoder falls back to 'compact'
 * const output = encodeAdaptive(data);
 *
 * // Explicit mode
 * const output = encodeAdaptive(data, { mode: 'readable' });
 *
 * // With debugging
 * const result = encodeAdaptive(data, { debug: true });
 * console.log(result.decisions); // See encoding decisions
 * ```
 */
function encodeAdaptive(data, options) {
  const encoded = globalAdaptiveEncoder.encode(data, options);
  return encoded;
}
/**
 * Suggests an encoding mode for the given data.
 *
 * Runs a fresh DataComplexityAnalyzer over the data and applies, in order:
 * irregularity above 0.7 → 'llm-optimized'; a 'table' recommendation with
 * confidence above 0.5 → 'compact'; anything else → 'llm-optimized'.
 *
 * @param data - Data to analyze
 * @returns Object with the recommended `mode`, a human-readable `reason`
 * and a `confidence` value
 */
function recommendMode(data) {
  const profile = new analyzer_1.DataComplexityAnalyzer().analyze(data);

  // Highly irregular input: fall back to LLM-optimized (standard ZON) for safety.
  if (profile.irregularity > 0.7) {
    return {
      mode: 'llm-optimized',
      reason: 'Highly irregular data benefits from standard structure',
      confidence: 0.9
    };
  }

  // Confident table recommendation (threshold is 0.5; an earlier value of 0.8
  // was lowered to better match uniform data detection).
  const isConfidentTable = profile.recommendation === 'table' && profile.confidence > 0.5;
  if (isConfidentTable) {
    return {
      mode: 'compact',
      reason: 'Uniform tabular data allows maximum compression',
      confidence: profile.confidence
    };
  }

  // Everything else gets the middle-ground mode.
  return {
    mode: 'llm-optimized',
    reason: 'Balanced approach for mixed or moderate complexity data',
    confidence: 0.75
  };
}