/*
 * zon-format
 * Version:
 * ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
 * 71 lines (70 loc) • 2.69 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.ZonSplitter = void 0;
const encoder_1 = require("../core/encoder");
/**
 * Splits an array of items into encoded chunks that each stay within a
 * character budget derived from a token budget.
 */
class ZonSplitter {
    /**
     * @param options - Splitter configuration
     * @param options.maxTokens - Token budget for a single chunk
     * @param options.tokenRatio - Estimated characters per token; falsy values fall back to 4
     * @param options.overlap - Number of trailing items of a chunk to repeat at the
     *   start of the next chunk; falsy values fall back to 0
     */
    constructor(options) {
        const ratio = options.tokenRatio || 4;
        // Kept so split() estimates tokens with the same ratio that sizes the budget.
        this.tokenRatio = ratio;
        this.maxChars = options.maxTokens * ratio;
        this.overlap = options.overlap || 0;
    }
    /**
     * Splits a large array of data into ZON-encoded chunks.
     *
     * Items are added greedily to the current chunk while its encoded form
     * stays within `maxChars`. An item that does not fit even on its own is
     * emitted as a chunk by itself. When `overlap` is set, the last `overlap`
     * items of a finished chunk are repeated at the start of the next one, but
     * only as many of them as still fit together with the new item.
     *
     * @param data - Array of objects to split
     * @returns ChunkResult containing encoded strings and metadata
     */
    split(data) {
        if (!Array.isArray(data) || data.length === 0) {
            return { chunks: [], metadata: { totalChunks: 0, totalTokens: 0, chunkSizes: [] } };
        }
        const chunks = [];
        const chunkSizes = [];
        let totalTokens = 0;
        // Falls back to 4 for instances that were not built by the constructor above.
        const charsPerToken = this.tokenRatio || 4;
        const encodeItems = (items) => (0, encoder_1.encode)(items);
        // Records one finished chunk together with its token estimate.
        const emit = (encoded) => {
            const tokens = Math.ceil(encoded.length / charsPerToken);
            chunks.push(encoded);
            chunkSizes.push(tokens);
            totalTokens += tokens;
        };
        let pending = [];
        // Encoding of `pending`; reused on flush instead of encoding the same
        // items a second time (assumes the encoder is deterministic).
        let pendingEncoded = "";
        for (const item of data) {
            const candidate = [...pending, item];
            const candidateEncoded = encodeItems(candidate);
            // Written as "not greater than" so a NaN budget never triggers a split.
            if (!(candidateEncoded.length > this.maxChars)) {
                pending = candidate;
                pendingEncoded = candidateEncoded;
                continue;
            }
            if (pending.length === 0) {
                // A single item over budget cannot be split further: emit it alone.
                emit(candidateEncoded);
                continue;
            }
            emit(pendingEncoded);
            // Seed the next chunk with the overlap, dropping the oldest overlap
            // items until the seed fits so carried items cannot push the new
            // chunk over the limit.
            let carried = this.overlap > 0 ? pending.slice(-this.overlap) : [];
            let seed = [...carried, item];
            let seedEncoded = encodeItems(seed);
            while (carried.length > 0 && seedEncoded.length > this.maxChars) {
                carried = carried.slice(1);
                seed = [...carried, item];
                seedEncoded = encodeItems(seed);
            }
            pending = seed;
            pendingEncoded = seedEncoded;
        }
        if (pending.length > 0) {
            emit(pendingEncoded);
        }
        return {
            chunks,
            metadata: {
                totalChunks: chunks.length,
                totalTokens,
                chunkSizes
            }
        };
    }
}
exports.ZonSplitter = ZonSplitter;