UNPKG

zon-format

Version:

ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors

71 lines (70 loc) 2.69 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ZonSplitter = void 0;
const encoder_1 = require("../core/encoder");
/**
 * Splits an array of items into a sequence of encoded chunks, each kept
 * within a character budget derived from a token limit.
 *
 * Token counts are estimates: characters divided by `tokenRatio`.
 */
class ZonSplitter {
    /**
     * @param options - Splitter configuration.
     * @param options.maxTokens - Token budget per chunk; multiplied by the
     *   ratio to obtain the character budget (`maxChars`).
     * @param options.tokenRatio - Estimated characters per token. Falls back
     *   to 4 when missing or falsy.
     * @param options.overlap - Number of trailing items of a flushed chunk to
     *   repeat at the start of the next one. Falls back to 0.
     */
    constructor(options) {
        const ratio = options.tokenRatio || 4;
        // Kept so token estimates in split() use the same ratio as the budget.
        this.tokenRatio = ratio;
        this.maxChars = options.maxTokens * ratio;
        this.overlap = options.overlap || 0;
    }
    /**
     * Splits a large array of data into ZON-encoded chunks.
     *
     * Items are accumulated greedily; when adding an item would push the
     * encoded chunk past `maxChars`, the accumulated items are flushed as a
     * chunk. An item that exceeds the budget on its own is still emitted, as
     * a chunk containing only that item.
     *
     * @param data - Array of objects to split
     * @returns ChunkResult containing encoded strings and metadata
     *   (`totalChunks`, `totalTokens`, `chunkSizes` in estimated tokens).
     *   Non-array or empty input yields an empty result.
     */
    split(data) {
        if (!Array.isArray(data) || data.length === 0) {
            return {
                chunks: [],
                metadata: { totalChunks: 0, totalTokens: 0, chunkSizes: [] }
            };
        }
        const chunks = [];
        const chunkSizes = [];
        let totalTokens = 0;
        let currentChunkItems = [];
        // Use the configured ratio so reported sizes agree with the budget.
        const estimateTokens = (str) => Math.ceil(str.length / this.tokenRatio);
        // Encodes the given items and records the chunk with its token estimate.
        const emitChunk = (items) => {
            const encoded = (0, encoder_1.encode)(items);
            const tokens = estimateTokens(encoded);
            chunks.push(encoded);
            chunkSizes.push(tokens);
            totalTokens += tokens;
        };
        // True when the encoded form of `items` exceeds the character budget.
        // Written as `>` so a NaN budget never reports an overflow.
        const overflows = (items) => (0, encoder_1.encode)(items).length > this.maxChars;
        for (const item of data) {
            const candidateItems = [...currentChunkItems, item];
            if (!overflows(candidateItems)) {
                currentChunkItems = candidateItems;
                continue;
            }
            if (currentChunkItems.length === 0) {
                // A single item larger than the budget: emit it alone.
                emitChunk([item]);
                continue;
            }
            emitChunk(currentChunkItems);
            let carried = this.overlap > 0 ? currentChunkItems.slice(-this.overlap) : [];
            // Drop the oldest overlap items until the seed of the next chunk
            // fits; otherwise the carried items plus the new item could
            // already exceed the budget (or merely repeat the overflowing
            // candidate when the overlap spans the whole previous chunk).
            while (carried.length > 0 && overflows([...carried, item])) {
                carried = carried.slice(1);
            }
            currentChunkItems = [...carried, item];
        }
        if (currentChunkItems.length > 0) {
            emitChunk(currentChunkItems);
        }
        return {
            chunks,
            metadata: {
                totalChunks: chunks.length,
                totalTokens,
                chunkSizes
            }
        };
    }
}
exports.ZonSplitter = ZonSplitter;