UNPKG

@yogesh0333/yogiway

Version:

YOGIWAY Format - Ultra-compact, nested-aware data format for LLM prompts. Handles deeply nested JSON efficiently, 10-15% more efficient than TOON.

787 lines 29.6 kB
"use strict"; /** * PATHX - Path-Optimized Binary Exchange Format * Binary format optimized for deep nesting with path caching * FREE AND OPEN SOURCE - No license required */ Object.defineProperty(exports, "__esModule", { value: true }); exports.pathxEncode = pathxEncode; exports.pathxDecode = pathxDecode; exports.pathxToDebugString = pathxToDebugString; exports.validatePathx = validatePathx; var PathxType; (function (PathxType) { PathxType[PathxType["NULL"] = 0] = "NULL"; PathxType[PathxType["BOOL_TRUE"] = 1] = "BOOL_TRUE"; PathxType[PathxType["BOOL_FALSE"] = 2] = "BOOL_FALSE"; PathxType[PathxType["INT8"] = 3] = "INT8"; PathxType[PathxType["INT16"] = 4] = "INT16"; PathxType[PathxType["INT32"] = 5] = "INT32"; PathxType[PathxType["INT64"] = 6] = "INT64"; PathxType[PathxType["FLOAT32"] = 7] = "FLOAT32"; PathxType[PathxType["FLOAT64"] = 8] = "FLOAT64"; PathxType[PathxType["STRING"] = 9] = "STRING"; PathxType[PathxType["LIST"] = 10] = "LIST"; PathxType[PathxType["DICT"] = 11] = "DICT"; PathxType[PathxType["BINARY"] = 12] = "BINARY"; PathxType[PathxType["PATH_REF"] = 13] = "PATH_REF"; PathxType[PathxType["SHORT_PATH"] = 14] = "SHORT_PATH"; PathxType[PathxType["NESTED_MAP"] = 15] = "NESTED_MAP"; PathxType[PathxType["SHORT_STRING"] = 16] = "SHORT_STRING"; PathxType[PathxType["TINY_INT"] = 17] = "TINY_INT"; PathxType[PathxType["EMPTY_LIST"] = 18] = "EMPTY_LIST"; PathxType[PathxType["EMPTY_DICT"] = 19] = "EMPTY_DICT"; PathxType[PathxType["TYPED_ARRAY"] = 20] = "TYPED_ARRAY"; PathxType[PathxType["CHECKSUM"] = 23] = "CHECKSUM"; PathxType[PathxType["STREAM_CHUNK"] = 24] = "STREAM_CHUNK"; })(PathxType || (PathxType = {})); class PathxEncoder { constructor(options = {}) { this.pathCache = new Map(); this.encodedPaths = []; this.currentPath = []; this.compress = options.compress || false; this.optimizePaths = options.optimizePaths !== false; this.maxDepth = options.maxDepth || 1000; this.checksum = options.checksum || false; this.typedArrays = options.typedArrays !== false; this.streamingChunkSize = options.streamingChunkSize || 8192; this.streaming = options.streaming || false; } encode(data) { // Reset state this.pathCache.clear(); this.encodedPaths = []; this.currentPath = []; // Handle streaming for large arrays if (this.streaming && Array.isArray(data) && data.length > this.streamingChunkSize) { return this.encodeStreaming(data); } const result = []; // Write magic and version result.push(...Array.from(new TextEncoder().encode("PATH"))); result.push(0x01); // Version 1 // Encode main data const dataResult = this.encodeValue(data, 0); result.push(...dataResult); // Add checksum if enabled if (this.checksum) { this.addChecksum(result); } const final = new Uint8Array(result); // Compress if requested if (this.compress && final.length > 100) { // Note: zlib compression would require a library like pako // For now, return uncompressed but mark for compression // In production, use pako or similar return final; } return final; } encodeStreaming(data) { const result = []; // Write streaming magic result.push(...Array.from(new TextEncoder().encode("PATHS"))); result.push(0x01); // Version 1 // Write total count this.encodeVarint(data.length, result); // Encode in chunks for (let i = 0; i < data.length; i += this.streamingChunkSize) { const chunk = data.slice(i, i + this.streamingChunkSize); const chunkData = this.encodeValue(chunk, 0); // Write chunk size this.encodeVarint(chunkData.length, result); // Write chunk data result.push(...chunkData); } return new Uint8Array(result); } addChecksum(result) { // Simple checksum (CRC32 would be better but requires library) // For now, use simple sum let sum = 0; for (let i = 0; i < result.length; i++) { sum = (sum + result[i]) & 0xffffffff; } result.push(PathxType.CHECKSUM); // Store as 4 bytes result.push((sum >> 24) & 0xff); result.push((sum >> 16) & 0xff); result.push((sum >> 8) & 0xff); result.push(sum & 0xff); } encodeValue(data, depth) { if (depth > this.maxDepth) { throw new Error("Maximum recursion depth exceeded"); } const result = []; if (data === null || data === undefined) { result.push(PathxType.NULL); } else if (typeof data === "boolean") { result.push(data ? PathxType.BOOL_TRUE : PathxType.BOOL_FALSE); } else if (typeof data === "number") { if (Number.isInteger(data)) { this.encodeInt(data, result); } else { this.encodeFloat(data, result); } } else if (typeof data === "string") { this.encodeString(data, result); } else if (data instanceof Uint8Array || data instanceof ArrayBuffer) { this.encodeBinary(data, result); } else if (Array.isArray(data)) { this.encodeList(data, result, depth); } else if (typeof data === "object") { this.encodeDict(data, result, depth); } else { // Fallback to JSON string const jsonStr = JSON.stringify(data); this.encodeString(jsonStr, result); } return new Uint8Array(result); } encodeInt(value, result) { if (value >= -16 && value <= 15) { result.push(PathxType.TINY_INT); result.push((value + 16) & 0xff); } else if (value >= -128 && value <= 127) { result.push(PathxType.INT8); result.push(value & 0xff); } else if (value >= -32768 && value <= 32767) { result.push(PathxType.INT16); result.push((value >> 8) & 0xff); result.push(value & 0xff); } else if (value >= -2147483648 && value <= 2147483647) { result.push(PathxType.INT32); result.push((value >> 24) & 0xff); result.push((value >> 16) & 0xff); result.push((value >> 8) & 0xff); result.push(value & 0xff); } else { result.push(PathxType.INT64); const high = Math.floor(value / 0x100000000); const low = value >>> 0; for (let i = 7; i >= 0; i--) { result.push(i >= 4 ? (high >> ((i - 4) * 8)) & 0xff : (low >> (i * 8)) & 0xff); } } } encodeFloat(value, result) { result.push(PathxType.FLOAT64); const buffer = new ArrayBuffer(8); const view = new DataView(buffer); view.setFloat64(0, value, false); // big-endian result.push(...new Uint8Array(buffer)); } encodeString(value, result) { const utf8 = new TextEncoder().encode(value); const length = utf8.length; if (length <= 15) { result.push(PathxType.SHORT_STRING); result.push(length); result.push(...utf8); } else { result.push(PathxType.STRING); this.encodeVarint(length, result); result.push(...utf8); } } encodeBinary(data, result) { const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data; result.push(PathxType.BINARY); this.encodeVarint(bytes.length, result); result.push(...bytes); } encodeList(value, result, depth) { if (value.length === 0) { result.push(PathxType.EMPTY_LIST); return; } // Check for typed array optimization if (this.typedArrays && value.length > 10) { const arrayType = this.detectArrayType(value); if (arrayType) { this.encodeTypedArray(value, arrayType, result); return; } } result.push(PathxType.LIST); this.encodeVarint(value.length, result); for (let i = 0; i < value.length; i++) { this.currentPath.push(`[${i}]`); const itemData = this.encodeValue(value[i], depth + 1); result.push(...itemData); this.currentPath.pop(); } } detectArrayType(value) { if (value.length === 0) return null; const firstType = typeof value[0]; // Check if all elements have same type if (!value.every((v) => typeof v === firstType)) { return null; } if (firstType === "number") { // Check if all are integers if (!value.every((v) => Number.isInteger(v))) { return "float64"; } // Check integer range const allInRange = (min, max) => value.every((v) => v >= min && v <= max); if (allInRange(-128, 127)) return "int8"; if (allInRange(-32768, 32767)) return "int16"; if (allInRange(-2147483648, 2147483647)) return "int32"; return "int64"; } else if (firstType === "boolean") { return "bool"; } return null; } encodeTypedArray(value, arrayType, result) { result.push(PathxType.TYPED_ARRAY); // Type map: int8=0, int16=1, int32=2, int64=3, float64=4, bool=5 const typeMap = { int8: 0, int16: 1, int32: 2, int64: 3, float64: 4, bool: 5, }; result.push(typeMap[arrayType]); this.encodeVarint(value.length, result); // Encode based on type if (arrayType === "int8") { value.forEach((v) => result.push((v + 128) & 0xff)); } else if (arrayType === "int16") { value.forEach((v) => { const val = v < 0 ? v + 65536 : v; result.push((val >> 8) & 0xff); result.push(val & 0xff); }); } else if (arrayType === "int32") { value.forEach((v) => { const val = v < 0 ? v + 0x100000000 : v; result.push((val >> 24) & 0xff); result.push((val >> 16) & 0xff); result.push((val >> 8) & 0xff); result.push(val & 0xff); }); } else if (arrayType === "int64") { value.forEach((v) => { const high = Math.floor(v / 0x100000000); const low = v >>> 0; for (let i = 7; i >= 0; i--) { result.push(i >= 4 ? (high >> ((i - 4) * 8)) & 0xff : (low >> (i * 8)) & 0xff); } }); } else if (arrayType === "float64") { const buffer = new ArrayBuffer(8); const view = new DataView(buffer); value.forEach((v) => { view.setFloat64(0, v, false); for (let i = 0; i < 8; i++) { result.push(view.getUint8(i)); } }); } else if (arrayType === "bool") { value.forEach((v) => result.push(v ? 1 : 0)); } } encodeDict(data, result, depth) { if (Object.keys(data).length === 0) { result.push(PathxType.EMPTY_DICT); return; } // Use nested map optimization for deep structures if (this.optimizePaths && depth > 2 && this.isRegularStructure(data)) { result.push(PathxType.NESTED_MAP); this.encodeVarint(Object.keys(data).length, result); for (const [key, value] of Object.entries(data)) { const fullPath = this.currentPath.length > 0 ? `${this.currentPath.join(".")}.${key}` : key; this.encodePathKey(fullPath, result); this.currentPath.push(key); const valueData = this.encodeValue(value, depth + 1); result.push(...valueData); this.currentPath.pop(); } } else { result.push(PathxType.DICT); this.encodeVarint(Object.keys(data).length, result); for (const [key, value] of Object.entries(data)) { this.encodeString(key, result); this.currentPath.push(key); const valueData = this.encodeValue(value, depth + 1); result.push(...valueData); this.currentPath.pop(); } } } encodePathKey(fullPath, result) { if (!this.optimizePaths) { this.encodeString(fullPath, result); return; } // Check if we've seen this path before if (this.pathCache.has(fullPath)) { result.push(PathxType.PATH_REF); this.encodeVarint(this.pathCache.get(fullPath), result); } else { // Encode new path const pathIndex = this.encodedPaths.length; this.pathCache.set(fullPath, pathIndex); this.encodedPaths.push(fullPath); const utf8 = new TextEncoder().encode(fullPath); if (utf8.length <= 15) { result.push(PathxType.SHORT_PATH); result.push(utf8.length); result.push(...utf8); } else { this.encodeString(fullPath, result); } } } isRegularStructure(data) { if (Object.keys(data).length === 0) return false; const sampleValues = Object.values(data).slice(0, 3); return sampleValues.every((v) => v === null || typeof v === "object" || typeof v === "string" || typeof v === "number" || typeof v === "boolean" || Array.isArray(v)); } encodeVarint(value, result) { if (value <= 0x7f) { result.push(value); } else if (value <= 0x3fff) { result.push(0x80 | (value >> 8)); result.push(value & 0xff); } else if (value <= 0x3fffff) { result.push(0xc0 | (value >> 16)); result.push((value >> 8) & 0xff); result.push(value & 0xff); } else { result.push(0xe0); result.push((value >> 24) & 0xff); result.push((value >> 16) & 0xff); result.push((value >> 8) & 0xff); result.push(value & 0xff); } } writeBytes(target, offset, source) { target.set(source, offset); return offset + source.length; } } class PathxDecoder { constructor(options = {}) { this.data = new Uint8Array(0); this.pos = 0; this.decodedPaths = []; this.maxDepth = options.maxDepth || 1000; this.partialDecode = options.partialDecode || false; this.verifyChecksum = options.verifyChecksum !== false; } decode(data) { try { return this.decodeRobust(data); } catch (error) { if (this.partialDecode) { return this.decodePartial(data, error); } throw error; } } decodeRobust(data) { if (data.length < 5) { throw new Error("Invalid PATHX format: too short"); } // Check for streaming format if (data.length >= 5 && new TextDecoder().decode(data.slice(0, 5)) === "PATHS") { return this.decodeStreaming(data); } const magic = new TextDecoder().decode(data.slice(0, 4)); if (magic !== "PATH") { throw new Error("Invalid PATHX format: bad magic"); } const version = data[4]; if (version !== 0x01 && version !== 0x02) { throw new Error(`Unsupported PATHX version: ${version}`); } this.data = data.slice(5); this.pos = 0; this.decodedPaths = []; // Check for checksum if (this.verifyChecksum && this.data.length > 5) { const lastByte = this.data[this.data.length - 5]; if (lastByte === PathxType.CHECKSUM) { const storedChecksum = (this.data[this.data.length - 4] << 24) | (this.data[this.data.length - 3] << 16) | (this.data[this.data.length - 2] << 8) | this.data[this.data.length - 1]; // Calculate checksum let sum = 0; for (let i = 0; i < this.data.length - 5; i++) { sum = (sum + this.data[i]) & 0xffffffff; } if (sum !== storedChecksum) { throw new Error("PATHX checksum verification failed"); } // Remove checksum from data this.data = this.data.slice(0, -5); } } return this.decodeValue(0); } decodeStreaming(data) { this.data = data.slice(5); // Skip 'PATHS' this.pos = 0; const totalCount = this.decodeVarint(); const result = []; while (this.pos < this.data.length && result.length < totalCount) { const chunkSize = this.decodeVarint(); if (this.pos + chunkSize > this.data.length) { break; } const chunkData = this.data.slice(this.pos, this.pos + chunkSize); this.pos += chunkSize; try { const chunkDecoder = new PathxDecoder({ maxDepth: this.maxDepth }); const chunkResult = chunkDecoder.decodeRobust(new Uint8Array([ ...Array.from(new TextEncoder().encode("PATH")), 0x01, ...Array.from(chunkData), ])); if (Array.isArray(chunkResult)) { result.push(...chunkResult); } else { result.push(chunkResult); } } catch (e) { if (!this.partialDecode) throw e; break; } } return result; } decodePartial(data, error) { // Try to decode as much as possible try { const partial = this.decodeRobust(data.slice(0, Math.min(data.length, 1000))); return { error: error.message, partial_data: partial }; } catch { return { error: error.message, partial_data: null, raw_size: data.length, }; } } decodeValue(depth) { if (depth > this.maxDepth) { throw new Error("Maximum recursion depth exceeded"); } if (this.pos >= this.data.length) { throw new Error("Unexpected end of data"); } const typeByte = this.data[this.pos++]; switch (typeByte) { case PathxType.NULL: return null; case PathxType.BOOL_TRUE: return true; case PathxType.BOOL_FALSE: return false; case PathxType.TINY_INT: return this.data[this.pos++] - 16; case PathxType.INT8: return (this.data[this.pos++] << 24) >> 24; // Sign extend case PathxType.INT16: return ((((this.data[this.pos++] << 8) | this.data[this.pos++]) << 16) >> 16); case PathxType.INT32: return ((this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]); case PathxType.INT64: // Simplified - full 64-bit handling would be more complex const high = (this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]; const low = (this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]; return high * 0x100000000 + low; case PathxType.FLOAT32: case PathxType.FLOAT64: const buffer = new ArrayBuffer(typeByte === PathxType.FLOAT32 ? 4 : 8); const view = new DataView(buffer); for (let i = 0; i < buffer.byteLength; i++) { view.setUint8(i, this.data[this.pos++]); } return typeByte === PathxType.FLOAT32 ? view.getFloat32(0, false) : view.getFloat64(0, false); case PathxType.SHORT_STRING: const shortLen = this.data[this.pos++]; const shortStr = new TextDecoder().decode(this.data.slice(this.pos, this.pos + shortLen)); this.pos += shortLen; return shortStr; case PathxType.STRING: const len = this.decodeVarint(); const str = new TextDecoder().decode(this.data.slice(this.pos, this.pos + len)); this.pos += len; return str; case PathxType.BINARY: const binLen = this.decodeVarint(); const bin = this.data.slice(this.pos, this.pos + binLen); this.pos += binLen; return bin; case PathxType.EMPTY_LIST: return []; case PathxType.LIST: const listLen = this.decodeVarint(); return Array.from({ length: listLen }, () => this.decodeValue(depth + 1)); case PathxType.EMPTY_DICT: return {}; case PathxType.DICT: const dictLen = this.decodeVarint(); const dict = {}; for (let i = 0; i < dictLen; i++) { const key = this.decodeValue(depth + 1); const value = this.decodeValue(depth + 1); dict[String(key)] = value; } return dict; case PathxType.NESTED_MAP: const mapLen = this.decodeVarint(); const map = {}; for (let i = 0; i < mapLen; i++) { const key = this.decodePathKey(); const value = this.decodeValue(depth + 1); map[key] = value; } return map; case PathxType.PATH_REF: const refIndex = this.decodeVarint(); if (refIndex >= this.decodedPaths.length) { throw new Error(`Invalid path reference: ${refIndex}`); } return this.decodedPaths[refIndex]; case PathxType.SHORT_PATH: const pathLen = this.data[this.pos++]; const pathStr = new TextDecoder().decode(this.data.slice(this.pos, this.pos + pathLen)); this.pos += pathLen; this.decodedPaths.push(pathStr); return pathStr; case PathxType.TYPED_ARRAY: return this.decodeTypedArray(); default: throw new Error(`Unknown type byte: 0x${typeByte.toString(16).padStart(2, "0")}`); } } decodeTypedArray() { const typeByte = this.data[this.pos++]; const length = this.decodeVarint(); const typeMap = { 0: "int8", 1: "int16", 2: "int32", 3: "int64", 4: "float64", 5: "bool", }; const arrayType = typeMap[typeByte] || "int32"; const result = []; if (arrayType === "int8") { for (let i = 0; i < length; i++) { result.push(this.data[this.pos++] - 128); } } else if (arrayType === "int16") { for (let i = 0; i < length; i++) { const val = (this.data[this.pos++] << 8) | this.data[this.pos++]; result.push(val > 32767 ? val - 65536 : val); } } else if (arrayType === "int32") { for (let i = 0; i < length; i++) { const val = (this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]; result.push(val > 2147483647 ? val - 0x100000000 : val); } } else if (arrayType === "int64") { for (let i = 0; i < length; i++) { const high = (this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]; const low = (this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]; result.push(high * 0x100000000 + low); } } else if (arrayType === "float64") { const buffer = new ArrayBuffer(8); const view = new DataView(buffer); for (let i = 0; i < length; i++) { for (let j = 0; j < 8; j++) { view.setUint8(j, this.data[this.pos++]); } result.push(view.getFloat64(0, false)); } } else if (arrayType === "bool") { for (let i = 0; i < length; i++) { result.push(this.data[this.pos++] !== 0); } } return result; } decodePathKey() { if (this.pos >= this.data.length) { throw new Error("Unexpected end of data"); } const typeByte = this.data[this.pos]; if (typeByte === PathxType.PATH_REF) { this.pos++; const refIndex = this.decodeVarint(); if (refIndex >= this.decodedPaths.length) { throw new Error(`Invalid path reference: ${refIndex}`); } return this.decodedPaths[refIndex]; } else if (typeByte === PathxType.SHORT_PATH) { this.pos++; const pathLen = this.data[this.pos++]; const pathStr = new TextDecoder().decode(this.data.slice(this.pos, this.pos + pathLen)); this.pos += pathLen; this.decodedPaths.push(pathStr); return pathStr; } else { return this.decodeValue(0); } } decodeVarint() { const firstByte = this.data[this.pos++]; if ((firstByte & 0x80) === 0) { return firstByte; } else if ((firstByte & 0xc0) === 0x80) { return ((firstByte & 0x3f) << 8) | this.data[this.pos++]; } else if ((firstByte & 0xe0) === 0xc0) { return (((firstByte & 0x1f) << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]); } else { return ((this.data[this.pos++] << 24) | (this.data[this.pos++] << 16) | (this.data[this.pos++] << 8) | this.data[this.pos++]); } } } /** * Encode data to PATHX binary format */ function pathxEncode(data, options = {}) { // Free and open source - no license required return new PathxEncoder(options).encode(data); } /** * Decode PATHX binary format to JavaScript object */ function pathxDecode(data, options = {}) { // Free and open source - no license required return new PathxDecoder(options).decode(data); } /** * Convert PATHX binary data to human-readable debug format * This does NOT impact performance of normal encode/decode operations */ function pathxToDebugString(data, options = {}) { try { const decoded = pathxDecode(data, { ...options, verifyChecksum: false }); return JSON.stringify(decoded, null, 2); } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); const hexPreview = Array.from(data.slice(0, Math.min(100, data.length))) .map((b) => b.toString(16).padStart(2, "0")) .join(" "); return (`PATHX Debug Info:\n` + `Size: ${data.length} bytes\n` + `Error: ${errorMsg}\n` + `Hex preview: ${hexPreview}${data.length > 100 ? "..." : ""}`); } } /** * Validate PATHX data integrity */ function validatePathx(data) { try { pathxDecode(data, { verifyChecksum: true }); return true; } catch { return false; } } exports.default = { encode: pathxEncode, decode: pathxDecode, toDebugString: pathxToDebugString, validate: validatePathx, }; //# sourceMappingURL=pathx.js.map