@yogesh0333/yogiway
Version:
YOGIWAY Format - Ultra-compact, nested-aware data format for LLM prompts. Handles deeply nested JSON efficiently, 10-15% more efficient than TOON.
787 lines • 29.6 kB
JavaScript
"use strict";
/**
* PATHX - Path-Optimized Binary Exchange Format
* Binary format optimized for deep nesting with path caching
* FREE AND OPEN SOURCE - No license required
*/
// CommonJS export wiring: flag the module as a transpiled ES module and expose
// the four public entry points. The names refer to function declarations that
// appear further down in this file; function declarations are hoisted, so
// assigning them here is valid.
Object.defineProperty(exports, "__esModule", { value: true });
exports.pathxEncode = pathxEncode;
exports.pathxDecode = pathxDecode;
exports.pathxToDebugString = pathxToDebugString;
exports.validatePathx = validatePathx;
/**
 * Wire type tags used as the first byte of every encoded PATHX value.
 * The object maps both ways: PathxType.NAME -> code and PathxType[code] -> "NAME".
 * Codes 21 and 22 are not assigned.
 */
var PathxType;
(function (PathxType) {
    // [member name, wire code] pairs, registered in declaration order.
    const members = [
        ["NULL", 0],
        ["BOOL_TRUE", 1],
        ["BOOL_FALSE", 2],
        ["INT8", 3],
        ["INT16", 4],
        ["INT32", 5],
        ["INT64", 6],
        ["FLOAT32", 7],
        ["FLOAT64", 8],
        ["STRING", 9],
        ["LIST", 10],
        ["DICT", 11],
        ["BINARY", 12],
        ["PATH_REF", 13],
        ["SHORT_PATH", 14],
        ["NESTED_MAP", 15],
        ["SHORT_STRING", 16],
        ["TINY_INT", 17],
        ["EMPTY_LIST", 18],
        ["EMPTY_DICT", 19],
        ["TYPED_ARRAY", 20],
        ["CHECKSUM", 23],
        ["STREAM_CHUNK", 24],
    ];
    for (const [name, code] of members) {
        PathxType[name] = code; // forward lookup: name -> code
        PathxType[code] = name; // reverse lookup: code -> name
    }
})(PathxType || (PathxType = {}));
/**
 * Serializes JavaScript values into the PATHX binary format.
 *
 * Layout of a regular message: the ASCII magic "PATH", a version byte (0x01),
 * one encoded value and, when the `checksum` option is on, a 5-byte trailer
 * (CHECKSUM tag + 32-bit big-endian byte sum). Large top-level arrays can
 * instead be written as a "PATHS" stream of independently encoded chunks.
 */
class PathxEncoder {
    /**
     * @param {object} [options]
     * @param {boolean} [options.compress=false] Accepted but currently a no-op (no compression is performed).
     * @param {boolean} [options.optimizePaths=true] Use NESTED_MAP with cached keys for dicts deeper than level 2.
     * @param {number} [options.maxDepth=1000] Maximum nesting depth before encoding throws.
     * @param {boolean} [options.checksum=false] Append the checksum trailer.
     * @param {boolean} [options.typedArrays=true] Pack homogeneous number/boolean arrays (> 10 items) compactly.
     * @param {number} [options.streamingChunkSize=8192] Elements per chunk in streaming mode.
     * @param {boolean} [options.streaming=false] Emit the chunked "PATHS" format for large top-level arrays.
     */
    constructor(options = {}) {
        this.pathCache = new Map();
        this.encodedPaths = [];
        this.currentPath = [];
        this.compress = options.compress || false;
        this.optimizePaths = options.optimizePaths !== false;
        this.maxDepth = options.maxDepth || 1000;
        this.checksum = options.checksum || false;
        this.typedArrays = options.typedArrays !== false;
        // A zero, negative or non-numeric chunk size would make the chunk loop
        // never advance, so anything below 1 falls back to the default.
        const requestedChunk = Math.floor(Number(options.streamingChunkSize));
        this.streamingChunkSize = requestedChunk >= 1 ? requestedChunk : 8192;
        this.streaming = options.streaming || false;
        this.textEncoder = new TextEncoder();
    }

    /**
     * Encode a value into a complete PATHX message.
     * @param {*} data Value to encode.
     * @returns {Uint8Array} The encoded message.
     * @throws {Error} When nesting exceeds `maxDepth`.
     */
    encode(data) {
        // Reset per-message state so an encoder instance can be reused.
        this.resetPathState();
        this.currentPath = [];
        if (this.streaming &&
            Array.isArray(data) &&
            data.length > this.streamingChunkSize) {
            return this.encodeStreaming(data);
        }
        const result = [];
        this.appendBytes(result, this.textEncoder.encode("PATH"));
        result.push(0x01); // Version 1
        this.writeValue(data, 0, result);
        if (this.checksum) {
            this.addChecksum(result);
        }
        // `compress` is accepted for API compatibility, but no compression is
        // implemented; the output is always the raw encoding.
        return new Uint8Array(result);
    }

    /**
     * Encode a large array as a "PATHS" stream: magic, version byte, total
     * element count, then a sequence of (byte length, encoded sub-list) chunks.
     * @param {Array} data Array to encode in chunks of `streamingChunkSize`.
     * @returns {Uint8Array} The encoded stream.
     */
    encodeStreaming(data) {
        const result = [];
        this.appendBytes(result, this.textEncoder.encode("PATHS"));
        result.push(0x01); // Version 1
        this.encodeVarint(data.length, result);
        for (let i = 0; i < data.length; i += this.streamingChunkSize) {
            const chunk = data.slice(i, i + this.streamingChunkSize);
            // Every chunk is decoded on its own with an empty key table, so
            // key references must never point into a previous chunk.
            this.resetPathState();
            const chunkData = this.encodeValue(chunk, 0);
            this.encodeVarint(chunkData.length, result);
            this.appendBytes(result, chunkData);
        }
        return new Uint8Array(result);
    }

    /**
     * Append the checksum trailer: the CHECKSUM tag followed by the 32-bit
     * big-endian sum of every byte already in `result` (header included).
     * @param {number[]} result Output byte array, extended in place.
     */
    addChecksum(result) {
        let sum = 0;
        for (let i = 0; i < result.length; i++) {
            sum = (sum + result[i]) & 0xffffffff;
        }
        result.push(PathxType.CHECKSUM);
        result.push((sum >> 24) & 0xff);
        result.push((sum >> 16) & 0xff);
        result.push((sum >> 8) & 0xff);
        result.push(sum & 0xff);
    }

    /** Forget all cached NESTED_MAP keys. */
    resetPathState() {
        this.pathCache.clear();
        this.encodedPaths = [];
    }

    /**
     * Append bytes one at a time. `push(...bytes)` passes every byte as a call
     * argument and throws a RangeError once the input reaches a few hundred
     * thousand bytes.
     * @param {number[]} result Output byte array, extended in place.
     * @param {ArrayLike<number>} bytes Bytes to append.
     */
    appendBytes(result, bytes) {
        for (let i = 0; i < bytes.length; i++) {
            result.push(bytes[i]);
        }
    }

    /**
     * Encode a single value into its own byte array.
     * @param {*} data Value to encode.
     * @param {number} depth Current nesting depth.
     * @returns {Uint8Array} Encoded bytes for `data`.
     * @throws {Error} When `depth` exceeds `maxDepth`.
     */
    encodeValue(data, depth) {
        const result = [];
        this.writeValue(data, depth, result);
        return new Uint8Array(result);
    }

    /**
     * Encode a single value directly into `result`; this avoids building and
     * copying an intermediate buffer for every nested value.
     * @param {*} data Value to encode.
     * @param {number} depth Current nesting depth.
     * @param {number[]} result Output byte array, extended in place.
     */
    writeValue(data, depth, result) {
        if (depth > this.maxDepth) {
            throw new Error("Maximum recursion depth exceeded");
        }
        if (data === null || data === undefined) {
            result.push(PathxType.NULL);
        }
        else if (typeof data === "boolean") {
            result.push(data ? PathxType.BOOL_TRUE : PathxType.BOOL_FALSE);
        }
        else if (typeof data === "number") {
            if (Number.isInteger(data)) {
                this.encodeInt(data, result);
            }
            else {
                this.encodeFloat(data, result);
            }
        }
        else if (typeof data === "string") {
            this.encodeString(data, result);
        }
        else if (data instanceof Uint8Array || data instanceof ArrayBuffer) {
            this.encodeBinary(data, result);
        }
        else if (Array.isArray(data)) {
            this.encodeList(data, result, depth);
        }
        else if (typeof data === "object") {
            this.encodeDict(data, result, depth);
        }
        else {
            // Fallback to JSON string
            const jsonStr = JSON.stringify(data);
            this.encodeString(jsonStr, result);
        }
    }

    /**
     * Encode an integer using the smallest representation that holds it.
     * Integers outside the signed 64-bit range are written as FLOAT64, because
     * the INT64 byte split below would wrap them around.
     * @param {number} value Integer to encode.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeInt(value, result) {
        const INT64_LIMIT = 2 ** 63;
        if (value >= -16 && value <= 15) {
            result.push(PathxType.TINY_INT);
            result.push((value + 16) & 0xff);
        }
        else if (value >= -128 && value <= 127) {
            result.push(PathxType.INT8);
            result.push(value & 0xff);
        }
        else if (value >= -32768 && value <= 32767) {
            result.push(PathxType.INT16);
            result.push((value >> 8) & 0xff);
            result.push(value & 0xff);
        }
        else if (value >= -2147483648 && value <= 2147483647) {
            result.push(PathxType.INT32);
            result.push((value >> 24) & 0xff);
            result.push((value >> 16) & 0xff);
            result.push((value >> 8) & 0xff);
            result.push(value & 0xff);
        }
        else if (value >= -INT64_LIMIT && value < INT64_LIMIT) {
            result.push(PathxType.INT64);
            this.writeInt64(value, result);
        }
        else {
            this.encodeFloat(value, result);
        }
    }

    /**
     * Write an integer in [-2^63, 2^63) as 8 big-endian two's-complement bytes.
     * @param {number} value Integer to write.
     * @param {number[]} result Output byte array, extended in place.
     */
    writeInt64(value, result) {
        const high = Math.floor(value / 0x100000000); // signed upper 32 bits
        const low = value >>> 0; // unsigned lower 32 bits
        for (let shift = 24; shift >= 0; shift -= 8) {
            result.push((high >> shift) & 0xff);
        }
        for (let shift = 24; shift >= 0; shift -= 8) {
            result.push((low >> shift) & 0xff);
        }
    }

    /**
     * Encode a number as a big-endian IEEE-754 double.
     * @param {number} value Number to encode.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeFloat(value, result) {
        result.push(PathxType.FLOAT64);
        const view = new DataView(new ArrayBuffer(8));
        view.setFloat64(0, value, false); // big-endian
        for (let i = 0; i < 8; i++) {
            result.push(view.getUint8(i));
        }
    }

    /**
     * Encode a string as UTF-8: SHORT_STRING (1-byte length) up to 15 bytes,
     * otherwise STRING with a varint length.
     * @param {string} value String to encode.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeString(value, result) {
        const utf8 = this.textEncoder.encode(value);
        if (utf8.length <= 15) {
            result.push(PathxType.SHORT_STRING);
            result.push(utf8.length);
        }
        else {
            result.push(PathxType.STRING);
            this.encodeVarint(utf8.length, result);
        }
        this.appendBytes(result, utf8);
    }

    /**
     * Encode raw bytes as BINARY with a varint length prefix.
     * @param {Uint8Array|ArrayBuffer} data Bytes to encode.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeBinary(data, result) {
        const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
        result.push(PathxType.BINARY);
        this.encodeVarint(bytes.length, result);
        this.appendBytes(result, bytes);
    }

    /**
     * Encode an array as EMPTY_LIST, TYPED_ARRAY (homogeneous numbers or
     * booleans, more than 10 items) or a generic LIST.
     * @param {Array} value Array to encode.
     * @param {number[]} result Output byte array, extended in place.
     * @param {number} depth Nesting depth of the array itself.
     */
    encodeList(value, result, depth) {
        if (value.length === 0) {
            result.push(PathxType.EMPTY_LIST);
            return;
        }
        if (this.typedArrays && value.length > 10) {
            const arrayType = this.detectArrayType(value);
            if (arrayType) {
                this.encodeTypedArray(value, arrayType, result);
                return;
            }
        }
        result.push(PathxType.LIST);
        this.encodeVarint(value.length, result);
        for (let i = 0; i < value.length; i++) {
            this.currentPath.push(`[${i}]`);
            this.writeValue(value[i], depth + 1, result);
            this.currentPath.pop();
        }
    }

    /**
     * Decide whether an array can be packed as a typed array.
     * Uses an index loop so holes in sparse arrays count as `undefined` and
     * disqualify the array instead of being skipped.
     * @param {Array} value Candidate array.
     * @returns {("int8"|"int16"|"int32"|"int64"|"float64"|"bool"|null)} Element type, or null if not packable.
     */
    detectArrayType(value) {
        if (value.length === 0) {
            return null;
        }
        const firstType = typeof value[0];
        if (firstType !== "number" && firstType !== "boolean") {
            return null;
        }
        let allIntegers = true;
        let min = Infinity;
        let max = -Infinity;
        for (let i = 0; i < value.length; i++) {
            const item = value[i];
            if (typeof item !== firstType) {
                return null;
            }
            if (firstType === "number") {
                if (!Number.isInteger(item)) {
                    allIntegers = false;
                }
                else {
                    if (item < min) {
                        min = item;
                    }
                    if (item > max) {
                        max = item;
                    }
                }
            }
        }
        if (firstType === "boolean") {
            return "bool";
        }
        if (!allIntegers) {
            return "float64";
        }
        const INT64_LIMIT = 2 ** 63;
        if (min >= -128 && max <= 127) {
            return "int8";
        }
        if (min >= -32768 && max <= 32767) {
            return "int16";
        }
        if (min >= -2147483648 && max <= 2147483647) {
            return "int32";
        }
        if (min >= -INT64_LIMIT && max < INT64_LIMIT) {
            return "int64";
        }
        // Integers beyond 64 bits cannot be split into two 32-bit words.
        return "float64";
    }

    /**
     * Encode a homogeneous array: TYPED_ARRAY tag, element-type code, varint
     * count, then the fixed-width elements.
     * @param {Array<number|boolean>} value Array to encode.
     * @param {string} arrayType Element type from `detectArrayType`.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeTypedArray(value, arrayType, result) {
        result.push(PathxType.TYPED_ARRAY);
        // Element-type codes as written on the wire.
        const typeMap = {
            int8: 0,
            int16: 1,
            int32: 2,
            int64: 3,
            float64: 4,
            bool: 5,
        };
        result.push(typeMap[arrayType]);
        this.encodeVarint(value.length, result);
        if (arrayType === "int8") {
            // Stored with a +128 bias rather than two's complement.
            for (let i = 0; i < value.length; i++) {
                result.push((value[i] + 128) & 0xff);
            }
        }
        else if (arrayType === "int16") {
            for (let i = 0; i < value.length; i++) {
                result.push((value[i] >> 8) & 0xff);
                result.push(value[i] & 0xff);
            }
        }
        else if (arrayType === "int32") {
            for (let i = 0; i < value.length; i++) {
                result.push((value[i] >> 24) & 0xff);
                result.push((value[i] >> 16) & 0xff);
                result.push((value[i] >> 8) & 0xff);
                result.push(value[i] & 0xff);
            }
        }
        else if (arrayType === "int64") {
            for (let i = 0; i < value.length; i++) {
                this.writeInt64(value[i], result);
            }
        }
        else if (arrayType === "float64") {
            const view = new DataView(new ArrayBuffer(8));
            for (let i = 0; i < value.length; i++) {
                view.setFloat64(0, value[i], false);
                for (let j = 0; j < 8; j++) {
                    result.push(view.getUint8(j));
                }
            }
        }
        else if (arrayType === "bool") {
            for (let i = 0; i < value.length; i++) {
                result.push(value[i] ? 1 : 0);
            }
        }
    }

    /**
     * Encode a plain object as EMPTY_DICT, NESTED_MAP (cached keys, used below
     * depth 2 when `optimizePaths` is on) or DICT (string keys).
     * @param {object} data Object to encode.
     * @param {number[]} result Output byte array, extended in place.
     * @param {number} depth Nesting depth of the object itself.
     */
    encodeDict(data, result, depth) {
        const entries = Object.entries(data);
        if (entries.length === 0) {
            result.push(PathxType.EMPTY_DICT);
            return;
        }
        const useNestedMap = this.optimizePaths && depth > 2 && this.isRegularStructure(data);
        result.push(useNestedMap ? PathxType.NESTED_MAP : PathxType.DICT);
        this.encodeVarint(entries.length, result);
        for (const [key, value] of entries) {
            if (useNestedMap) {
                // Write the bare key. The decoder uses the decoded string
                // verbatim as the property name, so emitting the full dotted
                // path would rename every nested property on round-trip.
                this.encodePathKey(key, result);
            }
            else {
                this.encodeString(key, result);
            }
            this.currentPath.push(key);
            this.writeValue(value, depth + 1, result);
            this.currentPath.pop();
        }
    }

    /**
     * Encode a NESTED_MAP key. The first occurrence of a short key is written
     * as SHORT_PATH and gets the next table index; later occurrences are
     * written as PATH_REF to that index.
     * @param {string} key Property name to encode.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodePathKey(key, result) {
        if (!this.optimizePaths) {
            this.encodeString(key, result);
            return;
        }
        const cachedIndex = this.pathCache.get(key);
        if (cachedIndex !== undefined) {
            result.push(PathxType.PATH_REF);
            this.encodeVarint(cachedIndex, result);
            return;
        }
        const utf8 = this.textEncoder.encode(key);
        if (utf8.length <= 15) {
            this.pathCache.set(key, this.encodedPaths.length);
            this.encodedPaths.push(key);
            result.push(PathxType.SHORT_PATH);
            result.push(utf8.length);
            this.appendBytes(result, utf8);
        }
        else {
            // Long keys are written as ordinary strings, which the decoder
            // does not add to its key table; giving them an index here would
            // shift every later PATH_REF.
            this.encodeString(key, result);
        }
    }

    /**
     * Check whether the first three values of an object are all null, objects,
     * strings, numbers or booleans.
     * @param {object} data Object to inspect.
     * @returns {boolean} True if the sample qualifies for NESTED_MAP encoding.
     */
    isRegularStructure(data) {
        if (Object.keys(data).length === 0) {
            return false;
        }
        const sampleValues = Object.values(data).slice(0, 3);
        return sampleValues.every((v) => v === null ||
            typeof v === "object" ||
            typeof v === "string" ||
            typeof v === "number" ||
            typeof v === "boolean" ||
            Array.isArray(v));
    }

    /**
     * Write an unsigned length/count in the format's variable-width form:
     *   0xxxxxxx                      values up to 0x7f
     *   10xxxxxx + 1 byte             values up to 0x3fff
     *   110xxxxx + 2 bytes            values up to 0x1fffff
     *   0xe0     + 4 bytes            anything larger (32-bit)
     * The 3-byte form has only 5 payload bits in its first byte, so its upper
     * bound is 0x1fffff; a larger value would collide with the 0xe0 marker.
     * @param {number} value Non-negative integer to write.
     * @param {number[]} result Output byte array, extended in place.
     */
    encodeVarint(value, result) {
        if (value <= 0x7f) {
            result.push(value);
        }
        else if (value <= 0x3fff) {
            result.push(0x80 | (value >> 8));
            result.push(value & 0xff);
        }
        else if (value <= 0x1fffff) {
            result.push(0xc0 | (value >> 16));
            result.push((value >> 8) & 0xff);
            result.push(value & 0xff);
        }
        else {
            result.push(0xe0);
            result.push((value >> 24) & 0xff);
            result.push((value >> 16) & 0xff);
            result.push((value >> 8) & 0xff);
            result.push(value & 0xff);
        }
    }

    /**
     * Copy `source` into `target` at `offset`.
     * @param {Uint8Array} target Destination buffer.
     * @param {number} offset Start index in `target`.
     * @param {ArrayLike<number>} source Bytes to copy.
     * @returns {number} The offset just past the copied bytes.
     */
    writeBytes(target, offset, source) {
        target.set(source, offset);
        return offset + source.length;
    }
}
/**
 * Parses PATHX binary messages back into JavaScript values.
 *
 * Accepts regular "PATH" messages (optionally followed by a checksum trailer)
 * and chunked "PATHS" streams. All reads are bounds-checked: truncated input
 * raises "Unexpected end of data" instead of decoding to garbage.
 */
class PathxDecoder {
    /**
     * @param {object} [options]
     * @param {number} [options.maxDepth=1000] Maximum nesting depth before decoding throws.
     * @param {boolean} [options.partialDecode=false] On failure, return an error report object instead of throwing.
     * @param {boolean} [options.verifyChecksum=true] Verify the checksum trailer when one is present.
     */
    constructor(options = {}) {
        this.data = new Uint8Array(0);
        this.pos = 0;
        this.decodedPaths = [];
        this.maxDepth = options.maxDepth || 1000;
        this.partialDecode = options.partialDecode || false;
        this.verifyChecksum = options.verifyChecksum !== false;
        // ignoreBOM keeps a leading U+FEFF in decoded strings; the default
        // decoder would silently strip it and break round-tripping.
        this.textDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
    }

    /**
     * Decode a PATHX message.
     * @param {Uint8Array} data Encoded message.
     * @returns {*} The decoded value, or with `partialDecode` an
     *   `{ error, partial_data, ... }` report when decoding fails.
     * @throws {Error} On malformed input when `partialDecode` is off.
     */
    decode(data) {
        try {
            return this.decodeRobust(data);
        }
        catch (error) {
            if (this.partialDecode) {
                return this.decodePartial(data, error);
            }
            throw error;
        }
    }

    /**
     * Validate the header, verify the checksum trailer if present and decode
     * the payload.
     * @param {Uint8Array} data Encoded message.
     * @returns {*} The decoded value.
     * @throws {Error} On bad magic/version, checksum mismatch or malformed payload.
     */
    decodeRobust(data) {
        if (data.length < 5) {
            throw new Error("Invalid PATHX format: too short");
        }
        if (this.textDecoder.decode(data.slice(0, 5)) === "PATHS") {
            return this.decodeStreaming(data);
        }
        const magic = this.textDecoder.decode(data.slice(0, 4));
        if (magic !== "PATH") {
            throw new Error("Invalid PATHX format: bad magic");
        }
        const version = data[4];
        if (version !== 0x01 && version !== 0x02) {
            throw new Error(`Unsupported PATHX version: ${version}`);
        }
        const payload = data.slice(5);
        this.resetState(payload);
        const trailerStart = payload.length - 5;
        const looksChecksummed = this.verifyChecksum &&
            payload.length > 5 &&
            payload[trailerStart] === PathxType.CHECKSUM;
        if (!looksChecksummed) {
            return this.decodeValue(0);
        }
        const storedChecksum = (payload[trailerStart + 1] << 24) |
            (payload[trailerStart + 2] << 16) |
            (payload[trailerStart + 3] << 8) |
            payload[trailerStart + 4];
        let payloadSum = 0;
        for (let i = 0; i < trailerStart; i++) {
            payloadSum = (payloadSum + payload[i]) & 0xffffffff;
        }
        // The encoder sums the 5 header bytes together with the payload, so
        // that is the expected value; the payload-only sum is still accepted
        // for data that matched the previous verification rule.
        let fullSum = payloadSum;
        for (let i = 0; i < 5; i++) {
            fullSum = (fullSum + data[i]) & 0xffffffff;
        }
        if (storedChecksum === fullSum || storedChecksum === payloadSum) {
            this.resetState(payload.slice(0, trailerStart));
            return this.decodeValue(0);
        }
        // A CHECKSUM byte five from the end is only a heuristic: ordinary
        // payloads can contain 0x17 there. Treat the message as unchecksummed
        // if it decodes cleanly and uses every byte; otherwise it is corrupt.
        let value;
        let consumedEverything = false;
        try {
            value = this.decodeValue(0);
            consumedEverything = this.pos === this.data.length;
        }
        catch {
            consumedEverything = false;
        }
        if (!consumedEverything) {
            throw new Error("PATHX checksum verification failed");
        }
        return value;
    }

    /**
     * Decode a "PATHS" stream: magic, version byte, total element count, then
     * (byte length, encoded list) chunks whose elements are concatenated.
     * @param {Uint8Array} data Encoded stream, starting at the magic.
     * @returns {Array} The concatenated elements.
     * @throws {Error} On malformed or truncated input (unless `partialDecode`).
     */
    decodeStreaming(data) {
        if (data.length < 6) {
            throw new Error("Invalid PATHX format: too short");
        }
        const version = data[5];
        if (version !== 0x01 && version !== 0x02) {
            throw new Error(`Unsupported PATHX version: ${version}`);
        }
        // Skip 'PATHS' AND the version byte the encoder writes after it.
        this.resetState(data.slice(6));
        const totalCount = this.decodeVarint();
        const result = [];
        while (this.pos < this.data.length && result.length < totalCount) {
            const chunkSize = this.decodeVarint();
            if (this.pos + chunkSize > this.data.length) {
                if (!this.partialDecode) {
                    throw new Error("Unexpected end of data");
                }
                break;
            }
            const chunkData = this.data.slice(this.pos, this.pos + chunkSize);
            this.pos += chunkSize;
            try {
                // Chunks carry no header or checksum of their own, so decode
                // the bare value with a fresh key table.
                const chunkDecoder = new PathxDecoder({
                    maxDepth: this.maxDepth,
                    verifyChecksum: false,
                });
                chunkDecoder.resetState(chunkData);
                const chunkResult = chunkDecoder.decodeValue(0);
                if (Array.isArray(chunkResult)) {
                    for (let i = 0; i < chunkResult.length; i++) {
                        result.push(chunkResult[i]);
                    }
                }
                else {
                    result.push(chunkResult);
                }
            }
            catch (e) {
                if (!this.partialDecode) {
                    throw e;
                }
                break;
            }
        }
        return result;
    }

    /**
     * Best-effort fallback used when `partialDecode` is on: retry on the first
     * 1000 bytes and report the original error alongside whatever decoded.
     * @param {Uint8Array} data Encoded message that failed to decode.
     * @param {Error} error The original failure.
     * @returns {{error: string, partial_data: *, raw_size?: number}} Error report.
     */
    decodePartial(data, error) {
        try {
            const partial = this.decodeRobust(data.slice(0, Math.min(data.length, 1000)));
            return { error: error.message, partial_data: partial };
        }
        catch {
            return {
                error: error.message,
                partial_data: null,
                raw_size: data.length,
            };
        }
    }

    /**
     * Point the decoder at a new byte buffer and clear the key table.
     * @param {Uint8Array} bytes Bytes to decode from.
     */
    resetState(bytes) {
        this.data = bytes;
        this.pos = 0;
        this.decodedPaths = [];
    }

    /**
     * Read one byte and advance.
     * @returns {number} The byte value.
     * @throws {Error} At end of data.
     */
    readByte() {
        if (this.pos >= this.data.length) {
            throw new Error("Unexpected end of data");
        }
        return this.data[this.pos++];
    }

    /**
     * Read `count` bytes and advance.
     * @param {number} count Number of bytes to read.
     * @returns {Uint8Array} The bytes read.
     * @throws {Error} If fewer than `count` bytes remain.
     */
    readBytes(count) {
        const end = this.pos + count;
        if (end > this.data.length) {
            throw new Error("Unexpected end of data");
        }
        const bytes = this.data.slice(this.pos, end);
        this.pos = end;
        return bytes;
    }

    /** Fail early when fewer than `count` bytes remain. */
    requireBytes(count) {
        if (count > this.data.length - this.pos) {
            throw new Error("Unexpected end of data");
        }
    }

    /** Read a big-endian signed 32-bit integer. */
    readInt32() {
        const b0 = this.readByte();
        const b1 = this.readByte();
        const b2 = this.readByte();
        const b3 = this.readByte();
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }

    /** Read a big-endian unsigned 32-bit integer. */
    readUint32() {
        return this.readInt32() >>> 0;
    }

    /**
     * Read a big-endian signed 64-bit integer as a Number. The low word must
     * be unsigned; values beyond 2^53 are subject to double precision.
     */
    readInt64() {
        const high = this.readInt32();
        const low = this.readUint32();
        return high * 0x100000000 + low;
    }

    /** Read `length` UTF-8 bytes as a string. */
    readString(length) {
        return this.textDecoder.decode(this.readBytes(length));
    }

    /** Read a SHORT_PATH body (after its tag) and register it in the key table. */
    readShortPath() {
        const pathStr = this.readString(this.readByte());
        this.decodedPaths.push(pathStr);
        return pathStr;
    }

    /** Read a PATH_REF body (after its tag) and resolve it against the key table. */
    readPathRef() {
        const refIndex = this.decodeVarint();
        if (refIndex >= this.decodedPaths.length) {
            throw new Error(`Invalid path reference: ${refIndex}`);
        }
        return this.decodedPaths[refIndex];
    }

    /**
     * Store an entry as an own property. A plain assignment to "__proto__"
     * would replace the object's prototype instead of creating a key.
     */
    setEntry(target, key, value) {
        if (key === "__proto__") {
            Object.defineProperty(target, key, {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        else {
            target[key] = value;
        }
    }

    /**
     * Decode the value at the current position.
     * @param {number} depth Current nesting depth.
     * @returns {*} The decoded value.
     * @throws {Error} On depth overflow, truncated data or an unknown type tag.
     */
    decodeValue(depth) {
        if (depth > this.maxDepth) {
            throw new Error("Maximum recursion depth exceeded");
        }
        const typeByte = this.readByte();
        switch (typeByte) {
            case PathxType.NULL:
                return null;
            case PathxType.BOOL_TRUE:
                return true;
            case PathxType.BOOL_FALSE:
                return false;
            case PathxType.TINY_INT:
                return this.readByte() - 16;
            case PathxType.INT8:
                return (this.readByte() << 24) >> 24; // Sign extend
            case PathxType.INT16: {
                const hi = this.readByte();
                const lo = this.readByte();
                return (((hi << 8) | lo) << 16) >> 16; // Sign extend
            }
            case PathxType.INT32:
                return this.readInt32();
            case PathxType.INT64:
                return this.readInt64();
            case PathxType.FLOAT32:
            case PathxType.FLOAT64: {
                const isSingle = typeByte === PathxType.FLOAT32;
                const bytes = this.readBytes(isSingle ? 4 : 8);
                const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
                return isSingle ? view.getFloat32(0, false) : view.getFloat64(0, false);
            }
            case PathxType.SHORT_STRING:
                return this.readString(this.readByte());
            case PathxType.STRING:
                return this.readString(this.decodeVarint());
            case PathxType.BINARY:
                return this.readBytes(this.decodeVarint());
            case PathxType.EMPTY_LIST:
                return [];
            case PathxType.LIST: {
                const listLen = this.decodeVarint();
                // Every element takes at least one byte; reject impossible
                // counts before allocating anything.
                this.requireBytes(listLen);
                const list = [];
                for (let i = 0; i < listLen; i++) {
                    list.push(this.decodeValue(depth + 1));
                }
                return list;
            }
            case PathxType.EMPTY_DICT:
                return {};
            case PathxType.DICT: {
                const dictLen = this.decodeVarint();
                this.requireBytes(dictLen * 2); // key + value, >= 1 byte each
                const dict = {};
                for (let i = 0; i < dictLen; i++) {
                    const key = this.decodeValue(depth + 1);
                    const value = this.decodeValue(depth + 1);
                    this.setEntry(dict, String(key), value);
                }
                return dict;
            }
            case PathxType.NESTED_MAP: {
                const mapLen = this.decodeVarint();
                this.requireBytes(mapLen * 2);
                const map = {};
                for (let i = 0; i < mapLen; i++) {
                    const key = this.decodePathKey();
                    const value = this.decodeValue(depth + 1);
                    this.setEntry(map, key, value);
                }
                return map;
            }
            case PathxType.PATH_REF:
                return this.readPathRef();
            case PathxType.SHORT_PATH:
                return this.readShortPath();
            case PathxType.TYPED_ARRAY:
                return this.decodeTypedArray();
            default:
                throw new Error(`Unknown type byte: 0x${typeByte.toString(16).padStart(2, "0")}`);
        }
    }

    /**
     * Decode a TYPED_ARRAY body: element-type code, varint count, then the
     * fixed-width elements.
     * @returns {Array<number|boolean>} The decoded elements.
     * @throws {Error} On an unknown element type or truncated data.
     */
    decodeTypedArray() {
        const typeByte = this.readByte();
        const length = this.decodeVarint();
        const typeMap = {
            0: "int8",
            1: "int16",
            2: "int32",
            3: "int64",
            4: "float64",
            5: "bool",
        };
        const widths = { int8: 1, int16: 2, int32: 4, int64: 8, float64: 8, bool: 1 };
        const arrayType = typeMap[typeByte];
        if (arrayType === undefined) {
            // Guessing a width would misalign everything that follows.
            throw new Error(`Unknown typed array element type: ${typeByte}`);
        }
        this.requireBytes(length * widths[arrayType]);
        const result = [];
        if (arrayType === "int8") {
            // Stored with a +128 bias rather than two's complement.
            for (let i = 0; i < length; i++) {
                result.push(this.readByte() - 128);
            }
        }
        else if (arrayType === "int16") {
            for (let i = 0; i < length; i++) {
                const hi = this.readByte();
                const lo = this.readByte();
                result.push((((hi << 8) | lo) << 16) >> 16);
            }
        }
        else if (arrayType === "int32") {
            for (let i = 0; i < length; i++) {
                result.push(this.readInt32());
            }
        }
        else if (arrayType === "int64") {
            for (let i = 0; i < length; i++) {
                result.push(this.readInt64());
            }
        }
        else if (arrayType === "float64") {
            for (let i = 0; i < length; i++) {
                const bytes = this.readBytes(8);
                const view = new DataView(bytes.buffer, bytes.byteOffset, 8);
                result.push(view.getFloat64(0, false));
            }
        }
        else if (arrayType === "bool") {
            for (let i = 0; i < length; i++) {
                result.push(this.readByte() !== 0);
            }
        }
        return result;
    }

    /**
     * Decode a NESTED_MAP key: a PATH_REF into the key table, a SHORT_PATH
     * that extends the table, or any other value (decoded as-is).
     * @returns {*} The key (a string for PATH_REF and SHORT_PATH).
     * @throws {Error} On truncated data or an out-of-range reference.
     */
    decodePathKey() {
        if (this.pos >= this.data.length) {
            throw new Error("Unexpected end of data");
        }
        const typeByte = this.data[this.pos];
        if (typeByte === PathxType.PATH_REF) {
            this.pos++;
            return this.readPathRef();
        }
        if (typeByte === PathxType.SHORT_PATH) {
            this.pos++;
            return this.readShortPath();
        }
        return this.decodeValue(0);
    }

    /**
     * Read a variable-width unsigned integer (1, 2, 3 or 5 bytes, selected by
     * the top bits of the first byte).
     * @returns {number} The decoded non-negative integer.
     * @throws {Error} On truncated data.
     */
    decodeVarint() {
        const firstByte = this.readByte();
        if ((firstByte & 0x80) === 0) {
            return firstByte;
        }
        if ((firstByte & 0xc0) === 0x80) {
            return ((firstByte & 0x3f) << 8) | this.readByte();
        }
        if ((firstByte & 0xe0) === 0xc0) {
            const mid = this.readByte();
            const low = this.readByte();
            return ((firstByte & 0x1f) << 16) | (mid << 8) | low;
        }
        // 5-byte form: the value is unsigned, so it must not go negative
        // when bit 31 is set.
        return this.readUint32();
    }
}
/**
* Encode data to PATHX binary format
*/
function pathxEncode(data, options = {}) {
    // A fresh encoder per call keeps key-cache state from leaking between messages.
    const encoder = new PathxEncoder(options);
    const encoded = encoder.encode(data);
    return encoded;
}
/**
* Decode PATHX binary format to JavaScript object
*/
function pathxDecode(data, options = {}) {
    // A fresh decoder per call: position and key table are per-message state.
    const decoder = new PathxDecoder(options);
    const decoded = decoder.decode(data);
    return decoded;
}
/**
* Convert PATHX binary data to human-readable debug format
* This does NOT impact performance of normal encode/decode operations
*/
function pathxToDebugString(data, options = {}) {
    // Checksum verification is always disabled here so that a damaged
    // trailer does not hide the decodable content from the debug view.
    const debugOptions = Object.assign({}, options, { verifyChecksum: false });
    try {
        return JSON.stringify(pathxDecode(data, debugOptions), null, 2);
    }
    catch (failure) {
        // Decoding (or stringifying) failed: describe the input instead.
        const reason = failure instanceof Error ? failure.message : String(failure);
        const previewBytes = Array.from(data.slice(0, Math.min(100, data.length)));
        const hexBytes = [];
        for (const byte of previewBytes) {
            hexBytes.push(byte.toString(16).padStart(2, "0"));
        }
        const ellipsis = data.length > 100 ? "..." : "";
        return [
            `PATHX Debug Info:`,
            `Size: ${data.length} bytes`,
            `Error: ${reason}`,
            `Hex preview: ${hexBytes.join(" ")}${ellipsis}`,
        ].join("\n");
    }
}
/**
* Validate PATHX data integrity
*/
function validatePathx(data) {
    // Valid means: a full decode with checksum verification does not throw.
    let isValid = true;
    try {
        pathxDecode(data, { verifyChecksum: true });
    }
    catch {
        isValid = false;
    }
    return isValid;
}
/**
 * Default export: the four public entry points grouped under shorter names
 * (encode, decode, toDebugString, validate). Each property is the same
 * function object as the corresponding named export.
 */
exports.default = {
encode: pathxEncode,
decode: pathxDecode,
toDebugString: pathxToDebugString,
validate: validatePathx,
};
//# sourceMappingURL=pathx.js.map