UNPKG

@yogesh0333/yogiway

Version:

YOGIWAY Format - Ultra-compact, nested-aware data format for LLM prompts. Handles deeply nested JSON efficiently, 10-15% more efficient than TOON.

847 lines 31.8 kB
"use strict"; /** * YOGIWAY Format v2 - Ultra-compact, nested-aware data format for LLM prompts * Improved with circular reference detection, path compression, and adaptive flattening * FREE AND OPEN SOURCE - No license required */ Object.defineProperty(exports, "__esModule", { value: true }); exports.smartEncode = exports.fromYogiway = exports.toYogiway = exports.fromPathx = exports.toPathx = exports.pathxToYogiway = exports.yogiwayToPathx = exports.pathxDecode = exports.pathxEncode = void 0; exports.encode = encode; exports.decode = decode; /** * Compress path by abbreviating common patterns */ function compressPath(path) { // Abbreviate common patterns return path .replace(/level(\d+)/g, "l$1") // level1 -> l1 .replace(/data\./g, "d.") // data. -> d. .replace(/profile\./g, "p.") // profile. -> p. .replace(/personal\./g, "ps.") // personal. -> ps. .replace(/address\./g, "a.") // address. -> a. .replace(/config/gi, "cfg") // config -> cfg .replace(/metadata/gi, "meta") // metadata -> meta .replace(/preferences/gi, "pref") // preferences -> pref .replace(/notifications/gi, "notif"); // notifications -> notif } /** * Expand compressed path back to full path */ function expandPath(path) { // Reverse compression (simplified - full implementation would need mapping) return path .replace(/l(\d+)/g, "level$1") .replace(/^d\./, "data.") .replace(/\.d\./g, ".data.") .replace(/^p\./, "profile.") .replace(/\.p\./g, ".profile."); } /** * Find common path prefixes and create abbreviation map */ function createPathAbbreviationMap(paths) { const map = {}; const reverse = {}; // Find common prefixes const prefixCounts = {}; paths.forEach((path) => { const parts = path.split("."); for (let i = 1; i < parts.length; i++) { const prefix = parts.slice(0, i).join("."); prefixCounts[prefix] = (prefixCounts[prefix] || 0) + 1; } }); // Create abbreviations for common prefixes (used 3+ times) let abbrIndex = 0; const usedAbbrs = new Set(); Object.entries(prefixCounts) .filter(([_, count]) => count >= 3) .sort(([_, a], [__, b]) => b - a) // Most common first .forEach(([prefix]) => { const abbr = String.fromCharCode(97 + (abbrIndex % 26)); // a-z if (!usedAbbrs.has(abbr)) { map[prefix] = abbr; reverse[abbr] = prefix; usedAbbrs.add(abbr); abbrIndex++; } }); return { map, reverse }; } /** * Flatten an object with lazy path resolution for better performance */ function flattenObjectLazy(obj, prefix = "", maxDepth = 100, currentDepth = 0, visited = new WeakSet(), compressPaths = true, pathRefs) { // For deep nesting, use path references if available if (currentDepth > 5 && pathRefs) { const pathKey = prefix || "root"; if (pathRefs[pathKey]) { // Use cached path reference - defer full flattening const result = {}; // Only flatten if it's a simple value, otherwise defer if (typeof obj !== "object" || obj === null || Array.isArray(obj)) { return flattenObject(obj, prefix, maxDepth, currentDepth, visited, compressPaths); } result[pathRefs[pathKey]] = obj; return result; } } // Fall back to normal flattening return flattenObject(obj, prefix, maxDepth, currentDepth, visited, compressPaths); } /** * Flatten an object with circular reference detection and path compression */ function flattenObject(obj, prefix = "", maxDepth = 100, currentDepth = 0, visited = new WeakSet(), compressPaths = true) { if (currentDepth >= maxDepth) { return { [prefix || "value"]: JSON.stringify(obj) }; } // Circular reference detection if (typeof obj === "object" && obj !== null) { if (visited.has(obj)) { throw new Error(`Circular reference detected at path: ${prefix || "root"}`); } visited.add(obj); } const result = {}; try { if (obj === null || obj === undefined) { return { [prefix || "value"]: null }; } if (Array.isArray(obj)) { obj.forEach((item, index) => { const newPrefix = prefix ? `${prefix}[${index}]` : `[${index}]`; if (typeof item === "object" && item !== null && !Array.isArray(item)) { Object.assign(result, flattenObject(item, newPrefix, maxDepth, currentDepth + 1, visited, compressPaths)); } else { result[newPrefix] = item; } }); return result; } if (typeof obj === "object") { for (const [key, value] of Object.entries(obj)) { const newPrefix = prefix ? `${prefix}.${key}` : key; if (value === null || value === undefined) { result[newPrefix] = null; } else if (Array.isArray(value)) { if (value.length === 0) { result[newPrefix] = "[]"; } else if (value.every((item) => typeof item !== "object" || item === null)) { result[newPrefix] = value .map((v) => typeof v === "string" && (v.includes("\t") || v.includes("\n")) ? JSON.stringify(v) : String(v)) .join("|"); } else { value.forEach((item, index) => { const arrayPrefix = `${newPrefix}[${index}]`; if (typeof item === "object" && item !== null) { Object.assign(result, flattenObject(item, arrayPrefix, maxDepth, currentDepth + 1, visited, compressPaths)); } else { result[arrayPrefix] = item; } }); } } else if (typeof value === "object") { Object.assign(result, flattenObject(value, newPrefix, maxDepth, currentDepth + 1, visited, compressPaths)); } else { result[newPrefix] = value; } } return result; } return { [prefix || "value"]: obj }; } finally { // Remove from visited set when done with this branch if (typeof obj === "object" && obj !== null) { visited.delete(obj); } } } /** * Calculate if flattening is beneficial */ function shouldFlatten(paths, avgDepth) { // Don't flatten if average depth is too high (path overhead) if (avgDepth > 15) return false; // Don't flatten if too many unique paths (overhead) if (paths.length > 100) return false; // Calculate estimated size const pathOverhead = paths.reduce((sum, p) => sum + p.length, 0); const estimatedFlattenedSize = pathOverhead + paths.length * 10; // rough estimate // If paths are very long, flattening might not help if (pathOverhead > 5000) return false; return true; } /** * Create path reference map for repeated path prefixes */ function createPathReferenceMap(paths) { const pathRefs = {}; const refDefinitions = []; const prefixCounts = {}; const prefixToRef = {}; // Count prefix usage paths.forEach((path) => { const parts = path.split("."); for (let i = 1; i < parts.length; i++) { const prefix = parts.slice(0, i).join("."); prefixCounts[prefix] = (prefixCounts[prefix] || 0) + 1; } }); // Create references for prefixes used 3+ times let refIndex = 0; Object.entries(prefixCounts) .filter(([_, count]) => count >= 3) .sort(([_, a], [__, b]) => b - a) .forEach(([prefix]) => { const ref = `@p${refIndex}`; prefixToRef[prefix] = ref; refDefinitions.push(`${ref}=${prefix}`); refIndex++; }); // Map paths to use references paths.forEach((path) => { // Find longest matching prefix const parts = path.split("."); let bestRef; let bestPrefix; for (let i = parts.length - 1; i > 0; i--) { const prefix = parts.slice(0, i).join("."); if (prefixToRef[prefix]) { bestRef = prefixToRef[prefix]; bestPrefix = prefix; break; } } if (bestRef && bestPrefix) { const suffix = path.substring(bestPrefix.length + 1); pathRefs[path] = `${bestRef}.${suffix}`; } }); return { pathRefs, refDefinitions }; } /** * Get all unique paths from an array of objects with adaptive flattening */ function getAllPaths(items, maxDepth = 100, adaptiveFlattening = true, compressPaths = true, usePathReferences = true) { const pathSet = new Set(); const depths = []; items.forEach((item) => { const visited = new WeakSet(); const flattened = flattenObject(item, "", maxDepth, 0, visited, compressPaths); Object.keys(flattened).forEach((path) => { pathSet.add(path); depths.push(path.split(".").length + path.split("[").length - 1); }); }); const paths = Array.from(pathSet).sort(); const avgDepth = depths.length > 0 ? depths.reduce((a, b) => a + b, 0) / depths.length : 0; let pathMap; let pathRefs; let refDefinitions; let shouldFlattenResult = true; if (adaptiveFlattening) { shouldFlattenResult = shouldFlatten(paths, avgDepth); if (shouldFlattenResult) { if (usePathReferences) { // Create path references for repeated prefixes const refInfo = createPathReferenceMap(paths); pathRefs = refInfo.pathRefs; refDefinitions = refInfo.refDefinitions; } if (compressPaths) { // Create path abbreviation map const { map } = createPathAbbreviationMap(paths); pathMap = map; // Compress paths paths.forEach((path, idx) => { const compressed = compressPath(path); if (compressed !== path) { paths[idx] = compressed; } }); } } } return { paths, shouldFlatten: shouldFlattenResult, pathMap, pathRefs, refDefinitions, }; } /** * Encode JavaScript object to YOGIWAY format */ function encode(data, options = {}) { // Free and open source - no license required const { useTabs = true, abbreviateNames = true, includeTypes = false, maxDepth = 100, flattenNested = true, compressPaths = true, adaptiveFlattening = true, usePathReferences = true, mode = "performance", compress = false, lazyPaths = true, streamingChunkSize = 8192, } = options; const delimiter = useTabs ? "\t" : ","; const lines = []; // Handle streaming for large arrays if (Array.isArray(data) && data.length > streamingChunkSize && mode === "performance") { return encodeStreaming(data, options); } try { // Mode-based encoding optimizations if (mode === "minimal") { // Minimal mode: maximum compression return encodeMinimal(data, options); } else if (mode === "compatibility") { // Compatibility mode: more JSON-like return encodeCompatibility(data, options); } // Performance mode (default) if (Array.isArray(data)) { return encodeArray(data, "", delimiter, abbreviateNames, includeTypes, maxDepth, flattenNested, compressPaths, adaptiveFlattening, usePathReferences, lazyPaths); } if (typeof data === "object" && data !== null) { for (const [key, value] of Object.entries(data)) { if (Array.isArray(value)) { lines.push(encodeArray(value, abbreviateNames ? abbreviate(key) : key, delimiter, abbreviateNames, includeTypes, maxDepth, flattenNested, compressPaths, adaptiveFlattening, usePathReferences, lazyPaths)); } else if (typeof value === "object" && value !== null) { lines.push(encodeArray([value], abbreviateNames ? abbreviate(key) : key, delimiter, abbreviateNames, includeTypes, maxDepth, flattenNested, compressPaths, adaptiveFlattening, usePathReferences, lazyPaths)); } } } let result = lines.join("\n"); // Apply compression if requested if (compress && result.length > 100) { // Note: zlib compression would require a library like pako // For now, return uncompressed but mark for compression // In production, use pako or similar } return result; } catch (error) { if (error instanceof Error && error.message.includes("Circular reference")) { throw new Error(`YOGIWAY encoding failed: ${error.message}. Consider using JSON.stringify for circular structures.`); } throw error; } } /** * Encode in minimal mode - maximum compression */ function encodeMinimal(data, options) { return encode(data, { ...options, mode: "performance", compressPaths: true, usePathReferences: true, abbreviateNames: true, lazyPaths: true, }); } /** * Encode in compatibility mode - more JSON-like */ function encodeCompatibility(data, options) { return encode(data, { ...options, mode: "performance", compressPaths: false, usePathReferences: false, abbreviateNames: false, lazyPaths: false, }); } /** * Encode large arrays in streaming mode */ function encodeStreaming(data, options) { const chunkSize = options.streamingChunkSize || 8192; const chunks = []; for (let i = 0; i < data.length; i += chunkSize) { const chunk = data.slice(i, i + chunkSize); chunks.push(encode(chunk, { ...options, mode: "performance", lazyPaths: options.lazyPaths, })); } return chunks.join("\n---\n"); } function encodeArray(arr, name, delimiter, abbreviateNames, includeTypes, maxDepth, flattenNested, compressPaths, adaptiveFlattening, usePathReferences, lazyPaths = true) { if (arr.length === 0) return ""; let keys; let items; let pathMap; let pathRefs; let refDefinitions; let shouldFlattenResult = flattenNested; if (flattenNested && adaptiveFlattening) { const pathInfo = getAllPaths(arr, maxDepth, adaptiveFlattening, compressPaths, usePathReferences); keys = pathInfo.paths; shouldFlattenResult = pathInfo.shouldFlatten; pathMap = pathInfo.pathMap; pathRefs = pathInfo.pathRefs; refDefinitions = pathInfo.refDefinitions; if (shouldFlattenResult) { const visited = new WeakSet(); // Use lazy path resolution if enabled and path refs available if (lazyPaths && pathRefs) { items = arr.map((item) => flattenObjectLazy(item, "", maxDepth, 0, visited, compressPaths, pathRefs)); } else { items = arr.map((item) => flattenObject(item, "", maxDepth, 0, visited, compressPaths)); } } else { // Fall back to simple encoding const allKeys = new Set(); arr.forEach((item) => { if (typeof item === "object" && item !== null) { Object.keys(item).forEach((key) => allKeys.add(key)); } }); keys = Array.from(allKeys); items = arr; } } else if (flattenNested) { const pathInfo = getAllPaths(arr, maxDepth, false, compressPaths, usePathReferences); keys = pathInfo.paths; pathRefs = pathInfo.pathRefs; refDefinitions = pathInfo.refDefinitions; const visited = new WeakSet(); items = arr.map((item) => flattenObject(item, "", maxDepth, 0, visited, compressPaths)); } else { const allKeys = new Set(); arr.forEach((item) => { if (typeof item === "object" && item !== null) { Object.keys(item).forEach((key) => allKeys.add(key)); } }); keys = Array.from(allKeys); items = arr; } if (keys.length === 0) return ""; // Create field mapping (a, b, c, ...) const fieldMap = {}; keys.forEach((key, index) => { const letter = String.fromCharCode(97 + (index % 26)); if (index >= 26) { const firstLetter = String.fromCharCode(97 + Math.floor((index - 26) / 26)); const secondLetter = String.fromCharCode(97 + ((index - 26) % 26)); fieldMap[key] = firstLetter + secondLetter; } else { fieldMap[key] = letter; } }); // Build header with path notation const fieldLetters = keys.map((key) => fieldMap[key]).join(""); const header = name ? `${name}[${arr.length}]${fieldLetters}` : `[${arr.length}]${fieldLetters}`; // Add path mapping comment (for decoding) // Apply path references and compression const pathMappingParts = []; // Add path reference definitions first (if any) if (refDefinitions && refDefinitions.length > 0) { pathMappingParts.push(...refDefinitions); } // Add field mappings with path references keys.forEach((key, idx) => { const letter = fieldMap[key]; // Use path reference if available, otherwise use compressed/original path let mappedPath; if (pathRefs && pathRefs[key]) { mappedPath = pathRefs[key]; } else { mappedPath = compressPaths ? compressPath(key) : key; } pathMappingParts.push(`${letter}=${mappedPath}`); }); const pathMapping = pathMappingParts.join(";"); // Add type hints if requested if (includeTypes) { const types = keys.map((key) => { const sample = items.find((item) => item?.[key] !== undefined)?.[key]; return inferType(sample); }); const typeHint = `:${types.join("")}`; const rows = items.map((item) => { return keys .map((key) => { const value = item?.[key]; return formatValue(value, delimiter); }) .join(delimiter); }); return `${header}${typeHint}#${pathMapping}\n${rows.join("\n")}`; } // Build rows const rows = items.map((item) => { return keys .map((key) => { const value = item?.[key]; return formatValue(value, delimiter); }) .join(delimiter); }); return `${header}#${pathMapping}\n${rows.join("\n")}`; } function formatValue(value, delimiter, visited = new WeakSet()) { if (value === null || value === undefined) { return ""; } if (typeof value === "string") { let escaped = value .replace(/\\/g, "\\\\") .replace(/\t/g, "\\t") .replace(/\n/g, "\\n"); if (delimiter === "\t" && (escaped.includes("\t") || escaped.includes("\n"))) { return `"${escaped}"`; } if (delimiter === "," && (escaped.includes(",") || escaped.includes("\n"))) { return `"${escaped}"`; } return escaped; } if (typeof value === "number" || typeof value === "boolean") { return String(value); } if (Array.isArray(value)) { return value .map((v) => typeof v === "string" && (v.includes(delimiter) || v.includes("\n")) ? JSON.stringify(v) : String(v)) .join("|"); } if (typeof value === "object") { // Check for circular reference before stringifying if (visited.has(value)) { return "[Circular]"; } visited.add(value); try { return JSON.stringify(value); } catch (e) { if (e instanceof Error && e.message.includes("circular")) { return "[Circular]"; } throw e; } finally { visited.delete(value); } } return String(value); } function abbreviate(name) { if (name.length <= 3) return name; const first = name[0]; const rest = name.slice(1).replace(/[aeiou]/gi, ""); return (first + rest.slice(0, 2)).toLowerCase(); } function inferType(value) { if (value === null || value === undefined) return "s"; if (typeof value === "string") return "s"; if (typeof value === "number") return "n"; if (typeof value === "boolean") return "b"; if (Array.isArray(value)) return "a"; if (typeof value === "object") return "o"; return "s"; } /** * Restore nested structure from flattened paths */ function restoreNested(flatObj) { const result = {}; for (const [path, value] of Object.entries(flatObj)) { // Expand compressed paths const expandedPath = expandPath(path); const parts = expandedPath.split("."); let current = result; for (let i = 0; i < parts.length - 1; i++) { const part = parts[i]; const arrayMatch = part.match(/^(.+)\[(\d+)\]$/); if (arrayMatch) { const [, arrayName, indexStr] = arrayMatch; const index = parseInt(indexStr, 10); if (!current[arrayName]) { current[arrayName] = []; } if (!current[arrayName][index]) { current[arrayName][index] = {}; } current = current[arrayName][index]; } else { if (!current[part]) { current[part] = {}; } current = current[part]; } } const lastPart = parts[parts.length - 1]; const arrayMatch = lastPart.match(/^(.+)\[(\d+)\]$/); if (arrayMatch) { const [, arrayName, indexStr] = arrayMatch; const index = parseInt(indexStr, 10); if (!current[arrayName]) { current[arrayName] = []; } current[arrayName][index] = value; } else { current[lastPart] = value; } } return result; } /** * Decode YOGIWAY format to JavaScript object */ function decode(yogiway, options = {}) { // Free and open source - no license required const { restoreNested: shouldRestoreNested = true, maxDepth = 100 } = options; const lines = yogiway.trim().split("\n"); if (lines.length === 0) return {}; const result = {}; let i = 0; while (i < lines.length) { const line = lines[i]; if (!line || line.trim() === "") { i++; continue; } const headerMatch = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)?\[(\d+)\]([a-z]+)(?::([a-z]+))?#(.+)$/); const headerMatchSimple = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)?\[(\d+)\]([a-z]+)(?::([a-z]+))?$/); const match = headerMatch || headerMatchSimple; if (!match) { i++; continue; } const [, name, countStr, fieldLetters, typeHints, pathMappingStr] = match; const count = parseInt(countStr, 10); if (count > 10000) { throw new Error(`YOGIWAY decode error: Array count too large (${count}). Maximum allowed: 10000.`); } // Parse path mapping (including path references) const pathMapping = {}; const pathRefMap = {}; // @p0 -> full path if (pathMappingStr) { try { pathMappingStr.split(";").forEach((mapping) => { const [key, path] = mapping.split("="); if (key && path) { // Check if it's a path reference definition (@p0=path) if (key.startsWith("@p")) { pathRefMap[key] = path; } else { // Regular field mapping (a=path or a=@p0.suffix) let resolvedPath = path; // Resolve path references if (path.includes("@p")) { // Replace @p0.suffix with full path const refMatch = path.match(/^(@p\d+)(\.(.+))?$/); if (refMatch) { const ref = refMatch[1]; const suffix = refMatch[3] || ""; if (pathRefMap[ref]) { resolvedPath = suffix ? `${pathRefMap[ref]}.${suffix}` : pathRefMap[ref]; } } } pathMapping[key] = resolvedPath; } } }); } catch (e) { throw new Error(`YOGIWAY decode error: Invalid path mapping format. ${e instanceof Error ? e.message : String(e)}`); } } let delimiter = "\t"; if (i + 1 < lines.length) { const nextLine = lines[i + 1]; if (nextLine && !nextLine.includes("\t") && nextLine.includes(",")) { delimiter = ","; } } const fieldPaths = []; for (let j = 0; j < fieldLetters.length; j++) { const letter = fieldLetters[j]; const path = pathMapping[letter] || `field${j}`; fieldPaths.push(path); } const items = []; for (let rowNum = 0; rowNum < count && i + 1 < lines.length; rowNum++) { i++; const rowLine = lines[i]; if (!rowLine || rowLine.trim() === "") break; let values; if (delimiter === "\t") { values = rowLine.split("\t").map((v) => v.trim()); } else { values = parseRow(rowLine, delimiter, fieldPaths.length); } if (values.length !== fieldPaths.length) { throw new Error(`YOGIWAY decode error: Row ${rowNum + 1} has ${values.length} values but expected ${fieldPaths.length} fields.`); } const flatItem = {}; fieldPaths.forEach((path, idx) => { const value = values[idx] || ""; flatItem[path] = parseValue(value, typeHints?.[idx]); }); if (shouldRestoreNested && Object.keys(pathMapping).length > 0) { try { items.push(restoreNested(flatItem)); } catch (e) { throw new Error(`YOGIWAY decode error: Failed to restore nested structure. ${e instanceof Error ? e.message : String(e)}`); } } else { items.push(flatItem); } } const collectionName = name || "data"; if (result[collectionName]) { if (Array.isArray(result[collectionName])) { result[collectionName].push(...items); } else { result[collectionName] = [result[collectionName], ...items]; } } else { result[collectionName] = items.length === 1 ? items[0] : items; } i++; } return result; } function parseRow(row, delimiter, expectedFields) { const values = []; let current = ""; let inQuotes = false; let i = 0; while (i < row.length && values.length < expectedFields) { const char = row[i]; if (char === '"' && (i === 0 || row[i - 1] !== "\\")) { inQuotes = !inQuotes; i++; continue; } if (!inQuotes && char === delimiter) { values.push(current); current = ""; i++; continue; } if (char === "\\" && i + 1 < row.length) { const next = row[i + 1]; if (next === "t") { current += "\t"; i += 2; continue; } if (next === "n") { current += "\n"; i += 2; continue; } if (next === "\\") { current += "\\"; i += 2; continue; } } current += char; i++; } if (current !== "" || values.length < expectedFields) { values.push(current); } return values; } function parseValue(value, typeHint) { if (value === "") return null; if (value.includes("|") && !value.startsWith('"')) { return value.split("|").map((v) => parseValue(v.trim(), typeHint)); } if (typeHint === "n") { const num = Number(value); return isNaN(num) ? value : num; } if (typeHint === "b") { return value === "true" || value === "1"; } if (/^-?\d+\.?\d*$/.test(value)) { const num = Number(value); if (!isNaN(num)) return num; } if (value === "true" || value === "false") { return value === "true"; } if ((value.startsWith("{") && value.endsWith("}")) || (value.startsWith("[") && value.endsWith("]"))) { try { return JSON.parse(value); } catch { return value; } } return value; } // Re-export PATHX and converters var pathx_1 = require("./pathx"); Object.defineProperty(exports, "pathxEncode", { enumerable: true, get: function () { return pathx_1.pathxEncode; } }); Object.defineProperty(exports, "pathxDecode", { enumerable: true, get: function () { return pathx_1.pathxDecode; } }); var converter_1 = require("./converter"); Object.defineProperty(exports, "yogiwayToPathx", { enumerable: true, get: function () { return converter_1.yogiwayToPathx; } }); Object.defineProperty(exports, "pathxToYogiway", { enumerable: true, get: function () { return converter_1.pathxToYogiway; } }); Object.defineProperty(exports, "toPathx", { enumerable: true, get: function () { return converter_1.toPathx; } }); Object.defineProperty(exports, "fromPathx", { enumerable: true, get: function () { return converter_1.fromPathx; } }); Object.defineProperty(exports, "toYogiway", { enumerable: true, get: function () { return converter_1.toYogiway; } }); Object.defineProperty(exports, "fromYogiway", { enumerable: true, get: function () { return converter_1.fromYogiway; } }); Object.defineProperty(exports, "smartEncode", { enumerable: true, get: function () { return converter_1.smartEncode; } }); // Export default exports.default = { encode, decode }; //# sourceMappingURL=index.js.map