zon-format
Version:
ZON: The most token-efficient serialization format for LLMs - beats CSV, TOON, JSON, and all competitors
1,099 lines (1,098 loc) • 46.8 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ZonDecoder = void 0;
exports.decode = decode;
const constants_1 = require("./constants");
const exceptions_1 = require("./exceptions");
const utils_1 = require("./utils");
const versioning_1 = require("./versioning");
class ZonDecoder {
constructor(options = {}) {
var _a;
this.strict = (_a = options.strict) !== null && _a !== void 0 ? _a : true;
this.currentLine = 0;
}
/**
* Decodes ZON format string to original data structure.
*
* @param zonStr - ZON formatted string
* @param options - Optional decode options
* @returns Decoded data or DecodeResult if extractMetadata is true
*/
decode(zonStr, options) {
if (!zonStr) {
return {};
}
if (zonStr.length > constants_1.MAX_DOCUMENT_SIZE) {
throw new exceptions_1.ZonDecodeError(`[E301] Document size exceeds maximum (${constants_1.MAX_DOCUMENT_SIZE} bytes)`, { code: 'E301' });
}
const lines = this._splitByDelimiter(zonStr.trim(), '\n');
if (lines.length === 0) {
return {};
}
if (lines.length === 1) {
const line = lines[0].trim();
if (line.startsWith('[')) {
return this._parseZonNode(line);
}
const hasBlock = /^[a-zA-Z0-9_]+\s*[\{\[]/.test(line);
if (!line.includes(constants_1.META_SEPARATOR) && !line.startsWith(constants_1.TABLE_MARKER) && !hasBlock) {
return this._parseValue(line);
}
}
const metadata = {};
const tables = {};
let currentTable = null;
let currentTableName = null;
let pendingDictionaries = new Map();
for (let i = 0; i < lines.length; i++) {
this.currentLine = i + 1;
const line = lines[i];
const trimmedLine = line.trimEnd();
if (trimmedLine.length > constants_1.MAX_LINE_LENGTH) {
throw new exceptions_1.ZonDecodeError(`[E302] Line length exceeds maximum (${constants_1.MAX_LINE_LENGTH} chars)`, { code: 'E302', line: this.currentLine });
}
if (!trimmedLine) {
continue;
}
const dictMatch = trimmedLine.match(/^([\w\.]+)\[(\d+)\]:(.+)$/);
if (dictMatch && !trimmedLine.startsWith(constants_1.TABLE_MARKER)) {
const [, col, , vals] = dictMatch;
pendingDictionaries.set(col, vals.split(','));
continue;
}
if (trimmedLine.startsWith(constants_1.TABLE_MARKER)) {
const [tableName, tableInfo] = this._parseTableHeader(trimmedLine);
if (pendingDictionaries.size > 0) {
tableInfo.dictionaries = new Map(pendingDictionaries);
pendingDictionaries.clear();
}
currentTableName = tableName;
currentTable = tableInfo;
tables[currentTableName] = currentTable;
}
else if (currentTable !== null && currentTable.row_index < currentTable.expected_rows) {
const row = this._parseTableRow(trimmedLine, currentTable);
currentTable.rows.push(row);
if (currentTable.row_index >= currentTable.expected_rows) {
currentTable = null;
}
}
else {
let splitIdx = -1;
let splitChar = '';
let depth = 0;
let inQuote = false;
for (let j = 0; j < trimmedLine.length; j++) {
const char = trimmedLine[j];
if (char === '"')
inQuote = !inQuote;
if (!inQuote) {
if (char === '{' || char === '[')
depth++;
if (char === '}' || char === ']')
depth--;
if (depth === 1 && (char === '{' || char === '[')) {
if (splitIdx === -1) {
splitIdx = j;
splitChar = char;
break;
}
}
if (char === ':' && depth === 0) {
splitIdx = j;
splitChar = ':';
break;
}
}
}
if (splitIdx !== -1) {
let key;
let val;
if (splitChar === ':') {
key = trimmedLine.substring(0, splitIdx).trim();
val = trimmedLine.substring(splitIdx + 1).trim();
}
else {
key = trimmedLine.substring(0, splitIdx).trim();
val = trimmedLine.substring(splitIdx).trim();
}
if (!val && !trimmedLine.trim().endsWith('{') && !trimmedLine.trim().endsWith('[')) {
const currentIndent = line.search(/\S/);
if (i + 1 < lines.length) {
const nextIndent = lines[i + 1].search(/\S/);
if (nextIndent > currentIndent) {
const blockLines = [];
while (i + 1 < lines.length) {
const nextLine = lines[i + 1];
if (!nextLine.trim()) {
blockLines.push('');
i++;
this.currentLine = i + 1;
continue;
}
const nextLineIndent = nextLine.search(/\S/);
if (nextLineIndent <= currentIndent)
break;
blockLines.push(nextLine);
i++;
this.currentLine = i + 1;
}
const normalizedLines = blockLines.map((line, idx) => {
if (!line.trim())
return line;
const lineIndent = line.search(/\S/);
if (lineIndent === -1)
return line;
return lineIndent >= nextIndent ? line.substring(nextIndent) : line;
});
val = normalizedLines.join('\n');
}
}
}
if (val.startsWith(constants_1.TABLE_MARKER)) {
const [_, tableInfo] = this._parseTableHeader(val);
if (pendingDictionaries.size > 0) {
tableInfo.dictionaries = new Map(pendingDictionaries);
pendingDictionaries.clear();
}
currentTableName = key;
currentTable = tableInfo;
tables[currentTableName] = currentTable;
}
else {
currentTable = null;
metadata[key] = this._parseZonNode(val);
}
}
}
}
for (const [tableName, table] of Object.entries(tables)) {
if (this.strict && table.rows.length !== table.expected_rows) {
throw new exceptions_1.ZonDecodeError(`[E001] Row count mismatch in table '${tableName}': expected ${table.expected_rows}, got ${table.rows.length}`, { code: 'E001', context: `Table: ${tableName}` });
}
metadata[tableName] = this._reconstructTable(table);
}
const result = this._unflatten(metadata);
if (options === null || options === void 0 ? void 0 : options.extractMetadata) {
const meta = (0, versioning_1.extractVersion)(result);
if (meta) {
return {
data: (0, versioning_1.stripVersion)(result),
metadata: meta
};
}
else {
return {
data: result,
metadata: undefined
};
}
}
if (Object.keys(result).length === 1 && 'data' in result && Array.isArray(result.data)) {
return result.data;
}
return result;
}
/**
* Parses table header line.
*
* @param line - Header line to parse
* @returns Tuple of [tableName, tableInfo]
*/
_parseTableHeader(line) {
const v2NamedPattern = /^@(\w+)\((\d+)\)(\[\w+\])*:(.+)$/;
const v2NamedMatch = line.match(v2NamedPattern);
if (v2NamedMatch) {
const tableName = v2NamedMatch[1];
const count = parseInt(v2NamedMatch[2], 10);
const omittedStr = v2NamedMatch[3] || '';
const colsStr = v2NamedMatch[4];
const omittedCols = [];
if (omittedStr) {
const matches = omittedStr.matchAll(/\[(\w+)\]/g);
for (const m of matches) {
omittedCols.push(m[1]);
}
}
const rawCols = colsStr.split(',').map(c => c.trim());
const cols = [];
for (const rawCol of rawCols) {
cols.push(rawCol);
}
return [tableName, {
cols,
omittedCols,
rows: [],
prev_vals: Object.fromEntries(cols.map(col => [col, null])),
row_index: 0,
expected_rows: count
}];
}
const v2ValuePattern = /^@\((\d+)\)(\[\w+\])*:(.+)$/;
const v2ValueMatch = line.match(v2ValuePattern);
if (v2ValueMatch) {
const count = parseInt(v2ValueMatch[1], 10);
const omittedStr = v2ValueMatch[2] || '';
const colsStr = v2ValueMatch[3];
const omittedCols = [];
if (omittedStr) {
const matches = omittedStr.matchAll(/\[(\w+)\]/g);
for (const m of matches) {
omittedCols.push(m[1]);
}
}
const rawCols = colsStr.split(',').map(c => c.trim());
const cols = [];
for (const rawCol of rawCols) {
cols.push(rawCol);
}
return ['data', {
cols,
omittedCols,
rows: [],
prev_vals: Object.fromEntries(cols.map(col => [col, null])),
row_index: 0,
expected_rows: count
}];
}
const v2Pattern = /^@(\d+)(\[\w+\])*:(.+)$/;
const v2Match = line.match(v2Pattern);
if (v2Match) {
const count = parseInt(v2Match[1], 10);
const omittedStr = v2Match[2] || '';
const colsStr = v2Match[3];
const omittedCols = [];
if (omittedStr) {
const matches = omittedStr.matchAll(/\[(\w+)\]/g);
for (const m of matches) {
omittedCols.push(m[1]);
}
}
const rawCols = colsStr.split(',').map(c => c.trim());
const cols = [];
for (const rawCol of rawCols) {
cols.push(rawCol);
}
return ['data', {
cols,
omittedCols,
rows: [],
prev_vals: Object.fromEntries(cols.map(col => [col, null])),
row_index: 0,
expected_rows: count
}];
}
const v1Pattern = /^@(\w+)\((\d+)\):(.+)$/;
const v1Match = line.match(v1Pattern);
if (!v1Match) {
throw new exceptions_1.ZonDecodeError(`Invalid table header: ${line}`);
}
const tableName = v1Match[1];
const count = parseInt(v1Match[2], 10);
const colsStr = v1Match[3];
const rawCols = colsStr.split(',').map(c => c.trim());
const cols = [];
for (const rawCol of rawCols) {
cols.push(rawCol);
}
return [tableName, {
cols,
rows: [],
prev_vals: Object.fromEntries(cols.map(col => [col, null])),
row_index: 0,
expected_rows: count
}];
}
/**
* Parses a table row with sparse encoding support.
*
* @param line - Row line to parse
* @param table - Table information
* @returns Parsed row object
*/
_parseTableRow(line, table) {
const tokens = this._splitByDelimiter(line, ',');
const coreFieldCount = tokens.length;
let sparseFieldCount = 0;
for (let i = table.cols.length; i < tokens.length; i++) {
const tok = tokens[i];
if (tok.includes(':') && !this._isURL(tok) && !this._isTimestamp(tok)) {
sparseFieldCount++;
}
}
const actualCoreFields = Math.min(coreFieldCount, table.cols.length);
if (this.strict && coreFieldCount < table.cols.length && sparseFieldCount === 0) {
throw new exceptions_1.ZonDecodeError(`[E002] Field count mismatch on row ${table.row_index + 1}: expected ${table.cols.length} fields, got ${coreFieldCount}`, {
code: 'E002',
line: this.currentLine,
context: line.substring(0, 50) + (line.length > 50 ? '...' : '')
});
}
while (tokens.length < table.cols.length) {
tokens.push('');
}
const row = {};
let tokenIdx = 0;
for (const col of table.cols) {
if (tokenIdx < tokens.length) {
const tok = tokens[tokenIdx];
let val = this._parseZonNode(tok);
if (table.dictionaries && table.dictionaries.has(col) && typeof val === 'number') {
const dict = table.dictionaries.get(col);
if (val >= 0 && val < dict.length) {
val = dict[val];
}
}
row[col] = val;
tokenIdx++;
}
}
while (tokenIdx < tokens.length) {
const tok = tokens[tokenIdx];
if (tok.includes(':') && !this._isURL(tok) && !this._isTimestamp(tok)) {
const colonIdx = tok.indexOf(':');
const key = tok.substring(0, colonIdx).trim();
const val = tok.substring(colonIdx + 1).trim();
if (/^[a-zA-Z_][\w\.]*$/.test(key)) {
row[key] = this._parseValue(val);
}
}
tokenIdx++;
}
if (table.omittedCols) {
for (const col of table.omittedCols) {
row[col] = table.row_index + 1;
}
}
table.row_index++;
return row;
}
/**
* Checks if string is a URL.
*
* @param s - String to check
* @returns True if URL format
*/
_isURL(s) {
return /^https?:\/\//.test(s) || /^[\/]/.test(s);
}
/**
* Checks if string is a timestamp.
*
* @param s - String to check
* @returns True if timestamp format
*/
_isTimestamp(s) {
return /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}/.test(s) || /^\d{2}:\d{2}:\d{2}/.test(s);
}
/**
* Reconstructs table from parsed rows.
*
* @param table - Table information
* @returns Array of reconstructed objects
*/
_reconstructTable(table) {
return table.rows.map(row => this._unflatten(row));
}
/**
* Recursively parses ZON nested structures.
*
* @param text - Text to parse
* @param depth - Current nesting depth
* @returns Parsed value
*/
_parseZonNode(text, depth = 0) {
if (depth > 100) {
throw new exceptions_1.ZonDecodeError('Maximum nesting depth exceeded (100)');
}
const trimmed = text.trim();
if (!trimmed) {
return null;
}
if (trimmed.startsWith('{') && trimmed.endsWith('}')) {
// Verify that the first brace matches the last brace
let depth = 0;
let matchIndex = -1;
let inQuote = false;
let quoteChar = '';
for (let i = 0; i < trimmed.length; i++) {
const char = trimmed[i];
if (['"', "'"].includes(char)) {
if (!inQuote) {
inQuote = true;
quoteChar = char;
}
else if (char === quoteChar) {
inQuote = false;
}
}
else if (!inQuote) {
if (char === '{')
depth++;
else if (char === '}') {
depth--;
if (depth === 0) {
matchIndex = i;
break;
}
}
}
}
// Only parse as single object if the matching brace is the last character
if (matchIndex === trimmed.length - 1) {
const content = trimmed.substring(1, trimmed.length - 1).trim();
if (!content) {
return {};
}
const obj = {};
const pairs = this._splitObjectProperties(content);
if (pairs.length > constants_1.MAX_OBJECT_KEYS) {
throw new exceptions_1.ZonDecodeError(`[E304] Object key count exceeds maximum (${constants_1.MAX_OBJECT_KEYS} keys)`, { code: 'E304' });
}
for (const pair of pairs) {
let keyStr;
let valStr;
let splitIdx = -1;
let splitChar = '';
let inQuote = false;
let quoteChar = '';
let depth = 0;
for (let i = 0; i < pair.length; i++) {
const char = pair[i];
if (char === '\\' && i + 1 < pair.length) {
i++;
continue;
}
if (['"', "'"].includes(char)) {
if (!inQuote) {
inQuote = true;
quoteChar = char;
}
else if (char === quoteChar) {
inQuote = false;
}
}
else if (!inQuote) {
if (char === ':') {
if (depth === 0) {
splitIdx = i;
splitChar = ':';
break;
}
}
else if (char === '{' || char === '[') {
if (depth === 0 && splitIdx === -1) {
splitIdx = i;
splitChar = char;
break;
}
depth++;
}
else if (char === '}' || char === ']') {
depth--;
}
}
}
if (splitIdx !== -1) {
if (splitChar === ':') {
keyStr = pair.substring(0, splitIdx).trim();
valStr = pair.substring(splitIdx + 1).trim();
}
else {
keyStr = pair.substring(0, splitIdx).trim();
valStr = pair.substring(splitIdx).trim();
}
}
else {
continue;
}
const key = this._parseValue(keyStr);
const val = this._parseZonNode(valStr, depth + 1);
obj[key] = val;
}
return obj;
}
}
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
const content = trimmed.substring(1, trimmed.length - 1).trim();
if (!content) {
return [];
}
const items = this._splitByDelimiter(content, ',', true);
if (items.length > constants_1.MAX_ARRAY_LENGTH) {
throw new exceptions_1.ZonDecodeError(`[E303] Array length exceeds maximum (${constants_1.MAX_ARRAY_LENGTH} items)`, { code: 'E303' });
}
return items.map(item => this._parseZonNode(item, depth + 1));
}
// Check for implicit structure (multiline or colon-prefixed or dash-prefixed)
if (!trimmed.startsWith('{') && !trimmed.startsWith('[') && (trimmed.includes('\n') || trimmed.startsWith(':') || trimmed.startsWith('- '))) {
// Check for dash-separated list (YAML-like)
// Use original text to preserve indentation
const lines = text.split('\n');
const validLines = lines.filter(l => l.trim().length > 0);
if (validLines.length > 0 && validLines[0].trim().startsWith('- ')) {
// It's a dash-separated list
const reconstructed = [];
let currentItemLines = [];
// Determine base indentation from the first dash line
let baseIndent = -1;
for (const line of validLines) {
if (line.trim().startsWith('- ')) {
baseIndent = line.search(/\S/);
break;
}
}
if (baseIndent === -1)
baseIndent = 0; // Should not happen given the check above
for (const line of validLines) {
const indent = line.search(/\S/);
const cleanLine = line.trim();
// Heuristic: If baseIndent is 0 (due to trim) and we have a complete previous line,
// and this line is indented, assume the indentation belongs to the block level (siblings).
if (baseIndent === 0 && currentItemLines.length === 1 && !currentItemLines[0].trim().endsWith(':')) {
if (indent > 0) {
baseIndent = indent;
}
}
// Check if this is a new item: must start with '- ' AND have same indentation as base
if (cleanLine.startsWith('- ') && indent === baseIndent) {
// Start of new item
if (currentItemLines.length > 0) {
reconstructed.push(this._parseZonNode(currentItemLines.join('\n'), depth + 1));
}
// Remove '- ' prefix (2 chars) and strip base indent
// The line is like " - item". indent=2. baseIndent=2.
// We want "item".
// line.substring(baseIndent + 2) might work, but we should trimStart?
// cleanLine.substring(2).trim() gives "item".
currentItemLines = [cleanLine.substring(2).trim()];
}
else {
// Continuation of current item
// Strip baseIndent characters to normalize indentation
let contentLine = line;
if (indent >= baseIndent) {
contentLine = line.substring(baseIndent);
}
else {
// Indent is less than base? Should not happen for valid nested content.
// Treat as is (or trim?)
contentLine = line.trim();
}
currentItemLines.push(contentLine);
}
}
if (currentItemLines.length > 0) {
reconstructed.push(this._parseZonNode(currentItemLines.join('\n'), depth + 1));
}
return reconstructed;
}
// Check for implicit object (newline or comma separated)
const objPairs = this._splitObjectProperties(trimmed);
if (objPairs.length > 1 || (objPairs.length === 1 && objPairs[0].includes(':'))) {
// It's an object
const obj = {};
for (const pair of objPairs) {
// Parse key:value
// Reuse logic from brace parsing?
// Or simple split by first colon?
// _splitObjectProperties returns "key: value" strings.
// We need to split by first colon, respecting quotes.
let keyStr;
let valStr;
let splitIdx = -1;
let inQuote = false;
let quoteChar = '';
for (let i = 0; i < pair.length; i++) {
const char = pair[i];
if (['"', "'"].includes(char)) {
if (!inQuote) {
inQuote = true;
quoteChar = char;
}
else if (char === quoteChar) {
inQuote = false;
}
}
else if (!inQuote && char === ':') {
splitIdx = i;
break;
}
}
if (splitIdx !== -1) {
keyStr = pair.substring(0, splitIdx).trim();
valStr = pair.substring(splitIdx + 1).trim();
// Handle implicit array item starting with dash in value?
// No, _parseZonNode handles that recursively.
const key = this._parseValue(keyStr);
const val = this._parseZonNode(valStr, depth + 1);
obj[key] = val;
}
}
return obj;
}
// Try splitting by comma ONLY first (for arrays)
const items = this._splitByDelimiter(trimmed, ',', false);
if (items.length > 1 || (items.length === 1 && items[0].startsWith(':'))) {
// Check if it looks like an object (all items are k:v)
let isObject = true;
const parsedItems = [];
for (const item of items) {
const cleanItem = item.trim();
if (cleanItem.startsWith(':')) {
// Array item marker
isObject = false;
parsedItems.push(this._parseZonNode(cleanItem.substring(1), depth + 1));
}
else {
// Check if item is an Implicit Object (multiline with multiple KVs)
// If an item contains newlines and looks like multiple KVs, then it's an object structure,
// so the parent must be an Array (list of objects).
if (cleanItem.includes('\n')) {
const subItems = this._splitByDelimiter(cleanItem, '\n', false); // Split by newline to check structure
// Filter empty lines
const validSubItems = subItems.filter(s => s.trim().length > 0);
if (validSubItems.length > 1) {
// Check if sub-items look like KVs
let allKVs = true;
for (const sub of validSubItems) {
const colonIdx = this._findDelimiter(sub.trim(), ':');
if (colonIdx === -1 || colonIdx === 0) {
allKVs = false;
break;
}
}
if (allKVs) {
// Item is an Implicit Object.
// So parent is an Array.
isObject = false;
parsedItems.push(this._parseZonNode(cleanItem, depth + 1));
continue;
}
}
}
// Check for k:v pattern
const colonIdx = this._findDelimiter(cleanItem, ':');
if (colonIdx === -1 || colonIdx === 0) {
isObject = false;
parsedItems.push(this._parseZonNode(cleanItem, depth + 1));
}
else {
// Potential KV
const key = cleanItem.substring(0, colonIdx).trim();
if (!/^[a-zA-Z_][\w\.]*$/.test(key)) {
isObject = false;
parsedItems.push(this._parseZonNode(cleanItem, depth + 1));
}
else {
// It's a KV pair
parsedItems.push(cleanItem);
}
}
}
}
if (isObject) {
// Parse as object
const obj = {};
for (const item of parsedItems) {
const colonIdx = this._findDelimiter(item, ':');
const keyStr = item.substring(0, colonIdx).trim();
const valStr = item.substring(colonIdx + 1).trim();
const key = this._parseValue(keyStr);
const val = this._parseZonNode(valStr, depth + 1);
obj[key] = val;
}
return obj;
}
else {
// Return array
return parsedItems;
}
}
else {
// Single item (no commas).
// Check if it's an Implicit Object (newline separated KVs).
const lines = this._splitByDelimiter(trimmed, '\n', false); // Split by newline
const validLines = lines.filter(l => l.trim().length > 0);
if (validLines.length > 1) {
// Check if all lines are KVs
let allKVs = true;
for (const line of validLines) {
const colonIdx = this._findDelimiter(line.trim(), ':');
if (colonIdx === -1 || colonIdx === 0) {
allKVs = false;
break;
}
}
if (allKVs) {
// Parse as Implicit Object
const obj = {};
for (const line of validLines) {
const colonIdx = this._findDelimiter(line.trim(), ':');
const keyStr = line.substring(0, colonIdx).trim();
const valStr = line.substring(colonIdx + 1).trim();
const key = this._parseValue(keyStr);
const val = this._parseZonNode(valStr, depth + 1);
obj[key] = val;
}
return obj;
}
}
}
}
return this._parseValue(trimmed);
}
/**
* Finds first occurrence of delimiter outside quotes.
*
* @param text - Text to search
* @param delim - Delimiter to find
* @returns Index of delimiter or -1
*/
_findDelimiter(text, delim) {
let inQuote = false;
let quoteChar = null;
let depth = 0;
for (let i = 0; i < text.length; i++) {
const char = text[i];
if (char === '\\' && i + 1 < text.length) {
i++;
continue;
}
if (['"', "'"].includes(char)) {
// Only treat as quote if we are not already in a quote AND
// (it's the first char OR the previous char was a delimiter/whitespace that implies start of value)
// Actually for _findDelimiter, we assume we are scanning a value from the start.
// So we only enter quote mode if i === 0 or we are strictly at the start of a value.
// But _findDelimiter is generic.
// Let's stick to: Only enter quote mode if we haven't seen non-whitespace content yet?
// But _findDelimiter doesn't track "seen content".
// Let's assume for _findDelimiter, if we hit a quote and we are NOT in a quote,
// it's a start quote ONLY IF it's at i=0.
// Wait, what if we are parsing `key="val"`? _findDelimiter might be called on `"val"`.
// If we are parsing `key=val's`, _findDelimiter called on `val's`.
if (!inQuote) {
// Only start quoting if we are at the beginning of the string (ignoring whitespace)
// Since we don't track whitespace easily here without lookbehind or extra state,
// let's check if the string up to i is empty/whitespace.
const prefix = text.substring(0, i);
if (prefix.trim().length === 0) {
inQuote = true;
quoteChar = char;
}
}
else if (char === quoteChar) {
inQuote = false;
quoteChar = null;
}
}
else if (!inQuote && depth === 0 && char === delim) {
return i;
}
if (!inQuote) {
if (char === '{' || char === '[')
depth++;
if (char === '}' || char === ']')
depth--;
}
}
return -1;
}
/**
* Splits text by delimiter while respecting quotes and nesting.
*
* @param text - Text to split
* @param delim - Delimiter character (default: ',')
* @param splitByNewline - Whether to treat newline as delimiter (default: false)
* @returns Array of split parts
*/
_splitByDelimiter(text, delim = ',', splitByNewline = false) {
const parts = [];
const current = [];
let inQuote = false;
let quoteChar = null;
let depth = 0;
for (let i = 0; i < text.length; i++) {
const char = text[i];
if (char === '\\' && i + 1 < text.length) {
current.push(char);
current.push(text[++i]);
continue;
}
if (['"', "'"].includes(char)) {
if (!inQuote) {
// Only start quoting if the current token is empty or just whitespace
if (current.every(c => c.trim() === '')) {
inQuote = true;
quoteChar = char;
current.push(char);
}
else {
// Treat as literal quote inside a word
current.push(char);
}
}
else if (char === quoteChar) {
if (i + 1 < text.length && text[i + 1] === quoteChar) {
// Handle escaped quote ("" or '')
current.push(char);
current.push(text[i + 1]);
i++;
}
else {
inQuote = false;
quoteChar = null;
current.push(char);
}
}
else {
current.push(char);
}
}
else if (!inQuote) {
if (['{', '['].includes(char)) {
depth++;
current.push(char);
}
else if (['}', ']'].includes(char)) {
depth--;
current.push(char);
}
else if ((char === delim || (splitByNewline && char === '\n')) && depth === 0) {
// Treat newline as delimiter if enabled
if (current.length > 0) {
// Only trim for comma delimiters, preserve whitespace for newline delimiters
const part = current.join('');
parts.push(delim === '\n' ? part : part.trim());
current.length = 0;
}
}
else {
current.push(char);
}
}
else {
current.push(char);
}
}
if (current.length > 0) {
const final = current.join('');
const trimmedFinal = final.trim();
if (trimmedFinal) {
// Only trim for comma delimiters, preserve whitespace for newline delimiters
parts.push(delim === '\n' ? final : trimmedFinal);
}
}
return parts;
}
/**
* Parses a primitive value.
*
* @param val - Value string to parse
* @returns Parsed primitive value
*/
/**
* Parses a value, handling primitives and nested ZON structures.
*
* @param val - Value string to parse
* @returns Parsed value
*/
_parseValue(val) {
const trimmedVal = val.trim();
const parsed = (0, utils_1.parseValue)(val);
if (trimmedVal.startsWith('"')) {
return parsed;
}
if (typeof parsed === 'string') {
// Don't recurse back to _parseZonNode to avoid infinite loops.
// _parseZonNode should handle structure before calling _parseValue.
return parsed;
}
return parsed;
}
/**
* Unflattens dictionary with dotted keys.
*
* @param d - Flattened dictionary
* @returns Unflattened object
*/
_unflatten(d) {
const result = {};
for (const [key, value] of Object.entries(d)) {
if (!key.includes('.')) {
result[key] = value;
continue;
}
const parts = key.split('.');
if (parts.some(p => p === '__proto__' || p === 'constructor' || p === 'prototype')) {
continue;
}
let target = result;
for (let i = 0; i < parts.length - 1; i++) {
const part = parts[i];
const nextPart = parts[i + 1];
if (/^\d+$/.test(nextPart)) {
const idx = parseInt(nextPart, 10);
if (!(part in target)) {
target[part] = [];
}
while (target[part].length <= idx) {
target[part].push({});
}
target = target[part][idx];
parts.splice(i + 1, 1);
break;
}
else {
if (!(part in target)) {
target[part] = {};
}
if (typeof target[part] === 'object' && !Array.isArray(target[part])) {
target = target[part];
}
else {
break;
}
}
}
const finalKey = parts[parts.length - 1];
if (!/^\d+$/.test(finalKey)) {
target[finalKey] = value;
}
}
return result;
}
/**
* Checks if quotes are balanced in string.
*/
_areQuotesBalanced(s) {
let inQuote = false;
let quoteChar = '';
for (let i = 0; i < s.length; i++) {
const char = s[i];
if (char === '\\' && i + 1 < s.length) {
i++;
continue;
}
if (['"', "'"].includes(char)) {
if (!inQuote) {
inQuote = true;
quoteChar = char;
}
else if (char === quoteChar) {
inQuote = false;
}
}
}
return !inQuote;
}
/**
* Splits object properties respecting indentation.
*
* @param text - Object content
* @returns Array of property strings
*/
_splitObjectProperties(text) {
// If no newlines, fall back to comma splitting
if (!text.includes('\n')) {
return this._splitByDelimiter(text, ',', true);
}
const lines = text.split('\n');
const properties = [];
let currentProperty = [];
let baseIndent = -1;
let braceDepth = 0;
let bracketDepth = 0;
let hasEnteredBrace = false;
let hasEnteredBracket = false;
for (const line of lines) {
if (!line.trim())
continue; // Skip empty lines
const indent = line.search(/\S/);
if (indent === -1)
continue; // Should be covered by trim check
if (baseIndent === -1)
baseIndent = indent;
// Heuristic: If baseIndent is 0 (due to trim) and we have a complete previous line,
// and this line is indented, assume the indentation belongs to the block level (siblings).
if (baseIndent === 0 && currentProperty.length === 1 && braceDepth === 0 && bracketDepth === 0) {
const prevLine = currentProperty[0].trim();
// Check if previous line looks complete (not ending in separator/opener) and has balanced quotes
if (!prevLine.endsWith(':') && !prevLine.endsWith('{') && !prevLine.endsWith('[') && !prevLine.endsWith(',') && this._areQuotesBalanced(prevLine)) {
if (indent > 0) {
baseIndent = indent;
}
}
}
// Split if:
// 1. At base indent AND not inside braces/brackets
// 2. OR baseIndent is 0 (trimmed), we are not inside braces/brackets, AND we have previously entered/exited a block (implies completion)
const isSplit = (indent === baseIndent || (baseIndent === 0 && (hasEnteredBrace || hasEnteredBracket))) && braceDepth === 0 && bracketDepth === 0;
if (isSplit) {
// New property at base level
if (currentProperty.length > 0) {
// Join and remove trailing comma
properties.push(currentProperty.join('\n').trim().replace(/,$/, ''));
}
currentProperty = [line];
// Reset block flags for new property
hasEnteredBrace = false;
hasEnteredBracket = false;
}
else {
// Continuation or nested
currentProperty.push(line);
}
// Update depths
let inQuote = false;
let quoteChar = '';
for (let i = 0; i < line.length; i++) {
const char = line[i];
if (char === '\\' && i + 1 < line.length) {
i++;
continue;
}
if (['"', "'"].includes(char)) {
if (!inQuote) {
inQuote = true;
quoteChar = char;
}
else if (char === quoteChar) {
inQuote = false;
}
}
else if (!inQuote) {
if (char === '{') {
braceDepth++;
hasEnteredBrace = true;
}
else if (char === '}')
braceDepth--;
else if (char === '[') {
bracketDepth++;
hasEnteredBracket = true;
}
else if (char === ']')
bracketDepth--;
}
}
}
if (currentProperty.length > 0) {
properties.push(currentProperty.join('\n').trim().replace(/,$/, ''));
}
return properties;
}
}
exports.ZonDecoder = ZonDecoder;
// End of class
/**
* Decodes ZON format string to original data v1.1.0.
*
* @param data - ZON format string
* @param options - Decode options
* @returns Decoded data or DecodeResult if extractMetadata is true
*/
function decode(data, options) {
return new ZonDecoder(options).decode(data, options);
}