UNPKG

@zenoaihq/tson

Version:

Token-efficient Structured Object Notation – a compact serialization format designed for efficient data exchange with LLMs

482 lines (479 loc) 13.7 kB
// src/utils.ts

/**
 * Characters that force a string to be emitted in quoted form.
 *
 * Every character that `splitByDelimiter` treats as structural (delimiters,
 * brackets, parentheses, the quote character and the backslash escape) must be
 * listed here; otherwise a bare value containing it would be mis-split when
 * the text is parsed back.
 */
const SPECIAL_CHARS = /* @__PURE__ */ new Set([
  ",", "|", "@", "#",
  "{", "}", "[", "]", "(", ")",
  '"', "\\",
  "\n", "\r", "\t", " ",
]);

/** Escape letter -> decoded character, the inverse of `escapeString`. */
const UNESCAPE_MAP = /* @__PURE__ */ new Map([
  ["n", "\n"],
  ["r", "\r"],
  ["t", "\t"],
  ['"', '"'],
  ["\\", "\\"],
]);

/**
 * Decide whether a string must be wrapped in double quotes when serialized.
 *
 * @param {string} value - Raw string value or key.
 * @returns {boolean} True when the bare form would be ambiguous: empty string,
 *   a literal keyword, leading/trailing whitespace, number-like text, or any
 *   structural character.
 */
function needsQuoting(value) {
  if (value.length === 0) {
    return true;
  }
  if (value === "true" || value === "false" || value === "null") {
    return true;
  }
  if (value[0].trim() === "" || value[value.length - 1].trim() === "") {
    return true;
  }
  if (looksLikeNumber(value)) {
    return true;
  }
  for (const char of value) {
    if (SPECIAL_CHARS.has(char)) {
      return true;
    }
  }
  return false;
}

/**
 * Report whether `Number(value)` yields a finite number for an untrimmed,
 * non-empty string.
 *
 * @param {string} value - Candidate text.
 * @returns {boolean} True when the text would be parsed back as a number.
 */
function looksLikeNumber(value) {
  if (value.length === 0) {
    return false;
  }
  // Number.isFinite rejects NaN as well as +/-Infinity.
  return Number.isFinite(Number(value)) && value.trim() === value;
}

/**
 * Escape backslash, double quote, newline, carriage return and tab so the
 * string can be placed between double quotes.
 *
 * @param {string} value - Raw string.
 * @returns {string} Escaped string (without surrounding quotes).
 */
function escapeString(value) {
  // Backslashes must be doubled first so later replacements are not re-escaped.
  return value
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n")
    .replace(/\r/g, "\\r")
    .replace(/\t/g, "\\t");
}

/**
 * Reverse `escapeString`.
 *
 * Decoding happens in a single left-to-right pass so that an escaped
 * backslash followed by `n`, `r`, `t` or `"` is not mistaken for a second
 * escape sequence. Unknown escape sequences are left untouched.
 *
 * @param {string} value - Escaped string (without surrounding quotes).
 * @returns {string} Decoded string.
 */
function unescapeString(value) {
  return value.replace(/\\([\\"nrt])/g, (_match, code) => UNESCAPE_MAP.get(code));
}

/**
 * Format a key (object key or nested schema key), quoting it when required.
 *
 * @param {string} key - Raw key.
 * @returns {string} Key text safe to place in a key list.
 */
function formatKey(key) {
  const keyStr = String(key);
  return needsQuoting(keyStr) ? `"${escapeString(keyStr)}"` : keyStr;
}

/**
 * Serialize a primitive value.
 *
 * @param {null|boolean|number|string} value - Primitive to format.
 * @returns {string} Serialized text.
 * @throws {Error} If `value` is not one of the supported primitive types.
 */
function formatPrimitive(value) {
  if (value === null) {
    return "null";
  }
  if (typeof value === "boolean") {
    return value ? "true" : "false";
  }
  if (typeof value === "number") {
    return String(value);
  }
  if (typeof value === "string") {
    return needsQuoting(value) ? `"${escapeString(value)}"` : value;
  }
  throw new Error(`Cannot format non-primitive type: ${typeof value}`);
}

/**
 * Parse a primitive token.
 *
 * @param {string} value - Token text; surrounding whitespace is ignored.
 * @returns {null|boolean|number|string} Decoded value. Empty text decodes to
 *   the empty string; unrecognized text is returned as a (trimmed) string.
 */
function parsePrimitive(value) {
  const trimmed = value.trim();
  if (trimmed.length === 0) {
    return "";
  }
  if (trimmed === "true") {
    return true;
  }
  if (trimmed === "false") {
    return false;
  }
  if (trimmed === "null") {
    return null;
  }
  // Require two distinct quote characters: a lone `"` is not a quoted string.
  if (trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"')) {
    return unescapeString(trimmed.slice(1, -1));
  }
  if (looksLikeNumber(trimmed)) {
    return Number(trimmed);
  }
  return trimmed;
}

/**
 * Check whether `data` is a non-empty array of plain objects that all have the
 * same keys in the same order.
 *
 * @param {unknown} data - Value to inspect.
 * @returns {boolean} True when the array qualifies for tabular encoding.
 */
function isUniformObjectArray(data) {
  if (!Array.isArray(data) || data.length === 0) {
    return false;
  }
  if (!data.every((item) => isPlainObject(item))) {
    return false;
  }
  const firstKeys = Object.keys(data[0]);
  return data.slice(1).every((item) => {
    const keys = Object.keys(item);
    return keys.length === firstKeys.length && keys.every((key, i) => key === firstKeys[i]);
  });
}

/**
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True for non-null, non-array objects whose
 *   `Object.prototype.toString` tag is `[object Object]`.
 */
function isPlainObject(value) {
  return (
    typeof value === "object" &&
    value !== null &&
    !Array.isArray(value) &&
    Object.prototype.toString.call(value) === "[object Object]"
  );
}

/**
 * Split `text` on `delimiter`, ignoring delimiters that appear inside double
 * quotes or inside `{}`, `[]` or `()` groups. A backslash makes the following
 * character literal. Each piece is trimmed; a trailing empty piece is dropped.
 *
 * @param {string} text - Text to split.
 * @param {string} delimiter - Single-character delimiter.
 * @returns {string[]} Trimmed pieces.
 */
function splitByDelimiter(text, delimiter) {
  const result = [];
  let current = "";
  let inQuotes = false;
  let escapeNext = false;
  let depthCurly = 0;
  let depthSquare = 0;
  let depthParen = 0;
  for (const char of text) {
    if (escapeNext) {
      current += char;
      escapeNext = false;
      continue;
    }
    if (char === "\\") {
      current += char;
      escapeNext = true;
      continue;
    }
    if (char === '"') {
      inQuotes = !inQuotes;
      current += char;
      continue;
    }
    if (inQuotes) {
      current += char;
      continue;
    }
    const atTopLevel = depthCurly === 0 && depthSquare === 0 && depthParen === 0;
    if (char === delimiter && atTopLevel) {
      result.push(current.trim());
      current = "";
      continue;
    }
    switch (char) {
      case "{":
        depthCurly++;
        break;
      case "}":
        depthCurly--;
        break;
      case "[":
        depthSquare++;
        break;
      case "]":
        depthSquare--;
        break;
      case "(":
        depthParen++;
        break;
      case ")":
        depthParen--;
        break;
      default:
        break;
    }
    current += char;
  }
  if (current.length > 0) {
    result.push(current.trim());
  }
  return result;
}

/**
 * Find the index of the quote that closes a string starting with `"` at
 * index 0, honoring backslash escapes.
 *
 * @param {string} text - Text whose first character is `"`.
 * @returns {number} Index of the closing quote, or -1 if there is none.
 */
function findClosingQuote(text) {
  for (let i = 1; i < text.length; i++) {
    if (text[i] === "\\") {
      i++; // skip the escaped character
    } else if (text[i] === '"') {
      return i;
    }
  }
  return -1;
}

/**
 * Like `String.prototype.lastIndexOf` for a single character, but ignores
 * occurrences inside double quotes or directly after a backslash (the same
 * quoting rules `splitByDelimiter` applies).
 *
 * @param {string} text - Text to scan.
 * @param {string} target - Single character to look for.
 * @returns {number} Index of the last unquoted occurrence, or -1.
 */
function lastUnquotedIndexOf(text, target) {
  let inQuotes = false;
  let last = -1;
  for (let i = 0; i < text.length; i++) {
    const char = text[i];
    if (char === "\\") {
      i++;
    } else if (char === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && char === target) {
      last = i;
    }
  }
  return last;
}

/**
 * Define `key` as an own, enumerable data property of `target`.
 *
 * Plain assignment would invoke the `__proto__` setter for a field literally
 * named `__proto__`, changing the prototype instead of storing the field.
 *
 * @param {object} target - Object to modify.
 * @param {string} key - Property name.
 * @param {unknown} value - Property value.
 */
function setOwn(target, key, value) {
  Object.defineProperty(target, key, {
    value,
    writable: true,
    enumerable: true,
    configurable: true,
  });
}

/**
 * Parse one entry of a key list: `name`, `"quoted name"`, or either form
 * followed by a nested schema `(@k1,k2,...)`.
 *
 * @param {string} keyString - Key text as it appears in the header.
 * @returns {{keyName: string, schema: string[]|null}} The decoded key name and
 *   the raw nested schema keys (still in their serialized form), or `null`
 *   when the key has no schema.
 * @throws {Error} If a schema group is opened but not closed by `)`.
 */
function parseKeySchema(keyString) {
  const trimmed = keyString.trim();
  const closeIdx = trimmed.startsWith('"') ? findClosingQuote(trimmed) : -1;
  const afterQuote = closeIdx === -1 ? null : trimmed.slice(closeIdx + 1).trim();
  // Only treat the key as quoted when nothing but an optional schema follows
  // the closing quote; anything else falls back to the bare-key rules.
  const isQuoted = afterQuote !== null && (afterQuote === "" || afterQuote.startsWith("("));

  let keyName;
  let schemaPart;
  if (isQuoted) {
    keyName = unescapeString(trimmed.slice(1, closeIdx));
    schemaPart = afterQuote;
  } else {
    const parenIdx = trimmed.indexOf("(");
    if (parenIdx === -1) {
      return { keyName: trimmed, schema: null };
    }
    keyName = trimmed.slice(0, parenIdx).trim();
    schemaPart = trimmed.slice(parenIdx);
  }

  if (schemaPart.length === 0) {
    return { keyName, schema: null };
  }
  if (!schemaPart.endsWith(")")) {
    throw new Error(`Invalid key schema syntax: ${keyString}`);
  }
  let schemaStr = schemaPart.slice(1, -1).trim();
  if (schemaStr.startsWith("@")) {
    schemaStr = schemaStr.slice(1);
  }
  return { keyName, schema: splitByDelimiter(schemaStr, ",") };
}

/**
 * Build a lookup from decoded key name to its nested schema (or `null`).
 *
 * @param {string[]} keys - Key entries as returned by `parseKeys`.
 * @returns {Object<string, string[]|null>} Schema lookup.
 */
function buildSchemaMap(keys) {
  const schemaMap = {};
  for (const key of keys) {
    const { keyName, schema } = parseKeySchema(key);
    setOwn(schemaMap, keyName, schema);
  }
  return schemaMap;
}

/**
 * Parse an object header: a comma-separated key list optionally followed by
 * `#<rowCount>`.
 *
 * @param {string} keysStr - Header text (without the leading `@`).
 * @returns {{keys: string[], count: number|null}} Raw key entries and the
 *   declared row count (`null` when absent).
 * @throws {Error} If the text after `#` does not start with an integer.
 */
function parseKeys(keysStr) {
  // A `#` inside a quoted key is part of the key, not a row-count marker.
  const hashIdx = lastUnquotedIndexOf(keysStr, "#");
  if (hashIdx === -1) {
    return { keys: splitByDelimiter(keysStr, ","), count: null };
  }
  const countPart = keysStr.slice(hashIdx + 1).trim();
  const count = Number.parseInt(countPart, 10);
  if (Number.isNaN(count)) {
    throw new Error(`Invalid row count: ${countPart}`);
  }
  return { keys: splitByDelimiter(keysStr.slice(0, hashIdx), ","), count };
}

// src/serializer.ts

/**
 * Serialize a value to TSON text.
 *
 * @param {unknown} data - null, boolean, number, string, array or object.
 * @returns {string} TSON text.
 * @throws {TypeError} If a value of an unsupported type is encountered.
 */
function dumps(data) {
  return serializeValue(data);
}

/**
 * Serialize `data` and write it to `filePath` as UTF-8.
 *
 * @param {unknown} data - Value to serialize.
 * @param {string} filePath - Destination path.
 * @returns {Promise<void>}
 */
async function dump(data, filePath) {
  const fs = await import('fs/promises');
  await fs.writeFile(filePath, dumps(data), "utf-8");
}

/**
 * Serialize any supported value, choosing the tabular form for uniform arrays
 * of objects.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} TSON text.
 * @throws {TypeError} If `value` is of an unsupported type.
 */
function serializeValue(value) {
  if (
    value === null ||
    typeof value === "boolean" ||
    typeof value === "number" ||
    typeof value === "string"
  ) {
    return formatPrimitive(value);
  }
  if (Array.isArray(value)) {
    // Rows of key-less objects would serialize to empty rows, which the parser
    // skips; encode such arrays element by element instead.
    const useTabular = isUniformObjectArray(value) && Object.keys(value[0]).length > 0;
    return useTabular ? serializeTabular(value) : serializeArray(value);
  }
  if (typeof value === "object") {
    return serializeObject(value);
  }
  throw new TypeError(`Cannot serialize type: ${typeof value}`);
}

/**
 * Serialize an object as `{@k1,k2|v1,v2}` (`{@}` when it has no keys).
 *
 * @param {object} obj - Object to serialize.
 * @returns {string} TSON text.
 */
function serializeObject(obj) {
  const keys = Object.keys(obj);
  if (keys.length === 0) {
    return "{@}";
  }
  const keysStr = keys.map((key) => formatKey(key)).join(",");
  const valuesStr = keys.map((key) => serializeValue(obj[key])).join(",");
  return `{@${keysStr}|${valuesStr}}`;
}

/**
 * Serialize an array as `[v1,v2,...]`.
 *
 * @param {unknown[]} arr - Array to serialize.
 * @returns {string} TSON text.
 */
function serializeArray(arr) {
  return `[${arr.map((value) => serializeValue(value)).join(",")}]`;
}

/**
 * Serialize a uniform array of objects as
 * `{@k1,k2#<rows>|r1v1,r1v2|r2v1,r2v2}`. Keys whose values are objects with
 * identical keys in every row get a nested schema `key(@a,b)` and their values
 * are written as `{va,vb}`.
 *
 * @param {object[]} arr - Uniform array of plain objects.
 * @returns {string} TSON text.
 * @throws {Error} If the array is not uniform.
 */
function serializeTabular(arr) {
  if (arr.length === 0) {
    return "[]";
  }
  if (!isUniformObjectArray(arr)) {
    throw new Error("Array is not uniform - cannot use tabular format");
  }
  const keys = Object.keys(arr[0]);
  const nestedSchemas = detectNestedSchemas(arr, keys);

  const keyParts = keys.map((key) => {
    const keyStr = formatKey(key);
    const schemaKeys = nestedSchemas.get(key);
    if (schemaKeys === undefined) {
      return keyStr;
    }
    return `${keyStr}(@${schemaKeys.map((schemaKey) => formatKey(schemaKey)).join(",")})`;
  });

  const rowParts = arr.map((obj) =>
    keys
      .map((key) => {
        const schemaKeys = nestedSchemas.get(key);
        return schemaKeys === undefined
          ? serializeValue(obj[key])
          : serializeSchematizedObject(obj[key], schemaKeys);
      })
      .join(",")
  );

  return `{@${keyParts.join(",")}#${arr.length}|${rowParts.join("|")}}`;
}

/**
 * Find the keys whose value is, in every row, a non-array object with the same
 * keys in the same order.
 *
 * A Map is used (rather than a plain object with `in`) so that keys such as
 * `constructor` are not confused with inherited properties.
 *
 * @param {object[]} arr - Rows.
 * @param {string[]} keys - Row keys to inspect.
 * @returns {Map<string, string[]>} Key -> nested key list.
 */
function detectNestedSchemas(arr, keys) {
  const nestedSchemas = new Map();
  for (const key of keys) {
    const values = arr.map((obj) => obj[key]);
    if (values.length === 0) {
      continue;
    }
    const allObjects = values.every(
      (v) => typeof v === "object" && v !== null && !Array.isArray(v)
    );
    if (!allObjects) {
      continue;
    }
    const firstKeys = Object.keys(values[0]);
    const allSame = values.slice(1).every((v) => {
      const objKeys = Object.keys(v);
      return objKeys.length === firstKeys.length && objKeys.every((k, i) => k === firstKeys[i]);
    });
    if (allSame) {
      nestedSchemas.set(key, firstKeys);
    }
  }
  return nestedSchemas;
}

/**
 * Serialize an object whose keys are declared in the header, writing only the
 * values in schema order: `{v1,v2}`.
 *
 * @param {object} obj - Object to serialize.
 * @param {string[]} schema - Keys in header order.
 * @returns {string} TSON text.
 */
function serializeSchematizedObject(obj, schema) {
  if (Object.keys(obj).length === 0) {
    return "{}";
  }
  return `{${schema.map((key) => serializeValue(obj[key])).join(",")}}`;
}

// src/deserializer.ts

/**
 * Parse TSON text.
 *
 * @param {string} s - TSON text.
 * @returns {unknown} Decoded value; empty or whitespace-only input yields `null`.
 * @throws {Error} On malformed input.
 */
function loads(s) {
  const trimmed = s.trim();
  if (trimmed.length === 0) {
    return null;
  }
  return parseValue(trimmed);
}

/**
 * Read `filePath` as UTF-8 and parse it as TSON.
 *
 * @param {string} filePath - Source path.
 * @returns {Promise<unknown>} Decoded value.
 */
async function load(filePath) {
  const fs = await import('fs/promises');
  const content = await fs.readFile(filePath, "utf-8");
  return loads(content);
}

/**
 * Parse one value, dispatching on its first character.
 *
 * @param {string} text - Value text.
 * @returns {unknown} Decoded value.
 */
function parseValue(text) {
  const trimmed = text.trim();
  if (trimmed.length === 0) {
    return "";
  }
  if (trimmed[0] === "{") {
    return parseObject(trimmed);
  }
  if (trimmed[0] === "[") {
    return parseArray(trimmed);
  }
  return parsePrimitive(trimmed);
}

/**
 * Parse a brace-delimited value. `{@...}` is a keyed object or table; `{}` and
 * `{@}` are the empty object; a brace group without `@` has no key
 * information here and is returned as an array of its values.
 *
 * @param {string} text - Text starting with `{` and ending with `}`.
 * @returns {object|unknown[]} Decoded value.
 * @throws {Error} If the text is not wrapped in braces.
 */
function parseObject(text) {
  const trimmed = text.trim();
  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) {
    throw new Error(`Invalid object format: ${text}`);
  }
  const content = trimmed.slice(1, -1).trim();
  if (content === "@" || content === "") {
    return {};
  }
  if (content.startsWith("@")) {
    return parseKeyedObject(content.slice(1));
  }
  return splitByDelimiter(content, ",").map((v) => parseValue(v));
}

/**
 * Parse the inside of `{@...}`: a header, then one or more `|`-separated rows.
 * A declared row count or more than one row makes the result an array of
 * objects; otherwise a single object is returned.
 *
 * @param {string} content - Text between `{@` and `}`.
 * @returns {object|object[]} Decoded object or table.
 * @throws {Error} If the header or the values are missing.
 */
function parseKeyedObject(content) {
  const parts = splitByDelimiter(content, "|");
  if (parts.length < 1) {
    throw new Error("Invalid object format: missing keys");
  }
  const { keys, count } = parseKeys(parts[0]);
  const schemaMap = buildSchemaMap(keys);
  // Use the same key parser as buildSchemaMap so quoted keys are decoded and
  // the field names match the schema lookup.
  const fieldNames = keys.map((k) => parseKeySchema(k).keyName);
  if (parts.length === 1) {
    throw new Error("Invalid object format: missing values");
  }
  const valueParts = parts.slice(1);
  const isTabular = count !== null || valueParts.length > 1;
  return isTabular
    ? parseTabularArray(fieldNames, valueParts, schemaMap, count)
    : parseSingleObject(fieldNames, valueParts[0], schemaMap);
}

/**
 * Parse one row of comma-separated values into an object.
 *
 * @param {string[]} fieldNames - Decoded field names, in order.
 * @param {string} valuesStr - Comma-separated values.
 * @param {Object<string, string[]|null>} schemaMap - Nested schema per field.
 * @returns {object} Decoded object.
 * @throws {Error} If the number of values differs from the number of fields.
 */
function parseSingleObject(fieldNames, valuesStr, schemaMap) {
  const values = splitByDelimiter(valuesStr, ",");
  if (values.length !== fieldNames.length) {
    throw new Error(
      `Field count mismatch: ${fieldNames.length} fields but ${values.length} values`
    );
  }
  const obj = {};
  fieldNames.forEach((fieldName, i) => {
    const schema = schemaMap[fieldName];
    const parsed = schema
      ? parseSchematizedValue(values[i], schema)
      : parseValue(values[i]);
    setOwn(obj, fieldName, parsed);
  });
  return obj;
}

/**
 * Parse table rows into an array of objects. Blank rows are skipped.
 *
 * @param {string[]} fieldNames - Decoded field names, in order.
 * @param {string[]} rowParts - Row texts.
 * @param {Object<string, string[]|null>} schemaMap - Nested schema per field.
 * @param {number|null} expectedCount - Declared row count, or `null`.
 * @returns {object[]} Decoded rows.
 * @throws {Error} If a count was declared and does not match.
 */
function parseTabularArray(fieldNames, rowParts, schemaMap, expectedCount) {
  const result = rowParts
    .filter((rowStr) => rowStr.trim().length > 0)
    .map((rowStr) => parseSingleObject(fieldNames, rowStr, schemaMap));
  if (expectedCount !== null && result.length !== expectedCount) {
    throw new Error(
      `Row count mismatch: expected ${expectedCount} rows but got ${result.length}`
    );
  }
  return result;
}

/**
 * Parse a `{v1,v2}` value whose keys come from a nested schema.
 *
 * @param {string} valueStr - Brace-wrapped values.
 * @param {string[]} schema - Raw schema key entries from the header.
 * @returns {object} Decoded object (`{}` for empty braces).
 * @throws {Error} If braces are missing or the value count does not match.
 */
function parseSchematizedValue(valueStr, schema) {
  const trimmed = valueStr.trim();
  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) {
    throw new Error(`Schematized value must be wrapped in braces: ${valueStr}`);
  }
  const content = trimmed.slice(1, -1).trim();
  if (content.length === 0) {
    return {};
  }
  const values = splitByDelimiter(content, ",");
  if (values.length !== schema.length) {
    throw new Error(
      `Schema mismatch: ${schema.length} keys but ${values.length} values`
    );
  }
  const obj = {};
  schema.forEach((entry, i) => {
    const { keyName, schema: nestedSchema } = parseKeySchema(entry);
    const parsed = nestedSchema
      ? parseSchematizedValue(values[i], nestedSchema)
      : parseValue(values[i]);
    setOwn(obj, keyName, parsed);
  });
  return obj;
}

/**
 * Parse `[v1,v2,...]`. Empty elements are skipped.
 *
 * @param {string} text - Text starting with `[` and ending with `]`.
 * @returns {unknown[]} Decoded array.
 * @throws {Error} If the text is not wrapped in square brackets.
 */
function parseArray(text) {
  const trimmed = text.trim();
  if (!trimmed.startsWith("[") || !trimmed.endsWith("]")) {
    throw new Error(`Invalid array format: ${text}`);
  }
  const content = trimmed.slice(1, -1).trim();
  if (content.length === 0) {
    return [];
  }
  return splitByDelimiter(content, ",")
    .filter((valueStr) => valueStr.trim().length > 0)
    .map((valueStr) => parseValue(valueStr));
}

export { buildSchemaMap, dump, dumps, escapeString, formatPrimitive, isUniformObjectArray, load, loads, looksLikeNumber, needsQuoting, parseKeySchema, parseKeys, parsePrimitive, splitByDelimiter, unescapeString };
//# sourceMappingURL=index.js.map