@zenoaihq/tson
Version:
Token-efficient Structured Object Notation – a compact serialization format designed for efficient data exchange with LLMs
482 lines (479 loc) • 13.7 kB
JavaScript
// src/utils.ts
// Characters a bare (unquoted) string must not contain. The delimiters
// (, | @ #) and brackets ({ } [ ] ( )) drive splitting and depth tracking in
// splitByDelimiter, and a double quote or backslash switches that tokenizer
// into quoted/escaped mode, so any of them appearing bare would change how the
// surrounding text is split when it is read back. Whitespace is included so
// strings containing it are always emitted in quoted form.
const SPECIAL_CHARS = /* @__PURE__ */ new Set([
  ",", "|", "@", "#",
  "{", "}", "[", "]", "(", ")",
  '"', "\\",
  "\n", "\r", "\t", " "
]);
/**
 * Decide whether a string has to be wrapped in double quotes when serialized.
 *
 * Quoting is required for the empty string, for the literals `true`, `false`
 * and `null`, for strings containing any character in SPECIAL_CHARS, for
 * strings with leading or trailing whitespace, and for strings that
 * looksLikeNumber() would read as a number.
 *
 * @param {string} value - The string about to be written.
 * @returns {boolean} `true` when the string must be quoted.
 */
function needsQuoting(value) {
  if (value.length === 0) {
    return true;
  }
  if (value === "true" || value === "false" || value === "null") {
    return true;
  }
  for (const char of value) {
    if (SPECIAL_CHARS.has(char)) {
      return true;
    }
  }
  // Catches whitespace that is not in SPECIAL_CHARS (e.g. non-breaking space):
  // the reader trims tokens, so it would otherwise be lost.
  if (value[0].trim() === "" || value[value.length - 1].trim() === "") {
    return true;
  }
  return looksLikeNumber(value);
}
/**
 * Report whether a string would be read as a finite number.
 *
 * The string must be non-empty, convert via `Number()` to something other
 * than NaN, carry no leading/trailing whitespace, and be finite.
 *
 * @param {string} value - Candidate text.
 * @returns {boolean} `true` when the text is a finite numeric literal.
 */
function looksLikeNumber(value) {
  if (value.length === 0) {
    return false;
  }
  const parsed = Number(value);
  if (Number.isNaN(parsed)) {
    return false;
  }
  if (value !== value.trim()) {
    return false;
  }
  return Number.isFinite(parsed);
}
/**
 * Escape a string for placement between double quotes.
 *
 * Backslash, double quote, newline, carriage return and tab are replaced by
 * their two-character backslash sequences; every other character is kept.
 *
 * @param {string} value - Raw string content.
 * @returns {string} The escaped content (without surrounding quotes).
 */
function escapeString(value) {
  const sequences = {
    "\\": "\\\\",
    '"': '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t"
  };
  return value.replace(/[\\"\n\r\t]/g, (ch) => sequences[ch]);
}
/**
 * Reverse escapeString(): turn backslash sequences back into the characters
 * they stand for.
 *
 * Decoding is done in a single left-to-right pass so that an escaped
 * backslash is consumed as a unit. Decoding one sequence type at a time would
 * let the second half of `\\` pair up with a following `n`, `r` or `t`
 * (e.g. the escaped form of `C:\temp` would come back with a TAB in it).
 * Backslashes followed by any other character are left untouched.
 *
 * @param {string} value - Escaped content (without surrounding quotes).
 * @returns {string} The decoded string.
 */
function unescapeString(value) {
  const decoded = {
    n: "\n",
    r: "\r",
    t: "\t",
    '"': '"',
    "\\": "\\"
  };
  return value.replace(/\\([nrt"\\])/g, (_match, code) => decoded[code]);
}
/**
 * Render a primitive value as TSON text.
 *
 * `null` and booleans become their literal names, numbers go through
 * `String()`, and strings are emitted bare unless needsQuoting() says they
 * must be wrapped in double quotes (in which case they are escaped first).
 *
 * @param {null|boolean|number|string} value - The primitive to format.
 * @returns {string} The serialized text.
 * @throws {Error} If `value` is not one of the supported primitive types.
 */
function formatPrimitive(value) {
  if (value === null) {
    return "null";
  }
  switch (typeof value) {
    case "boolean":
      return value ? "true" : "false";
    case "number":
      return String(value);
    case "string":
      return needsQuoting(value) ? `"${escapeString(value)}"` : value;
    default:
      throw new Error(`Cannot format non-primitive type: ${typeof value}`);
  }
}
/**
 * Parse a single primitive token.
 *
 * After trimming: an empty token yields `""`; `true`/`false`/`null` yield the
 * corresponding value; a token wrapped in double quotes yields its unescaped
 * content; a token that looksLikeNumber() yields a number; anything else is
 * returned as a string.
 *
 * @param {string} value - Raw token text.
 * @returns {null|boolean|number|string} The decoded primitive.
 */
function parsePrimitive(value) {
  const token = value.trim();
  if (token.length === 0) {
    return "";
  }
  if (token === "true") {
    return true;
  }
  if (token === "false") {
    return false;
  }
  if (token === "null") {
    return null;
  }
  if (token[0] === '"') {
    // A quoted string needs two distinct quote characters. A lone `"` both
    // "starts" and "ends" with a quote, so without the length check it would
    // decode to an empty string instead of being kept as-is.
    if (token.length >= 2 && token.endsWith('"')) {
      return unescapeString(token.slice(1, -1));
    }
    // Unterminated quote: can never be numeric, keep the text verbatim.
    return token;
  }
  return looksLikeNumber(token) ? Number(token) : token;
}
/**
 * Check whether `data` is a non-empty array of plain objects that all have
 * the same keys in the same order.
 *
 * @param {*} data - Value to inspect.
 * @returns {boolean} `true` when every element is a plain object and each
 *   one's `Object.keys()` matches the first element's, position by position.
 */
function isUniformObjectArray(data) {
  if (!Array.isArray(data) || data.length === 0) {
    return false;
  }
  if (data.some((item) => !isPlainObject(item))) {
    return false;
  }
  const [first, ...rest] = data;
  const expected = Object.keys(first);
  return rest.every((item) => {
    const actual = Object.keys(item);
    return actual.length === expected.length && actual.every((name, idx) => name === expected[idx]);
  });
}
/**
 * Check whether a value is a non-null, non-array object whose
 * `Object.prototype.toString` tag is `[object Object]`.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} `true` for plain objects.
 */
function isPlainObject(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  if (Array.isArray(value)) {
    return false;
  }
  return Object.prototype.toString.call(value) === "[object Object]";
}
/**
 * Split `text` on `delimiter`, ignoring delimiters that sit inside double
 * quotes or inside `{}`, `[]` or `()` nesting.
 *
 * A backslash copies itself and the following character through untouched.
 * Each piece is trimmed. A final piece is only emitted when at least one
 * character was collected after the last delimiter, so `"a,"` yields `["a"]`
 * and `""` yields `[]`.
 *
 * @param {string} text - Text to split.
 * @param {string} delimiter - Single character to split on.
 * @returns {string[]} The trimmed pieces, in order.
 */
function splitByDelimiter(text, delimiter) {
  const pieces = [];
  let buffer = "";
  let quoted = false;
  let escaped = false;
  let curly = 0;
  let square = 0;
  let paren = 0;
  for (const ch of text) {
    if (escaped) {
      buffer += ch;
      escaped = false;
      continue;
    }
    if (ch === "\\") {
      buffer += ch;
      escaped = true;
      continue;
    }
    if (ch === '"') {
      quoted = !quoted;
      buffer += ch;
      continue;
    }
    if (!quoted) {
      // Brackets are classified before the delimiter test, so a bracket
      // character is never treated as a delimiter even if one was requested.
      let isBracket = true;
      switch (ch) {
        case "{":
          curly += 1;
          break;
        case "}":
          curly -= 1;
          break;
        case "[":
          square += 1;
          break;
        case "]":
          square -= 1;
          break;
        case "(":
          paren += 1;
          break;
        case ")":
          paren -= 1;
          break;
        default:
          isBracket = false;
      }
      if (!isBracket && ch === delimiter && curly === 0 && square === 0 && paren === 0) {
        pieces.push(buffer.trim());
        buffer = "";
        continue;
      }
    }
    buffer += ch;
  }
  if (buffer.length > 0) {
    pieces.push(buffer.trim());
  }
  return pieces;
}
/**
 * Split a header key such as `name` or `name(@a,b)` into the key name and
 * its optional nested schema.
 *
 * @param {string} keyString - One key token from an object header.
 * @returns {{keyName: string, schema: (string[]|null)}} The trimmed key name
 *   and the list of nested field tokens, or `null` when the key has no `(`.
 * @throws {Error} If the key contains `(` but does not end with `)`.
 */
function parseKeySchema(keyString) {
  const spec = keyString.trim();
  const openAt = spec.indexOf("(");
  if (openAt === -1) {
    return { keyName: spec, schema: null };
  }
  if (!spec.endsWith(")")) {
    throw new Error(`Invalid key schema syntax: ${keyString}`);
  }
  const keyName = spec.slice(0, openAt).trim();
  const inner = spec.slice(openAt + 1, -1).trim();
  // The leading "@" marker inside the parentheses is optional.
  const fieldList = inner.startsWith("@") ? inner.slice(1) : inner;
  return { keyName, schema: splitByDelimiter(fieldList, ",") };
}
/**
 * Build a lookup from key name to nested schema for a list of header keys.
 *
 * @param {Iterable<string>} keys - Header key tokens, each optionally carrying
 *   a `(…)` schema suffix.
 * @returns {Object<string, (string[]|null)>} Map of key name to its schema
 *   field list, or `null` for keys without a schema.
 * @throws {Error} Propagated from parseKeySchema() on malformed keys.
 */
function buildSchemaMap(keys) {
  const lookup = {};
  for (const entry of keys) {
    const parsed = parseKeySchema(entry);
    lookup[parsed.keyName] = parsed.schema;
  }
  return lookup;
}
// Locate the last "#" in a header that is not inside a double-quoted key,
// using the same quote/backslash rules as the tokenizer. Returns -1 if none.
function findRowCountMarker(keysStr) {
  let inQuotes = false;
  let escapeNext = false;
  let markerAt = -1;
  for (let i = 0; i < keysStr.length; i++) {
    const ch = keysStr[i];
    if (escapeNext) {
      escapeNext = false;
    } else if (ch === "\\") {
      escapeNext = true;
    } else if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (ch === "#" && !inQuotes) {
      markerAt = i;
    }
  }
  return markerAt;
}
/**
 * Parse an object header into its key tokens and optional row count.
 *
 * A header has the form `k1,k2` or `k1,k2#N`, where `N` is the number of
 * rows that follow. Only a `#` outside double quotes is treated as the
 * row-count marker, so a quoted key that itself contains `#` (which the
 * writer produces, since `#` forces quoting) is not cut in half.
 *
 * @param {string} keysStr - Header text (everything before the first `|`).
 * @returns {{keys: string[], count: (number|null)}} Key tokens and the row
 *   count, or `null` for the count when no marker is present.
 * @throws {Error} If the text after the marker does not parse as an integer.
 */
function parseKeys(keysStr) {
  const markerAt = findRowCountMarker(keysStr);
  if (markerAt === -1) {
    return { keys: splitByDelimiter(keysStr, ","), count: null };
  }
  const countPart = keysStr.slice(markerAt + 1).trim();
  const count = Number.parseInt(countPart, 10);
  if (Number.isNaN(count)) {
    throw new Error(`Invalid row count: ${countPart}`);
  }
  return { keys: splitByDelimiter(keysStr.slice(0, markerAt), ","), count };
}
// src/serializer.ts
/**
 * Serialize a value to a TSON string.
 *
 * @param {*} data - Value to serialize (primitive, array or object).
 * @returns {string} The TSON text.
 * @throws {TypeError} Propagated from serializeValue() for unsupported types.
 */
function dumps(data) {
  const encoded = serializeValue(data);
  return encoded;
}
/**
 * Serialize a value to TSON and write it to a file as UTF-8.
 *
 * `fs/promises` is imported lazily at call time.
 *
 * @param {*} data - Value to serialize.
 * @param {string} filePath - Destination path.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function dump(data, filePath) {
  const { writeFile } = await import('fs/promises');
  const payload = dumps(data);
  await writeFile(filePath, payload, "utf-8");
}
/**
 * Serialize any supported value by dispatching on its type.
 *
 * Primitives go to formatPrimitive(); arrays use the tabular form when
 * isUniformObjectArray() accepts them and the plain list form otherwise;
 * any other object goes to serializeObject().
 *
 * @param {*} value - Value to serialize.
 * @returns {string} The TSON text.
 * @throws {TypeError} For types that are none of the above (e.g. `undefined`).
 */
function serializeValue(value) {
  const kind = typeof value;
  if (value === null || kind === "boolean" || kind === "number" || kind === "string") {
    return formatPrimitive(value);
  }
  if (Array.isArray(value)) {
    return isUniformObjectArray(value) ? serializeTabular(value) : serializeArray(value);
  }
  if (kind !== "object") {
    throw new TypeError(`Cannot serialize type: ${kind}`);
  }
  return serializeObject(value);
}
/**
 * Serialize an object as `{@key1,key2|value1,value2}`.
 *
 * Keys are quoted and escaped when needsQuoting() requires it; values are
 * serialized recursively. An object without own enumerable keys becomes `{@}`.
 *
 * @param {Object} obj - Object to serialize.
 * @returns {string} The TSON text.
 */
function serializeObject(obj) {
  const keys = Object.keys(obj);
  if (keys.length === 0) {
    return "{@}";
  }
  const header = keys
    .map((key) => {
      const name = String(key);
      return needsQuoting(name) ? `"${escapeString(name)}"` : name;
    })
    .join(",");
  const body = keys.map((key) => serializeValue(obj[key])).join(",");
  return `{@${header}|${body}}`;
}
/**
 * Serialize an array as a bracketed, comma-separated list.
 *
 * @param {Array} arr - Array to serialize.
 * @returns {string} `[]` for an empty array, otherwise `[v1,v2,…]` with each
 *   element serialized recursively.
 */
function serializeArray(arr) {
  if (arr.length === 0) {
    return "[]";
  }
  const items = Array.from(arr, (element) => serializeValue(element));
  return `[${items.join(",")}]`;
}
/**
 * Serialize a uniform array of objects in tabular form:
 * `{@key1,key2#rowCount|v1,v2|v1,v2}`.
 *
 * Keys whose values are same-shaped objects in every row (as reported by
 * detectNestedSchemas) get a `(@f1,f2)` suffix in the header and their values
 * are written as brace-wrapped value lists. Nested field names are emitted
 * verbatim in that suffix.
 *
 * @param {Object[]} arr - Array of plain objects sharing the same keys.
 * @returns {string} The TSON text.
 * @throws {Error} If the array is non-empty but not uniform.
 */
function serializeTabular(arr) {
  if (arr.length === 0) {
    return "[]";
  }
  if (!isUniformObjectArray(arr)) {
    throw new Error("Array is not uniform - cannot use tabular format");
  }
  const keys = Object.keys(arr[0]);
  if (keys.length === 0) {
    // Rows without any fields would be written as empty row segments, which
    // the reader skips and then rejects with a row-count mismatch. Emit a
    // plain list of empty objects instead.
    return serializeArray(arr);
  }
  const nestedSchemas = detectNestedSchemas(arr, keys);
  // Own-property lookup only: with the `in` operator, keys such as
  // "constructor" or "toString" would match members inherited from
  // Object.prototype and be mistaken for nested schemas.
  const schemaFor = (key) =>
    Object.prototype.hasOwnProperty.call(nestedSchemas, key) ? nestedSchemas[key] : null;
  const header = keys
    .map((key) => {
      const name = needsQuoting(key) ? `"${escapeString(key)}"` : key;
      const schema = schemaFor(key);
      return schema === null ? name : `${name}(@${schema.join(",")})`;
    })
    .join(",");
  const rows = arr.map((row) =>
    keys
      .map((key) => {
        const schema = schemaFor(key);
        return schema === null
          ? serializeValue(row[key])
          : serializeSchematizedObject(row[key], schema);
      })
      .join(",")
  );
  return `{@${header}#${arr.length}|${rows.join("|")}}`;
}
/**
 * Find the keys whose values are same-shaped objects in every row.
 *
 * For each key, the value in every row must be a non-null, non-array object,
 * and every row's value must have the same `Object.keys()` in the same order
 * as the first row's. Qualifying keys map to that shared key list.
 *
 * The result has no prototype, so `key in result` and `result[key]` only ever
 * see entries stored here (never `constructor`, `toString`, …), and a data key
 * named `__proto__` is stored as an ordinary entry.
 *
 * @param {Object[]} arr - Rows to inspect.
 * @param {string[]} keys - Keys to examine on each row.
 * @returns {Object<string, string[]>} Map of key to nested field names.
 */
function detectNestedSchemas(arr, keys) {
  const nestedSchemas = Object.create(null);
  const isNestedObject = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
  for (const key of keys) {
    const values = arr.map((obj) => obj[key]);
    if (values.length === 0 || !values.every((v) => isNestedObject(v))) {
      continue;
    }
    const firstKeys = Object.keys(values[0]);
    const allSame = values.slice(1).every((v) => {
      const objKeys = Object.keys(v);
      return objKeys.length === firstKeys.length && objKeys.every((k, i) => k === firstKeys[i]);
    });
    if (allSame) {
      nestedSchemas[key] = firstKeys;
    }
  }
  return nestedSchemas;
}
/**
 * Serialize an object whose field names are already declared in a header
 * schema, writing only its values as `{v1,v2,…}` in schema order.
 *
 * @param {Object} obj - Nested object to serialize.
 * @param {Iterable<string>} schema - Field names, in output order.
 * @returns {string} `{}` when `obj` has no own enumerable keys, otherwise the
 *   brace-wrapped value list.
 */
function serializeSchematizedObject(obj, schema) {
  if (Object.keys(obj).length === 0) {
    return "{}";
  }
  const cells = Array.from(schema, (field) => serializeValue(obj[field]));
  return `{${cells.join(",")}}`;
}
// src/deserializer.ts
/**
 * Parse a TSON string.
 *
 * @param {string} s - TSON text.
 * @returns {*} The decoded value; `null` when the input is empty or only
 *   whitespace.
 */
function loads(s) {
  const text = s.trim();
  return text.length === 0 ? null : parseValue(text);
}
/**
 * Read a UTF-8 file and parse its contents as TSON.
 *
 * `fs/promises` is imported lazily at call time.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<*>} The decoded value.
 */
async function load(filePath) {
  const { readFile } = await import('fs/promises');
  const text = await readFile(filePath, "utf-8");
  return loads(text);
}
/**
 * Parse one TSON value, choosing the parser from its first character:
 * `{` → parseObject, `[` → parseArray, anything else → parsePrimitive.
 *
 * @param {string} text - Text of a single value.
 * @returns {*} The decoded value; `""` when the text is empty after trimming.
 */
function parseValue(text) {
  const token = text.trim();
  if (token.length === 0) {
    return "";
  }
  switch (token[0]) {
    case "{":
      return parseObject(token);
    case "[":
      return parseArray(token);
    default:
      return parsePrimitive(token);
  }
}
/**
 * Parse brace-wrapped text.
 *
 * `{}` and `{@}` yield an empty object. Content starting with `@` is handed
 * to parseKeyedObject(). Content without the `@` marker is split on commas
 * and returned as an array of parsed values.
 *
 * @param {string} text - Text starting with `{` and ending with `}`.
 * @returns {Object|Array} The decoded structure.
 * @throws {Error} If the text is not wrapped in braces.
 */
function parseObject(text) {
  const source = text.trim();
  const isWrapped = source.startsWith("{") && source.endsWith("}");
  if (!isWrapped) {
    throw new Error(`Invalid object format: ${text}`);
  }
  const inner = source.slice(1, -1).trim();
  if (inner === "" || inner === "@") {
    return {};
  }
  if (!inner.startsWith("@")) {
    return splitByDelimiter(inner, ",").map((part) => parseValue(part));
  }
  return parseKeyedObject(inner.slice(1));
}
/**
 * Parse the inside of a `{@…}` form (with the leading `@` already removed).
 *
 * The text is split on `|`: the first segment is the key header, the rest
 * are value rows. The result is an array of objects when the header carries
 * a `#count` or when more than one row is present, and a single object
 * otherwise.
 *
 * @param {string} content - Header and rows, separated by `|`.
 * @returns {Object|Object[]} The decoded object or array of row objects.
 * @throws {Error} If the header or the value rows are missing.
 */
function parseKeyedObject(content) {
  const segments = splitByDelimiter(content, "|");
  if (segments.length === 0) {
    throw new Error("Invalid object format: missing keys");
  }
  const [header, ...rows] = segments;
  const { keys, count } = parseKeys(header);
  const schemaMap = buildSchemaMap(keys);
  // Strip any "(…)" schema suffix to get the bare field name.
  const fieldNames = keys.map((key) => key.split("(")[0]);
  if (rows.length === 0) {
    throw new Error("Invalid object format: missing values");
  }
  if (count === null && rows.length === 1) {
    return parseSingleObject(fieldNames, rows[0], schemaMap);
  }
  return parseTabularArray(fieldNames, rows, schemaMap, count);
}
/**
 * Build one object from a comma-separated row of values.
 *
 * Values are matched to `fieldNames` by position. A field with an entry in
 * `schemaMap` is parsed with parseSchematizedValue(); all others go through
 * parseValue().
 *
 * Field names arrive as raw header tokens. The writer wraps keys that need
 * it in double quotes and escapes them, so a token of the form `"…"` is
 * unquoted and unescaped before being used as the property name. Properties
 * are defined directly on the result so that a key named `__proto__` becomes
 * a regular own property instead of changing the object's prototype.
 *
 * @param {string[]} fieldNames - Raw field-name tokens, in column order.
 * @param {string} valuesStr - Comma-separated values for one row.
 * @param {Object<string, (string[]|null)>} schemaMap - Nested schemas keyed
 *   by raw field-name token.
 * @returns {Object} The decoded object.
 * @throws {Error} If the number of values differs from the number of fields.
 */
function parseSingleObject(fieldNames, valuesStr, schemaMap) {
  const values = splitByDelimiter(valuesStr, ",");
  if (values.length !== fieldNames.length) {
    throw new Error(
      `Field count mismatch: ${fieldNames.length} fields but ${values.length} values`
    );
  }
  const obj = {};
  for (let i = 0; i < fieldNames.length; i++) {
    const fieldName = fieldNames[i];
    // Schema lookup uses the raw token, which is how the map is keyed.
    const schema = schemaMap[fieldName];
    const parsed = schema ? parseSchematizedValue(values[i], schema) : parseValue(values[i]);
    const isQuoted = fieldName.length >= 2 && fieldName.startsWith('"') && fieldName.endsWith('"');
    const propertyName = isQuoted ? unescapeString(fieldName.slice(1, -1)) : fieldName;
    Object.defineProperty(obj, propertyName, {
      value: parsed,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  return obj;
}
/**
 * Parse the rows of a tabular block into an array of objects.
 *
 * Rows that are empty after trimming are skipped. When `expectedCount` is
 * not `null`, the number of parsed rows must match it.
 *
 * @param {string[]} fieldNames - Field names, in column order.
 * @param {string[]} rowParts - One comma-separated value list per row.
 * @param {Object<string, (string[]|null)>} schemaMap - Nested schemas by field.
 * @param {number|null} expectedCount - Declared row count, or `null`.
 * @returns {Object[]} The decoded rows.
 * @throws {Error} If the parsed row count differs from `expectedCount`.
 */
function parseTabularArray(fieldNames, rowParts, schemaMap, expectedCount) {
  const rows = rowParts
    .filter((row) => row.trim().length !== 0)
    .map((row) => parseSingleObject(fieldNames, row, schemaMap));
  if (expectedCount === null || rows.length === expectedCount) {
    return rows;
  }
  throw new Error(
    `Row count mismatch: expected ${expectedCount} rows but got ${rows.length}`
  );
}
/**
 * Parse a brace-wrapped value list (`{v1,v2,…}`) against a schema that names
 * its fields.
 *
 * Schema entries may themselves carry a `(…)` suffix, in which case the
 * corresponding value is parsed recursively against that nested schema.
 * Properties are defined directly on the result so that a field named
 * `__proto__` becomes a regular own property instead of changing the
 * object's prototype (or being silently dropped for primitive values).
 *
 * @param {string} valueStr - Text of the form `{…}`.
 * @param {string[]} schema - Field tokens, in value order.
 * @returns {Object} The decoded object; `{}` when the braces are empty.
 * @throws {Error} If the text is not brace-wrapped or the number of values
 *   differs from the number of schema fields.
 */
function parseSchematizedValue(valueStr, schema) {
  const trimmed = valueStr.trim();
  if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) {
    throw new Error(`Schematized value must be wrapped in braces: ${valueStr}`);
  }
  const content = trimmed.slice(1, -1).trim();
  if (content.length === 0) {
    return {};
  }
  const values = splitByDelimiter(content, ",");
  if (values.length !== schema.length) {
    throw new Error(
      `Schema mismatch: ${schema.length} keys but ${values.length} values`
    );
  }
  const nestedSchemaMap = buildSchemaMap(schema);
  const obj = {};
  for (let i = 0; i < schema.length; i++) {
    // Strip any "(…)" schema suffix to get the bare field name.
    const fieldName = schema[i].split("(")[0];
    const nestedSchema = nestedSchemaMap[fieldName];
    const parsed = nestedSchema
      ? parseSchematizedValue(values[i], nestedSchema)
      : parseValue(values[i]);
    Object.defineProperty(obj, fieldName, {
      value: parsed,
      writable: true,
      enumerable: true,
      configurable: true
    });
  }
  return obj;
}
/**
 * Parse a bracket-wrapped, comma-separated list.
 *
 * Elements that are empty after trimming are skipped; the rest are parsed
 * with parseValue().
 *
 * @param {string} text - Text starting with `[` and ending with `]`.
 * @returns {Array} The decoded elements.
 * @throws {Error} If the text is not wrapped in square brackets.
 */
function parseArray(text) {
  const source = text.trim();
  if (!(source.startsWith("[") && source.endsWith("]"))) {
    throw new Error(`Invalid array format: ${text}`);
  }
  const inner = source.slice(1, -1).trim();
  if (inner.length === 0) {
    return [];
  }
  return splitByDelimiter(inner, ",")
    .filter((part) => part.trim().length > 0)
    .map((part) => parseValue(part));
}
export { buildSchemaMap, dump, dumps, escapeString, formatPrimitive, isUniformObjectArray, load, loads, looksLikeNumber, needsQuoting, parseKeySchema, parseKeys, parsePrimitive, splitByDelimiter, unescapeString };
//# sourceMappingURL=index.js.map
//# sourceMappingURL=index.js.map