/*
 * @tabular-json/tabular-json
 * Version:
 * Tabular-JSON: a superset of JSON with CSV-like tables
 * 502 lines • 17 kB
 * JavaScript
 */
import { setIn } from './objects.js';
/**
* Parse a string containing Tabular-JSON.
*
* The parser is based on the parser from [lossless-json](https://github.com/josdejong/lossless-json)
*
* @param text
* The string to parse as Tabular-JSON (a superset of JSON; see the JSON object for a description of plain JSON syntax).
*
* @returns Returns the Object corresponding to the given Tabular-JSON text.
*
* @throws Throws a SyntaxError exception if the string to parse is not valid Tabular-JSON.
*/
export function parse(text) {
// Cursor shared by all nested helpers: index of the next unread character in `text`.
let i = 0;
// parseRootTable() parses one value and, when a string is directly followed by a comma,
// re-parses the whole input as a root-level table.
const value = parseRootTable();
if (value === undefined) {
// Nothing parseable was found at the start of the input.
throwValueExpected();
}
// Characters left over after the value make the input invalid.
expectEndOfInput();
return value;
/**
 * Parse an object literal `{ "key": value, ... }` at the cursor.
 *
 * A trailing comma before the closing `}` is accepted. A key that occurs more
 * than once is tolerated only when both occurrences hold deeply equal values.
 *
 * @returns The parsed object, or undefined (cursor untouched) when the
 * character at the cursor is not `{`.
 * @throws SyntaxError (raised by the eat.../throw... helpers) on a missing key,
 * colon, value, comma or closing brace, and on a conflicting duplicate key.
 */
function parseObject() {
  if (text.charCodeAt(i) !== codeOpeningBrace) {
    return undefined;
  }
  i++;
  skipWhitespace();
  const object = {};
  let initial = true;
  while (i < text.length && text.charCodeAt(i) !== codeClosingBrace) {
    if (!initial) {
      eatComma();
      skipWhitespace();
      if (text.charCodeAt(i) === codeClosingBrace) {
        // trailing comma
        break;
      }
    } else {
      initial = false;
    }
    const start = i;
    const key = parseStringOr(throwObjectKeyExpected);
    skipWhitespace();
    eatColon();
    const value = parseValue();
    if (value === undefined) {
      throwObjectValueExpected();
      return undefined; // unreachable when the helper throws
    }
    if (Object.prototype.hasOwnProperty.call(object, key) && !isDeepEqual(value, object[key])) {
      // `start` points at the opening quote of the key, `start + 1` at its first character
      throwDuplicateKey(key, start + 1);
    }
    if (key === '__proto__') {
      // A plain assignment would invoke the inherited `__proto__` setter and
      // change (or silently ignore) the prototype instead of storing the value.
      // Define an own data property, which is what JSON.parse produces as well.
      Object.defineProperty(object, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
      });
    } else {
      object[key] = value;
    }
  }
  if (text.charCodeAt(i) !== codeClosingBrace) {
    throwObjectKeyOrEndExpected();
  }
  i++;
  return object;
}
/**
 * Parse an array literal `[ value, ... ]` at the cursor.
 *
 * A trailing comma before the closing `]` is accepted.
 *
 * @returns The parsed array, or undefined (cursor untouched) when the
 * character at the cursor is not `[`.
 */
function parseArray() {
  if (text.charCodeAt(i) !== codeOpeningBracket) {
    return undefined;
  }
  i++;
  skipWhitespace();
  const items = [];
  while (i < text.length && text.charCodeAt(i) !== codeClosingBracket) {
    // every completed iteration pushes exactly one item, so a non-empty
    // list means a separator is required before the next item
    if (items.length > 0) {
      eatComma();
      skipWhitespace();
      if (text.charCodeAt(i) === codeClosingBracket) {
        // trailing comma
        break;
      }
    }
    items.push(parseValueOr(throwArrayItemExpected));
  }
  if (text.charCodeAt(i) !== codeClosingBracket) {
    throwArrayItemOrEndExpected();
  }
  i++;
  return items;
}
/**
 * Parse the document root, which is either a single value or a table written
 * without the `---` fences.
 *
 * The root is treated as a table when the first value is a string that is
 * directly followed by a comma; the cursor is then rewound to the start and
 * the input is re-read as a header line followed by rows.
 *
 * @returns The array of row objects for a root table, otherwise whatever
 * parseValue() returned (possibly undefined).
 */
function parseRootTable() {
  const firstValue = parseValue();
  const looksLikeHeader = typeof firstValue === 'string' && text.charCodeAt(i) === codeComma;
  if (!looksLikeHeader) {
    return firstValue;
  }

  // rewind and parse the same text again, now as a table header
  i = 0;
  skipWhitespace();
  const fields = parseTableFields();
  eatTableRowSeparator();

  const rows = [];
  while (i < text.length) {
    const row = parseTableRow(fields);
    rows.push(row);
    // the last row does not need a terminating newline
    if (i < text.length) {
      eatTableRowSeparator();
    }
  }
  return rows;
}
/**
 * Parse a fenced table: an opening `---` line, a header line with the field
 * names, one line per row, and a closing `---`.
 *
 * @returns The array of row objects, or undefined (cursor untouched) when
 * the text at the cursor does not start with `---`.
 */
function parseTable() {
  const fence = '---';
  if (text.charCodeAt(i) !== codeMinus || !text.startsWith(fence, i)) {
    return undefined;
  }
  i += fence.length;
  skipTableWhitespace();
  eatTableRowSeparator();

  const fields = parseTableFields();
  eatTableRowSeparator();

  const rows = [];
  while (i < text.length && !text.startsWith(fence, i)) {
    rows.push(parseTableRow(fields));
    eatTableRowSeparator();
  }
  if (!text.startsWith(fence, i)) {
    throwTableRowOrEndExpected();
  }
  i += fence.length;
  return rows;
}
/**
 * Parse the header line of a table: comma separated field names, where a
 * field may be a dot separated path of quoted keys (`"address"."city"`).
 *
 * Parsing stops at the newline that ends the header (or at end of input); the
 * newline itself is not consumed.
 *
 * @returns An array with one `{ keys, setValue }` entry per column. `keys` is
 * the list of path segments and `setValue(record, value)` stores a cell value
 * in a row object.
 */
function parseTableFields() {
  const fields = [];
  let initialField = true;
  while (i < text.length && text.charCodeAt(i) !== codeNewline) {
    if (!initialField) {
      eatComma();
      skipTableWhitespace();
    } else {
      initialField = false;
    }
    const keys = [parseStringOr(throwTableFieldExpected)];
    skipTableWhitespace();
    while (i < text.length && text.charCodeAt(i) === codeDot) {
      i++;
      skipTableWhitespace();
      keys.push(parseStringOr(throwTableFieldExpected));
      skipTableWhitespace();
    }
    const first = keys[0];
    let setValue;
    if (keys.length > 1) {
      // NOTE(review): nested paths are delegated to setIn() from ./objects.js,
      // which is not visible here — confirm it handles a '__proto__' segment safely.
      setValue = (record, value) => setIn(record, keys, value);
    } else if (first === '__proto__') {
      // A plain assignment would go through the inherited `__proto__` setter
      // and change (or ignore) the prototype; store an own property instead.
      setValue = (record, value) => {
        Object.defineProperty(record, first, {
          value,
          writable: true,
          enumerable: true,
          configurable: true
        });
        return value;
      };
    } else {
      setValue = (record, value) => (record[first] = value);
    }
    fields.push({ keys, setValue });
  }
  return fields;
}
/**
 * Parse one table row: one cell per field, separated by commas.
 *
 * A cell for which parseElement() yields undefined (for example an empty
 * cell) leaves the corresponding field unset in the row.
 *
 * @param fields The column descriptors produced by parseTableFields().
 * @returns The row object filled through each field's `setValue`.
 */
function parseTableRow(fields) {
  const row = {};
  const lastIndex = fields.length - 1;
  for (const [index, field] of fields.entries()) {
    const cell = parseElement();
    skipTableWhitespace();
    if (cell !== undefined) {
      field.setValue(row, cell);
    }
    // no separator after the last cell of the row
    if (index < lastIndex) {
      eatComma();
      skipTableWhitespace();
    }
  }
  return row;
}
/**
 * Parse a single element, skipping whitespace and comments on both sides.
 *
 * @returns The result of parseElement(), which is undefined when no
 * element starts at the cursor.
 */
function parseValue() {
  skipWhitespace(); // leading whitespace/comments
  const element = parseElement();
  skipWhitespace(); // trailing whitespace/comments
  return element;
}
/**
 * Parse a value and report an error when there is none.
 *
 * @param throwError Callback invoked when parseValue() returns undefined;
 * the callers in this file pass helpers that throw a SyntaxError.
 * @returns The parsed value.
 */
function parseValueOr(throwError) {
  const parsed = parseValue();
  if (parsed !== undefined) {
    return parsed;
  }
  throwError();
  return parsed;
}
/**
 * Try every element parser in turn and return the first result that is
 * neither undefined nor null.
 *
 * The order is: object, array, table, string, number, and then the keywords
 * `true`, `false` and `null`.
 *
 * @returns The parsed element; null for the `null` keyword; undefined when
 * nothing matches at the cursor.
 */
function parseElement() {
  const container = parseObject() ?? parseArray() ?? parseTable();
  if (container !== undefined && container !== null) {
    return container;
  }

  const scalar = parseString() ?? parseNumber();
  if (scalar !== undefined && scalar !== null) {
    return scalar;
  }

  return parseKeyword('true', true) ?? parseKeyword('false', false) ?? parseKeyword('null', null);
}
/**
 * Consume the literal `name` when the text at the cursor starts with it.
 *
 * @param name The exact keyword text to match.
 * @param value The value to return on a match.
 * @returns `value` on a match (cursor moved past the keyword), otherwise
 * undefined (cursor untouched).
 */
function parseKeyword(name, value) {
  const end = i + name.length;
  if (text.slice(i, end) !== name) {
    return undefined;
  }
  i = end;
  return value;
}
/**
 * Advance the cursor past any mix of whitespace (including newlines), line
 * comments and block comments.
 */
function skipWhitespace() {
  let skippedSomething;
  do {
    // repeat until no more whitespace or comments are found
    skippedSomething = skipWhitespaceChars() || skipLineComment() || skipBlockComment();
  } while (skippedSomething);
}
/**
 * Advance the cursor past any mix of in-row whitespace, line comments and
 * block comments. Unlike skipWhitespace() the whitespace part is delegated to
 * skipTableWhitespaceChars(), which does not treat a newline as whitespace.
 */
function skipTableWhitespace() {
  let skippedSomething;
  do {
    // repeat until no more whitespace or comments are found
    skippedSomething = skipTableWhitespaceChars() || skipLineComment() || skipBlockComment();
  } while (skippedSomething);
}
/**
 * Skip a run of characters accepted by isWhitespace().
 *
 * @returns true when at least one character was skipped, false otherwise.
 */
function skipWhitespaceChars() {
  const begin = i;
  while (isWhitespace(text.charCodeAt(i))) {
    i++;
  }
  return i > begin;
}
/**
 * Skip a run of characters accepted by isTableWhitespace().
 *
 * @returns true when at least one character was skipped, false otherwise.
 */
function skipTableWhitespaceChars() {
  const begin = i;
  while (isTableWhitespace(text.charCodeAt(i))) {
    i++;
  }
  return i > begin;
}
/**
 * Skip a line comment like "// ..." starting at the cursor.
 *
 * The cursor stops on the newline that ends the comment (or at end of input);
 * the newline itself is not consumed.
 *
 * @returns true when a comment was skipped, false when the text at the
 * cursor does not start with `//`.
 */
function skipLineComment() {
  const startsComment = text.charCodeAt(i) === codeSlash && text.charCodeAt(i + 1) === codeSlash;
  if (!startsComment) {
    return false;
  }
  for (i += 2; i < text.length; i++) {
    if (text.charCodeAt(i) === codeNewline) {
      break;
    }
  }
  return true;
}
/**
 * Skip a block comment like "/* ... *&#47;" starting at the cursor.
 *
 * @returns true when a comment was skipped (cursor placed right after the
 * closing delimiter), false when the text at the cursor does not start a
 * block comment.
 * @throws SyntaxError when the comment is not closed before the end of the
 * input.
 */
function skipBlockComment() {
  if (text.charCodeAt(i) !== codeSlash || text.charCodeAt(i + 1) !== codeAsterisk) {
    return false;
  }
  i += 2;
  // The bounds check must guard the whole end-of-comment test: otherwise the
  // loop never terminates once the cursor runs past the end of the input.
  while (i < text.length &&
    !(text.charCodeAt(i) === codeAsterisk && text.charCodeAt(i + 1) === codeSlash)) {
    i++;
  }
  if (i >= text.length) {
    throw new SyntaxError(`End of block comment '*/' expected ${gotAt()}`);
  }
  i += 2;
  return true;
}
/**
 * Parse a double-quoted string at the cursor and resolve its escape sequences.
 *
 * Supported escapes are the entries of `escapeCharacters` plus `\uXXXX` with
 * exactly four hex digits. Characters rejected by isValidStringCharacter()
 * are reported as invalid.
 *
 * @returns The unescaped string contents, or undefined (cursor untouched)
 * when the character at the cursor is not a double quote.
 */
function parseString() {
  if (text.charCodeAt(i) !== codeDoubleQuote) {
    return undefined;
  }
  i++;
  let result = '';
  for (; i < text.length && text.charCodeAt(i) !== codeDoubleQuote; i++) {
    const code = text.charCodeAt(i);

    if (code !== codeBackslash) {
      // ordinary character
      if (isValidStringCharacter(code)) {
        result += text[i];
      } else {
        throwInvalidCharacter(text[i]);
      }
      continue;
    }

    const escaped = text[i + 1];
    const replacement = escapeCharacters[escaped];
    if (replacement !== undefined) {
      result += replacement;
      i++; // the loop increment then skips the escaped character itself
      continue;
    }

    if (escaped !== 'u') {
      throwInvalidEscapeCharacter(i);
      continue;
    }

    const hasFourHexDigits = isHex(text.charCodeAt(i + 2)) &&
      isHex(text.charCodeAt(i + 3)) &&
      isHex(text.charCodeAt(i + 4)) &&
      isHex(text.charCodeAt(i + 5));
    if (!hasFourHexDigits) {
      throwInvalidUnicodeCharacter(i);
      continue;
    }
    result += String.fromCharCode(Number.parseInt(text.slice(i + 2, i + 6), 16));
    i += 5; // together with the loop increment this skips all six characters of \uXXXX
  }
  expectEndOfString();
  i++;
  return result;
}
/**
 * Parse a string and report an error when there is none.
 *
 * @param throwError Callback invoked when parseString() returns undefined;
 * the callers in this file pass helpers that throw a SyntaxError.
 * @returns The parsed string.
 */
function parseStringOr(throwError) {
  const parsed = parseString();
  if (parsed !== undefined) {
    return parsed;
  }
  throwError();
  return parsed;
}
/**
 * Parse a number at the cursor.
 *
 * Accepts the special values `inf`, `-inf` and `nan`, and otherwise the JSON
 * number grammar: an optional minus sign, an integer part (`0` or a non-zero
 * digit followed by digits), an optional fraction and an optional exponent.
 *
 * The integer part is mandatory. Text that starts with a fraction or an
 * exponent (such as `.5` or `e5`) is not a number: undefined is returned and
 * the cursor is left untouched, so no NaN can leak out of parseFloat().
 *
 * @returns The parsed number, or undefined when no number starts at the cursor.
 * @throws SyntaxError (via expectDigit) when a minus sign, decimal point or
 * exponent is not followed by a digit.
 */
function parseNumber() {
  const start = i;
  const special = parseKeyword('inf', Infinity) ?? parseKeyword('-inf', -Infinity) ?? parseKeyword('nan', NaN);
  if (special !== undefined) {
    return special;
  }
  if (text.charCodeAt(i) === codeMinus) {
    i++;
    expectDigit(start);
  }
  if (text.charCodeAt(i) === codeZero) {
    i++;
  } else if (isNonZeroDigit(text.charCodeAt(i))) {
    i++;
    while (isDigit(text.charCodeAt(i))) {
      i++;
    }
  } else {
    // No integer part. A minus sign is always followed by a digit (expectDigit
    // throws otherwise), so nothing has been consumed at this point.
    return undefined;
  }
  if (text.charCodeAt(i) === codeDot) {
    i++;
    expectDigit(start);
    while (isDigit(text.charCodeAt(i))) {
      i++;
    }
  }
  if (text.charCodeAt(i) === codeLowercaseE || text.charCodeAt(i) === codeUppercaseE) {
    i++;
    if (text.charCodeAt(i) === codeMinus || text.charCodeAt(i) === codePlus) {
      i++;
    }
    expectDigit(start);
    while (isDigit(text.charCodeAt(i))) {
      i++;
    }
  }
  return parseFloat(text.slice(start, i));
}
/**
 * Consume the comma at the cursor.
 *
 * @throws SyntaxError when the character at the cursor is not a comma.
 */
function eatComma() {
  const atComma = text.charCodeAt(i) === codeComma;
  if (!atComma) {
    const message = `Comma ',' expected after value ${gotAt()}`;
    throw new SyntaxError(message);
  }
  i++;
}
/**
 * Consume the colon at the cursor.
 *
 * @throws SyntaxError when the character at the cursor is not a colon.
 */
function eatColon() {
  const atColon = text.charCodeAt(i) === codeColon;
  if (!atColon) {
    const message = `Colon ':' expected after property name ${gotAt()}`;
    throw new SyntaxError(message);
  }
  i++;
}
/**
 * Consume the separator between two table rows: a newline, optionally
 * followed by more newlines, whitespace and comments.
 *
 * @throws SyntaxError when the character at the cursor is not a newline.
 */
function eatTableRowSeparator() {
  const atNewline = text.charCodeAt(i) === codeNewline;
  if (!atNewline) {
    const message = `Newline '\n' expected after table row ${gotAt()}`;
    throw new SyntaxError(message);
  }
  // skipWhitespace() consumes the newline itself plus anything blank after it
  skipWhitespace();
}
/**
 * Assert that the whole input has been consumed.
 *
 * @throws SyntaxError when there are characters left at or after the cursor.
 */
function expectEndOfInput() {
  const hasLeftovers = i < text.length;
  if (hasLeftovers) {
    const message = `Expected end of input ${gotAt()}`;
    throw new SyntaxError(message);
  }
}
/**
 * Assert that the character at the cursor is a digit.
 *
 * @param start Index where the number being parsed began; the text from
 * there up to the cursor is quoted in the error message.
 * @throws SyntaxError when the character at the cursor is not a digit.
 */
function expectDigit(start) {
  if (isDigit(text.charCodeAt(i))) {
    return;
  }
  const partial = text.slice(start, i);
  throw new SyntaxError(`Invalid number '${partial}', expecting a digit ${gotAt()}`);
}
/**
 * Assert that the cursor is on the closing double quote of a string.
 *
 * @throws SyntaxError when the character at the cursor is not a double quote.
 */
function expectEndOfString() {
  const atClosingQuote = text.charCodeAt(i) === codeDoubleQuote;
  if (!atClosingQuote) {
    const message = `End of string '"' expected ${gotAt()}`;
    throw new SyntaxError(message);
  }
}
/**
 * Report that a quoted object key was expected at the cursor.
 *
 * @throws SyntaxError always.
 */
function throwObjectKeyExpected() {
  const message = `Quoted object key expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report that a table field name was expected at the cursor.
 *
 * @throws SyntaxError always.
 */
function throwTableFieldExpected() {
  const message = `Table field expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report an object key that occurs twice with different values.
 *
 * @param key The duplicated key.
 * @param pos The position to mention in the message.
 * @throws SyntaxError always.
 */
function throwDuplicateKey(key, pos) {
  const message = `Duplicate key '${key}' encountered at position ${pos}`;
  throw new SyntaxError(message);
}
/**
 * Report that either another object key or the closing `}` was expected.
 *
 * @throws SyntaxError always.
 */
function throwObjectKeyOrEndExpected() {
  const message = `Quoted object key or end of object '}' expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report that either another array item or the closing `]` was expected.
 *
 * @throws SyntaxError always.
 */
function throwArrayItemOrEndExpected() {
  const message = `Array item or end of array ']' expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report that either another table row or the closing `---` was expected.
 *
 * @throws SyntaxError always.
 */
function throwTableRowOrEndExpected() {
  const message = `Table row or end of table '---' expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report that an array item was expected at the cursor.
 *
 * @throws SyntaxError always.
 */
function throwArrayItemExpected() {
  const message = `Array item expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report that a value was expected at the cursor.
 *
 * @throws SyntaxError always.
 */
function throwValueExpected() {
  const message = `JSON value expected ${gotAt()}`;
  throw new SyntaxError(message);
}
/**
 * Report a character that is not allowed inside a string.
 *
 * @param char The offending character, quoted in the message.
 * @throws SyntaxError always.
 */
function throwInvalidCharacter(char) {
  const message = `Invalid character '${char}' ${pos()}`;
  throw new SyntaxError(message);
}
/**
 * Report an unsupported escape sequence.
 *
 * @param start Index of the backslash; the two characters starting there are
 * quoted in the message.
 * @throws SyntaxError always.
 */
function throwInvalidEscapeCharacter(start) {
  const sequence = text.slice(start, start + 2);
  const message = `Invalid escape character '${sequence}' ${pos()}`;
  throw new SyntaxError(message);
}
/**
 * Report that an object value was expected after the colon.
 *
 * @throws SyntaxError always.
 */
function throwObjectValueExpected() {
  const message = `Object value expected after ':' ${pos()}`;
  throw new SyntaxError(message);
}
/**
 * Report a malformed `\uXXXX` escape sequence.
 *
 * @param start Index of the backslash; the six characters starting there are
 * quoted in the message.
 * @throws SyntaxError always.
 */
function throwInvalidUnicodeCharacter(start) {
  const sequence = text.slice(start, start + 6);
  const message = `Invalid unicode character '${sequence}' ${pos()}`;
  throw new SyntaxError(message);
}
/**
 * Describe the cursor for use in error messages.
 *
 * @returns The text "at position N", where N is the zero-based cursor index.
 */
function pos() {
  const position = i;
  return `at position ${position}`;
}
/**
 * Describe what is found at the cursor for use in error messages.
 *
 * @returns "but got 'c'" with the character at the cursor, or
 * "but reached end of input" when the cursor is past the last character.
 */
function got() {
  if (i < text.length) {
    const found = text[i];
    return `but got '${found}'`;
  }
  return 'but reached end of input';
}
/**
 * Combine got() and pos() into the common tail of an error message.
 *
 * @returns The two descriptions joined by a single space.
 */
function gotAt() {
  const found = got();
  const where = pos();
  return `${found} ${where}`;
}
}
/**
 * Test whether a character code is whitespace: space, newline, tab or
 * carriage return.
 *
 * @param code A character code as returned by String.prototype.charCodeAt.
 * @returns true for one of the four whitespace codes, false otherwise.
 */
function isWhitespace(code) {
  switch (code) {
    case codeSpace:
    case codeNewline:
    case codeTab:
    case codeReturn:
      return true;
    default:
      return false;
  }
}
/**
 * Test whether a character code is whitespace inside a table row: space, tab
 * or carriage return. A newline is deliberately not included.
 *
 * @param code A character code as returned by String.prototype.charCodeAt.
 * @returns true for one of the three codes, false otherwise.
 */
function isTableWhitespace(code) {
  switch (code) {
    case codeSpace:
    case codeTab:
    case codeReturn:
      return true;
    default:
      return false;
  }
}
/**
 * Test whether a character code is a hexadecimal digit (0-9, A-F or a-f).
 *
 * @param code A character code as returned by String.prototype.charCodeAt.
 * @returns true for a hexadecimal digit, false otherwise (including NaN).
 */
function isHex(code) {
  if (code >= codeZero && code <= codeNine) {
    return true;
  }
  if (code >= codeUppercaseA && code <= codeUppercaseF) {
    return true;
  }
  return code >= codeLowercaseA && code <= codeLowercaseF;
}
/**
 * Test whether a character code is a decimal digit (0-9).
 *
 * @param code A character code as returned by String.prototype.charCodeAt.
 * @returns true for a digit, false otherwise (including NaN).
 */
function isDigit(code) {
  const atLeastZero = code >= codeZero;
  const atMostNine = code <= codeNine;
  return atLeastZero && atMostNine;
}
/**
 * Test whether a character code is a decimal digit other than zero (1-9).
 *
 * @param code A character code as returned by String.prototype.charCodeAt.
 * @returns true for a digit from 1 to 9, false otherwise (including NaN).
 */
function isNonZeroDigit(code) {
  const atLeastOne = code >= codeOne;
  const atMostNine = code <= codeNine;
  return atLeastOne && atMostNine;
}
/**
 * Test whether a character code may appear unescaped inside a string.
 *
 * @param code A character code.
 * @returns true when the code lies in the inclusive range 0x20..0x10ffff,
 * false otherwise (including NaN).
 */
export function isValidStringCharacter(code) {
  const lowest = 0x20;
  const highest = 0x10ffff;
  return lowest <= code && code <= highest;
}
/**
 * Recursively compare two parsed values for equality.
 *
 * Primitives are compared with `===`, except that NaN is considered equal to
 * NaN. Arrays are equal when they have the same length and pairwise equal
 * items. Objects are equal when every key found on either side holds equal
 * values. An array is never equal to a non-array, even when a plain object
 * happens to carry the same index keys.
 *
 * @param a First value.
 * @param b Second value.
 * @returns true when both values are deeply equal, false otherwise.
 */
export function isDeepEqual(a, b) {
  if (a === b) {
    return true;
  }
  if (Number.isNaN(a) && Number.isNaN(b)) {
    // NaN is the only value for which `===` fails on identical inputs
    return true;
  }
  const aIsArray = Array.isArray(a);
  if (aIsArray !== Array.isArray(b)) {
    // without this guard `[]` and `{}` would compare equal below
    return false;
  }
  if (aIsArray) {
    return a.length === b.length && a.every((item, index) => isDeepEqual(item, b[index]));
  }
  if (isObject(a) && isObject(b)) {
    const keys = [...new Set([...Object.keys(a), ...Object.keys(b)])];
    return keys.every((key) => isDeepEqual(a[key], b[key]));
  }
  return false;
}
/**
 * Test whether a value is a non-null object (arrays included).
 *
 * @param value Any value.
 * @returns true when `typeof value` is 'object' and the value is not null.
 */
function isObject(value) {
  if (value === null) {
    return false;
  }
  return typeof value === 'object';
}
// Lookup table for single-character escape sequences: the key is the
// character that follows the backslash, the value is the character it denotes.
// Note that \u is handled separately in parseString().
const escapeCharacters = Object.fromEntries([
  ['"', '"'],
  ['\\', '\\'],
  ['/', '/'],
  ['b', '\b'],
  ['f', '\f'],
  ['n', '\n'],
  ['r', '\r'],
  ['t', '\t']
]);
// Character codes (UTF-16 code units) of the characters the parser looks for.

// comments
const codeBackslash = 0x5c; // "\"
const codeSlash = 0x2f; // "/"
const codeAsterisk = 0x2a; // "*"

// brackets
const codeOpeningBrace = 0x7b; // "{"
const codeClosingBrace = 0x7d; // "}"
const codeOpeningBracket = 0x5b; // "["
const codeClosingBracket = 0x5d; // "]"

// whitespace
const codeSpace = 0x20; // " "
const codeNewline = 0x0a; // "\n"
const codeTab = 0x09; // "\t"
const codeReturn = 0x0d; // "\r"

// strings and numbers
const codeDoubleQuote = 0x22; // "
const codePlus = 0x2b; // "+"
const codeMinus = 0x2d; // "-"
const codeZero = 0x30; // "0"
const codeOne = 0x31; // "1"
const codeNine = 0x39; // "9"

// separators
const codeComma = 0x2c; // ","
const codeDot = 0x2e; // "." (dot, period)
const codeColon = 0x3a; // ":"

// letters used for hexadecimal digits and exponents
export const codeUppercaseA = 0x41; // "A"
export const codeLowercaseA = 0x61; // "a"
export const codeUppercaseE = 0x45; // "E"
export const codeLowercaseE = 0x65; // "e"
export const codeUppercaseF = 0x46; // "F"
export const codeLowercaseF = 0x66; // "f"
//# sourceMappingURL=parse.js.map