UNPKG

tokenize-json

Version:

Streaming, environment agnostic JSON tokenizer.

320 lines 9.83 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.isWhitespace = exports.isLowerAlpha = exports.readChars = exports.tokenize = exports.defaultJsonTokenizerOptions = void 0; const error_1 = require("./error"); const token_1 = require("./token"); exports.defaultJsonTokenizerOptions = { bufferSize: 1024, }; async function* tokenize(chunks, options = {}) { const { bufferSize, } = { ...exports.defaultJsonTokenizerOptions, ...options, }; const chars = readChars(chunks); const char = chars[Symbol.asyncIterator](); let current = await char.next(); yield* emitRoot(); async function* emitRoot() { yield* emitWhitespace(); while (!current.done) { yield* emitValue(); yield* emitWhitespace(); } } async function* emitValue() { assertDone(current); switch (current.value) { case "{": yield* emitObject(); break; case "[": yield* emitArray(); break; case "\"": yield* emitString(); break; case "-": yield* emitNumber(); break; default: if (isNumeric(current.value)) { yield* emitNumber(); break; } if (isLowerAlpha(current.value)) { yield* emitKeyword(); break; } throwUnexpected(current.value); } } async function* emitObject() { assertDone(current); assertExpected(current.value, "{"); yield { type: token_1.TokenType.ObjectOpen, value: current.value, }; current = await char.next(); assertDone(current); let expectComma = false; yield* emitWhitespace(); while (current.value !== "}") { if (expectComma) { yield* emitComma(); yield* emitWhitespace(); } else { yield* emitString(); yield* emitWhitespace(); yield* emitColon(); yield* emitWhitespace(); yield* emitValue(); yield* emitWhitespace(); } expectComma = !expectComma; } yield { type: token_1.TokenType.ObjectClose, value: current.value, }; current = await char.next(); } async function* emitArray() { assertDone(current); assertExpected(current.value, "["); yield { type: token_1.TokenType.ArrayOpen, value: current.value, }; current = await char.next(); assertDone(current); let expectComma = false; yield* emitWhitespace(); while (current.value !== "]") { if (expectComma) { yield* emitComma(); yield* emitWhitespace(); } else { yield* emitValue(); yield* emitWhitespace(); } expectComma = !expectComma; } yield { type: token_1.TokenType.ArrayClose, value: current.value, }; current = await char.next(); } async function* emitString() { assertDone(current); assertExpected(current.value, "\""); yield { type: token_1.TokenType.StringOpen, value: current.value, }; current = await char.next(); assertDone(current); let buffer = ""; while (current.value !== "\"") { if (current.value === "\\") { buffer += current.value; current = await char.next(); assertDone(current); } buffer += current.value; current = await char.next(); assertDone(current); if (buffer.length >= bufferSize) { yield { type: token_1.TokenType.StringChunk, value: buffer, }; buffer = ""; } } if (buffer.length > 0) { yield { type: token_1.TokenType.StringChunk, value: buffer, }; } yield { type: token_1.TokenType.StringClose, value: current.value, }; current = await char.next(); } // eslint-disable-next-line complexity async function* emitNumber() { assertDone(current); if (!(current.value === "-" || isNumeric(current.value))) { throwUnexpected(current.value); } let buffer = ""; // minus if (current.value === "-") { buffer += current.value; current = await char.next(); assertDone(current); } // integer if (current.value === "0") { buffer += current.value; current = await char.next(); } else if (isNumeric(current.value)) { buffer += current.value; current = await char.next(); while (!current.done && isNumeric(current.value)) { buffer += current.value; current = await char.next(); } } else { throwUnexpected(current.value); } // fraction if (!current.done && current.value === ".") { buffer += current.value; current = await char.next(); assertDone(current); if (isNumeric(current.value)) { buffer += current.value; current = await char.next(); while (!current.done && isNumeric(current.value)) { buffer += current.value; current = await char.next(); } } else { throwUnexpected(current.value); } } // exponent if (!current.done && current.value === "e" || current.value === "E") { buffer += current.value; current = await char.next(); assertDone(current); if (current.value === "-" || current.value === "+") { buffer += current.value; current = await char.next(); assertDone(current); } if (isNumeric(current.value)) { buffer += current.value; current = await char.next(); while (!current.done && isNumeric(current.value)) { buffer += current.value; current = await char.next(); } } else { throwUnexpected(current.value); } } yield { type: token_1.TokenType.Number, value: buffer, }; } async function* emitKeyword() { let buffer = ""; while (!current.done && isLowerAlpha(current.value)) { buffer += current.value; current = await char.next(); } switch (buffer) { case "true": yield { type: token_1.TokenType.True, value: buffer, }; break; case "false": yield { type: token_1.TokenType.False, value: buffer, }; break; case "null": yield { type: token_1.TokenType.Null, value: buffer, }; break; default: throwUnexpected(buffer); } } async function* emitWhitespace() { let buffer = ""; while (!current.done && isWhitespace(current.value)) { buffer += current.value; current = await char.next(); } if (buffer.length > 0) yield { type: token_1.TokenType.Whitespace, value: buffer, }; } async function* emitComma() { assertDone(current); assertExpected(current.value, ","); yield { type: token_1.TokenType.Comma, value: current.value, }; current = await char.next(); } async function* emitColon() { assertDone(current); assertExpected(current.value, ":"); yield { type: token_1.TokenType.Colon, value: current.value, }; current = await char.next(); } } exports.tokenize = tokenize; async function* readChars(chunks) { for await (const chunk of chunks) { yield* chunk; } } exports.readChars = readChars; function isLowerAlpha(char) { return char >= "a" && char <= "z"; } exports.isLowerAlpha = isLowerAlpha; function isWhitespace(char) { return (char === "\u0020" || // space char === "\u000A" || // line feed char === "\u000D" || // carriage return char === "\u0009" // horizontal tab ); } exports.isWhitespace = isWhitespace; function isNumeric(char) { return char >= "0" && char <= "9"; } function assertDone(result) { assert(result.done ?? false, "Unexpected end of input"); } function assertExpected(actual, expected) { if (actual !== expected) { throwUnexpected(actual); } } function throwUnexpected(value) { throw new error_1.JsonTokenizerError(`Unexpected ${value}`); } function assert(condition, message) { if (condition) throw new error_1.JsonTokenizerError(message); } //# sourceMappingURL=tokenize.js.map