// tokenize-json
// Version:
// Streaming, environment agnostic JSON tokenizer.
// 320 lines • 9.83 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.isWhitespace = exports.isLowerAlpha = exports.readChars = exports.tokenize = exports.defaultJsonTokenizerOptions = void 0;
const error_1 = require("./error");
const token_1 = require("./token");
// Baseline options for `tokenize`; caller-supplied options are spread on top
// of these, so any key the caller provides replaces the value given here.
exports.defaultJsonTokenizerOptions = {
// Length of buffered string content at which `tokenize` emits a StringChunk
// token and starts a new buffer.
bufferSize: 1024,
};
/**
 * Tokenizes JSON text that arrives in chunks, yielding each token as soon as
 * it has been recognized.
 *
 * Behavior worth knowing:
 * - The root may contain any number of values, optionally separated by
 *   whitespace; tokenization simply continues until the input is exhausted.
 * - Whitespace is reported as tokens of its own.
 * - A string is reported as `StringOpen`, zero or more `StringChunk`s and
 *   `StringClose`. Escape sequences are passed through verbatim (not decoded).
 * - Structure is only checked as far as the tokenizer needs to pick the next
 *   token kind. NOTE(review): a trailing comma directly before `]` or `}` is
 *   accepted by the current loops - confirm whether that is intended.
 *
 * @param chunks Sync or async iterable of chunks; every chunk is itself
 *   iterated to obtain single characters (presumably strings - confirm
 *   against callers).
 * @param options Optional overrides for `defaultJsonTokenizerOptions`.
 *   `bufferSize` is the buffered length at which string content is flushed as
 *   a `StringChunk`; a nullish value falls back to the default.
 * @yields Objects of the form `{ type, value }`, where `type` is a member of
 *   `TokenType` and `value` is the matched source text.
 * @throws JsonTokenizerError when an unexpected character is met or the input
 *   ends in the middle of a value.
 */
async function* tokenize(chunks, options = {}) {
    const settings = {
        ...exports.defaultJsonTokenizerOptions,
        ...options,
    };
    // A caller forwarding `{ bufferSize: undefined }` would otherwise replace
    // the default with `undefined`, and `length >= undefined` is never true,
    // which silently disables chunking.
    const bufferSize = settings.bufferSize ?? exports.defaultJsonTokenizerOptions.bufferSize;
    const chars = readChars(chunks);
    const char = chars[Symbol.asyncIterator]();
    // One-character lookahead: the iterator result of the character that has
    // been read but not yet consumed by any emitter.
    let current;
    try {
        current = await char.next();
        yield* emitRoot();
    }
    finally {
        // The character iterator is advanced by hand, so nothing closes it
        // automatically. Closing it here lets the upstream `chunks` iterator
        // run its cleanup when the consumer stops early or an error is thrown.
        // It is a no-op once the input has been fully consumed.
        await char.return();
    }
    /** Emits every root-level value together with the whitespace around it. */
    async function* emitRoot() {
        yield* emitWhitespace();
        while (!current.done) {
            yield* emitValue();
            yield* emitWhitespace();
        }
    }
    /** Dispatches on the lookahead character to the matching value emitter. */
    async function* emitValue() {
        // Despite its name, `assertDone` throws when the input IS exhausted.
        assertDone(current);
        switch (current.value) {
            case "{":
                yield* emitObject();
                break;
            case "[":
                yield* emitArray();
                break;
            case "\"":
                yield* emitString();
                break;
            case "-":
                yield* emitNumber();
                break;
            default:
                if (isNumeric(current.value)) {
                    yield* emitNumber();
                    break;
                }
                if (isLowerAlpha(current.value)) {
                    yield* emitKeyword();
                    break;
                }
                throwUnexpected(current.value);
        }
    }
    /** Emits `{`, alternating members and commas, then `}`. */
    async function* emitObject() {
        assertDone(current);
        assertExpected(current.value, "{");
        yield {
            type: token_1.TokenType.ObjectOpen,
            value: current.value,
        };
        current = await char.next();
        assertDone(current);
        // Toggles after every iteration: member, comma, member, comma, ...
        let expectComma = false;
        yield* emitWhitespace();
        // At end of input `current.value` is undefined, so the loop is entered
        // and the next emitter reports the premature end.
        while (current.value !== "}") {
            if (expectComma) {
                yield* emitComma();
                yield* emitWhitespace();
            }
            else {
                yield* emitString();
                yield* emitWhitespace();
                yield* emitColon();
                yield* emitWhitespace();
                yield* emitValue();
                yield* emitWhitespace();
            }
            expectComma = !expectComma;
        }
        yield {
            type: token_1.TokenType.ObjectClose,
            value: current.value,
        };
        current = await char.next();
    }
    /** Emits `[`, alternating values and commas, then `]`. */
    async function* emitArray() {
        assertDone(current);
        assertExpected(current.value, "[");
        yield {
            type: token_1.TokenType.ArrayOpen,
            value: current.value,
        };
        current = await char.next();
        assertDone(current);
        // Toggles after every iteration: value, comma, value, comma, ...
        let expectComma = false;
        yield* emitWhitespace();
        while (current.value !== "]") {
            if (expectComma) {
                yield* emitComma();
                yield* emitWhitespace();
            }
            else {
                yield* emitValue();
                yield* emitWhitespace();
            }
            expectComma = !expectComma;
        }
        yield {
            type: token_1.TokenType.ArrayClose,
            value: current.value,
        };
        current = await char.next();
    }
    /**
     * Emits the opening quote, the raw string content in chunks, and the
     * closing quote. An empty string produces no `StringChunk` at all.
     */
    async function* emitString() {
        assertDone(current);
        assertExpected(current.value, "\"");
        yield {
            type: token_1.TokenType.StringOpen,
            value: current.value,
        };
        current = await char.next();
        assertDone(current);
        let buffer = "";
        while (current.value !== "\"") {
            if (current.value === "\\") {
                // Keep the backslash and take the following character
                // unconditionally, so an escaped quote does not end the string
                // and an escape pair is never split across two chunks.
                buffer += current.value;
                current = await char.next();
                assertDone(current);
            }
            buffer += current.value;
            current = await char.next();
            assertDone(current);
            if (buffer.length >= bufferSize) {
                yield {
                    type: token_1.TokenType.StringChunk,
                    value: buffer,
                };
                buffer = "";
            }
        }
        if (buffer.length > 0) {
            yield {
                type: token_1.TokenType.StringChunk,
                value: buffer,
            };
        }
        yield {
            type: token_1.TokenType.StringClose,
            value: current.value,
        };
        current = await char.next();
    }
    /**
     * Consumes one or more digits starting at the lookahead character and
     * returns them. Throws if the lookahead is not a digit.
     */
    async function readDigits() {
        if (!isNumeric(current.value)) {
            throwUnexpected(current.value);
        }
        let digits = "";
        do {
            digits += current.value;
            current = await char.next();
        } while (!current.done && isNumeric(current.value));
        return digits;
    }
    /**
     * Emits a single `Number` token holding the source text of the number:
     * optional minus, integer part, optional fraction, optional exponent.
     */
    async function* emitNumber() {
        assertDone(current);
        if (!(current.value === "-" || isNumeric(current.value))) {
            throwUnexpected(current.value);
        }
        let buffer = "";
        // minus
        if (current.value === "-") {
            buffer += current.value;
            current = await char.next();
            assertDone(current);
        }
        // integer: a lone "0" (no further digits are consumed) or a digit run
        if (current.value === "0") {
            buffer += current.value;
            current = await char.next();
        }
        else {
            const integerDigits = await readDigits();
            buffer += integerDigits;
        }
        // fraction: "." must be followed by at least one digit
        if (!current.done && current.value === ".") {
            buffer += current.value;
            current = await char.next();
            assertDone(current);
            const fractionDigits = await readDigits();
            buffer += fractionDigits;
        }
        // exponent: "e"/"E", optional sign, then at least one digit
        if (!current.done && (current.value === "e" || current.value === "E")) {
            buffer += current.value;
            current = await char.next();
            assertDone(current);
            if (current.value === "-" || current.value === "+") {
                buffer += current.value;
                current = await char.next();
                assertDone(current);
            }
            const exponentDigits = await readDigits();
            buffer += exponentDigits;
        }
        yield {
            type: token_1.TokenType.Number,
            value: buffer,
        };
    }
    /**
     * Reads a run of lower-case letters and emits it as `True`, `False` or
     * `Null`; any other word is rejected.
     */
    async function* emitKeyword() {
        let buffer = "";
        while (!current.done && isLowerAlpha(current.value)) {
            buffer += current.value;
            current = await char.next();
        }
        switch (buffer) {
            case "true":
                yield {
                    type: token_1.TokenType.True,
                    value: buffer,
                };
                break;
            case "false":
                yield {
                    type: token_1.TokenType.False,
                    value: buffer,
                };
                break;
            case "null":
                yield {
                    type: token_1.TokenType.Null,
                    value: buffer,
                };
                break;
            default: throwUnexpected(buffer);
        }
    }
    /** Emits one `Whitespace` token for a run of whitespace, if there is any. */
    async function* emitWhitespace() {
        let buffer = "";
        while (!current.done && isWhitespace(current.value)) {
            buffer += current.value;
            current = await char.next();
        }
        if (buffer.length > 0) {
            yield {
                type: token_1.TokenType.Whitespace,
                value: buffer,
            };
        }
    }
    /** Emits a `Comma` token; the lookahead must be ",". */
    async function* emitComma() {
        assertDone(current);
        assertExpected(current.value, ",");
        yield {
            type: token_1.TokenType.Comma,
            value: current.value,
        };
        current = await char.next();
    }
    /** Emits a `Colon` token; the lookahead must be ":". */
    async function* emitColon() {
        assertDone(current);
        assertExpected(current.value, ":");
        yield {
            type: token_1.TokenType.Colon,
            value: current.value,
        };
        current = await char.next();
    }
}
exports.tokenize = tokenize;
/**
 * Flattens a sequence of chunks into a sequence of the items each chunk
 * yields when iterated (for string chunks: one character per code point).
 *
 * @param chunks Sync or async iterable of iterable chunks.
 * @yields Every item of every chunk, in order.
 */
async function* readChars(chunks) {
    for await (const piece of chunks) {
        // `for await` accepts the same sync and async iterables that a
        // delegating yield would inside an async generator.
        for await (const unit of piece) {
            yield unit;
        }
    }
}
exports.readChars = readChars;
/**
 * Tells whether `char` sorts within the range "a" to "z" inclusive.
 *
 * @param char Value to test; intended for single characters.
 * @returns `true` when `char` lies in that range, otherwise `false`.
 */
function isLowerAlpha(char) {
    // Written as "not at least" rather than "less than" so that values which
    // compare as neither (e.g. undefined) are rejected.
    if (!(char >= "a")) {
        return false;
    }
    return char <= "z";
}
exports.isLowerAlpha = isLowerAlpha;
/**
 * Tells whether `char` is exactly one of the four whitespace characters
 * recognized by this tokenizer.
 *
 * @param char Value to test.
 * @returns `true` for space, line feed, carriage return or horizontal tab.
 */
function isWhitespace(char) {
    switch (char) {
        case "\u0020": // space
        case "\u000A": // line feed
        case "\u000D": // carriage return
        case "\u0009": // horizontal tab
            return true;
        default:
            return false;
    }
}
exports.isWhitespace = isWhitespace;
/**
 * Tells whether `char` sorts within the range "0" to "9" inclusive.
 *
 * @param char Value to test; intended for single characters.
 * @returns `true` when `char` lies in that range, otherwise `false`.
 */
function isNumeric(char) {
    // Written as "not at least" rather than "less than" so that values which
    // compare as neither (e.g. undefined) are rejected.
    if (!(char >= "0")) {
        return false;
    }
    return char <= "9";
}
/**
 * Guards against reading past the end of the input.
 *
 * Note the naming: this throws when the iterator result IS done, because the
 * file-local `assert` helper throws when its condition is truthy.
 *
 * @param result Iterator result to check.
 * @throws JsonTokenizerError ("Unexpected end of input") when `result.done`
 *   is truthy.
 */
function assertDone(result) {
    const exhausted = result.done ?? false;
    assert(exhausted, "Unexpected end of input");
}
/**
 * Verifies that the character just read is the one the grammar requires.
 *
 * @param actual Character that was read.
 * @param expected Character that must appear at this position.
 * @throws JsonTokenizerError (via `throwUnexpected`) when the two are not
 *   strictly equal.
 */
function assertExpected(actual, expected) {
    if (actual === expected) {
        return;
    }
    throwUnexpected(actual);
}
/**
 * Always throws, reporting `value` as unexpected input.
 *
 * @param value Offending character or word; interpolated into the message.
 * @throws JsonTokenizerError with the message `Unexpected <value>`.
 */
function throwUnexpected(value) {
    const message = `Unexpected ${value}`;
    throw new error_1.JsonTokenizerError(message);
}
/**
 * Throws a tokenizer error when `condition` is truthy.
 *
 * Note that this is the opposite of the usual "assert" convention: a truthy
 * condition signals the failure case here.
 *
 * @param condition Failure flag; any truthy value triggers the error.
 * @param message Message for the thrown error.
 * @throws JsonTokenizerError when `condition` is truthy.
 */
function assert(condition, message) {
    if (!condition) {
        return;
    }
    throw new error_1.JsonTokenizerError(message);
}
//# sourceMappingURL=tokenize.js.map