json-web-streams
Version:
Streaming JSON parser built on top of the Web Streams API, so it works in web browsers, Node.js, and many other environments
268 lines (267 loc) • 9.97 kB
JavaScript
//#region src/JSONParseStreamRaw.ts
// Insignificant whitespace allowed between JSON tokens:
// space, horizontal tab, line feed and carriage return.
const WHITESPACE = new Set([
  " ",
  "\t",
  "\n",
  "\r"
]);
/**
 * Incremental JSON parser. Feed text chunks to `write()` in order and call
 * `checkEnd()` once the input is exhausted. Values are assembled into plain
 * objects/arrays and reported through the callbacks given to the constructor.
 *
 * Parsing happens in two layers:
 *  - a character-level tokenizer driven by `tokenizerState`, and
 *  - a grammar layer (`onToken`) driven by `state`, `mode` and `stack`.
 */
var JSONParseStreamRaw = class {
  /** Tokenizer state: "START", "STRING1".."STRING6", "NUMBER", "NUMBER-", "NUMBER0", "TRUE1", ... */
  tokenizerState = "START";
  /** Grammar state: which kind of token is expected next ("VALUE", "KEY", "COLON", "COMMA", ...). */
  state = "VALUE";
  /** "OBJECT" or "ARRAY" while inside a container; undefined at the top level. */
  mode;
  /** Saved `{ value, key, mode }` frames of the enclosing containers. */
  stack = [];
  /** Text of the string or number token currently being read. */
  string;
  /** Object key or array index under which the next value is stored. */
  key;
  /** Container currently being filled, if any. */
  value;
  /** Number of characters consumed by earlier `write()` calls. */
  position = 0;
  onKey;
  onPop;
  onPush;
  onValue;
  /** Hex digits collected so far for the `\uXXXX` escape being read. */
  unicode;
  /** Not written by this class any more; declared so the set of instance fields is unchanged. */
  highSurrogate;
  /** Becomes true as soon as any token is seen at the top level. */
  seenRootObject = false;
  /** When truthy, several top-level values may follow each other in the input. */
  multi;
  multiIndex = 0;

  /**
   * @param {object} options
   * @param {boolean} [options.multi] Allow more than one top-level value.
   * @param {(depth: number) => void} [options.onKey] Called with the stack depth when an object key is read.
   * @param {(depth: number) => void} [options.onPop] Called with the stack depth after a container is closed.
   * @param {(depth: number) => void} [options.onPush] Called with the stack depth after a container is opened.
   * @param {(value: unknown) => void} options.onValue Called with every completed value (nested ones included).
   */
  constructor({ multi, onKey, onPop, onPush, onValue }) {
    this.multi = multi;
    this.onKey = onKey;
    this.onPop = onPop;
    this.onPush = onPush;
    this.onValue = onValue;
  }

  /**
   * Reports a tokenizer-level error.
   * @param {string} char Offending character or token text.
   * @param {number} i Index relative to the chunk currently being written.
   * @param {unknown} [cause] Underlying error, if any.
   * @throws {Error} Always.
   */
  charError(char, i, cause) {
    const message = `Unexpected ${JSON.stringify(char)} at position ${this.position + i} in state ${this.tokenizerState}`;
    throw cause === void 0 ? new Error(message) : new Error(message, { cause });
  }

  /**
   * Reports a grammar-level error.
   * @param {string} token Token type.
   * @param {unknown} value Token value.
   * @param {number} i Index relative to the chunk currently being written.
   * @throws {Error} Always.
   */
  parseError(token, value, i) {
    throw new Error(`Unexpected ${token}${value ? `(${JSON.stringify(value)})` : ""} at position ${this.position + i} in state ${this.state}`);
  }

  /**
   * Consumes one chunk of input text, character by character.
   * @param {string} text Next chunk of the JSON input.
   * @throws {Error} On the first character that cannot continue valid JSON.
   */
  write(text) {
    for (let i = 0, l = text.length; i < l; i++) {
      const n = text[i];
      // Single-value mode: after the root value is complete only whitespace may follow.
      if (!this.multi && this.stack.length === 0 && this.seenRootObject && !WHITESPACE.has(n)) {
        return this.charError(n, i);
      }
      switch (this.tokenizerState) {
        case "START":
          switch (n) {
            case "{":
              this.onToken("LEFT_BRACE", "{", i);
              break;
            case "}":
              this.onToken("RIGHT_BRACE", "}", i);
              break;
            case "[":
              this.onToken("LEFT_BRACKET", "[", i);
              break;
            case "]":
              this.onToken("RIGHT_BRACKET", "]", i);
              break;
            case ":":
              this.onToken("COLON", ":", i);
              break;
            case ",":
              this.onToken("COMMA", ",", i);
              break;
            case "t":
              this.tokenizerState = "TRUE1";
              break;
            case "f":
              this.tokenizerState = "FALSE1";
              break;
            case "n":
              this.tokenizerState = "NULL1";
              break;
            case "\"":
              this.string = "";
              this.tokenizerState = "STRING1";
              break;
            case "-":
              this.string = "-";
              this.tokenizerState = "NUMBER-";
              break;
            case "0":
              this.string = n;
              this.tokenizerState = "NUMBER0";
              break;
            default:
              if (n >= "1" && n <= "9") {
                this.string = n;
                this.tokenizerState = "NUMBER";
              } else if (WHITESPACE.has(n)) {
                // Whitespace between tokens is skipped.
              } else if ((n === "\x1e" || n === "\u241e") && this.multi && this.stack.length === 0) {
                // Record separator between top-level values in multi mode.
                // U+001E is the actual RS control character; U+241E (its
                // printable symbol) is still accepted for backward compatibility.
              } else {
                return this.charError(n, i);
              }
          }
          break;

        case "STRING1": // inside a string literal
          if (n === "\"") {
            this.tokenizerState = "START";
            this.onToken("STRING", this.string, i);
            this.string = void 0;
          } else if (n === "\\") {
            this.tokenizerState = "STRING2";
          } else {
            // Raw control characters are not allowed inside strings.
            if (n.charCodeAt(0) <= 31) this.charError(n, i);
            this.string += n;
          }
          break;

        case "STRING2": { // character right after a backslash
          if (n === "u") {
            this.unicode = "";
            this.tokenizerState = "STRING3";
            break;
          }
          let unescaped;
          switch (n) {
            case "\"":
            case "\\":
            case "/":
              unescaped = n;
              break;
            case "b":
              unescaped = "\b";
              break;
            case "f":
              unescaped = "\f";
              break;
            case "n":
              unescaped = "\n";
              break;
            case "r":
              unescaped = "\r";
              break;
            case "t":
              unescaped = "\t";
              break;
            default:
              return this.charError(n, i);
          }
          this.string += unescaped;
          this.tokenizerState = "STRING1";
          break;
        }

        case "STRING3":
        case "STRING4":
        case "STRING5":
        case "STRING6": { // the four hex digits of \uXXXX
          // Validate every digit: parseInt alone would accept inputs such as "12zz" or "-001".
          const isHexDigit = n >= "0" && n <= "9" || n >= "a" && n <= "f" || n >= "A" && n <= "F";
          if (!isHexDigit) return this.charError(n, i);
          this.unicode += n;
          if (this.tokenizerState === "STRING3") this.tokenizerState = "STRING4";
          else if (this.tokenizerState === "STRING4") this.tokenizerState = "STRING5";
          else if (this.tokenizerState === "STRING5") this.tokenizerState = "STRING6";
          else {
            // JS strings are sequences of UTF-16 code units, so appending each
            // escaped unit on its own yields correct surrogate pairs and also
            // preserves lone surrogates exactly where they appeared.
            this.string += String.fromCharCode(Number.parseInt(this.unicode, 16));
            this.unicode = void 0;
            this.tokenizerState = "STRING1";
          }
          break;
        }

        case "NUMBER":
        case "NUMBER-":
        case "NUMBER0":
          // A leading zero may not be followed by another digit.
          if (this.tokenizerState === "NUMBER0" && n >= "0" && n <= "9") return this.charError("0", i - 1);
          switch (n) {
            case "0":
              this.string += n;
              this.tokenizerState = this.tokenizerState === "NUMBER-" ? "NUMBER0" : "NUMBER";
              break;
            case "1":
            case "2":
            case "3":
            case "4":
            case "5":
            case "6":
            case "7":
            case "8":
            case "9":
            case ".":
            case "e":
            case "E":
            case "+":
            case "-":
              this.string += n;
              this.tokenizerState = "NUMBER";
              break;
            default:
              // Any other character ends the number; emit it, then process
              // the same character again from the START state.
              this.tokenizerState = "START";
              this.numberReviver(this.string, i);
              this.string = void 0;
              i--;
              break;
          }
          break;

        case "TRUE1":
          if (n !== "r") return this.charError(n, i);
          this.tokenizerState = "TRUE2";
          break;
        case "TRUE2":
          if (n !== "u") return this.charError(n, i);
          this.tokenizerState = "TRUE3";
          break;
        case "TRUE3":
          if (n !== "e") return this.charError(n, i);
          this.tokenizerState = "START";
          this.onToken("TRUE", true, i);
          break;

        case "FALSE1":
          if (n !== "a") return this.charError(n, i);
          this.tokenizerState = "FALSE2";
          break;
        case "FALSE2":
          if (n !== "l") return this.charError(n, i);
          this.tokenizerState = "FALSE3";
          break;
        case "FALSE3":
          if (n !== "s") return this.charError(n, i);
          this.tokenizerState = "FALSE4";
          break;
        case "FALSE4":
          if (n !== "e") return this.charError(n, i);
          this.tokenizerState = "START";
          this.onToken("FALSE", false, i);
          break;

        case "NULL1":
          if (n !== "u") return this.charError(n, i);
          this.tokenizerState = "NULL2";
          break;
        case "NULL2":
          if (n !== "l") return this.charError(n, i);
          this.tokenizerState = "NULL3";
          break;
        case "NULL3":
          if (n !== "l") return this.charError(n, i);
          this.tokenizerState = "START";
          this.onToken("NULL", null, i);
          break;
      }
    }
    this.position += text.length;
  }

  /** Saves the current container frame before descending into a new container. */
  push() {
    this.stack.push({
      value: this.value,
      key: this.key,
      mode: this.mode
    });
    this.onPush?.(this.stack.length);
  }

  /** Closes the current container, restores the parent frame and emits the finished container. */
  pop() {
    const value = this.value;
    const parent = this.stack.pop();
    this.value = parent.value;
    this.key = parent.key;
    this.mode = parent.mode;
    this.emit(value);
    this.onPop?.(this.stack.length);
    // Back at the top level: another value may follow.
    if (!this.mode) this.state = "VALUE";
  }

  /**
   * Records the key/index for the next value; `onKey` fires only for string keys in OBJECT mode.
   * @param {string | number | undefined} key
   */
  setKey(key) {
    this.key = key;
    if (this.onKey && typeof key === "string" && this.mode === "OBJECT") this.onKey(this.stack.length);
  }

  /**
   * Reports a completed value via `onValue`; inside a container a comma or closer is expected next.
   * @param {unknown} value
   */
  emit(value) {
    if (this.mode) this.state = "COMMA";
    if (value === void 0) return;
    this.onValue(value);
  }

  /**
   * Stores `child` in the current container under the current key and returns it.
   * "__proto__" is defined as an own property: plain assignment would invoke
   * the prototype setter instead of creating a data property.
   */
  #storeChild(child) {
    if (this.key === "__proto__") {
      Object.defineProperty(this.value, this.key, {
        value: child,
        writable: true,
        enumerable: true,
        configurable: true
      });
    } else {
      this.value[this.key] = child;
    }
    return child;
  }

  /**
   * Grammar layer: consumes one token and advances `state`.
   * @param {string} token Token type ("STRING", "LEFT_BRACE", ...).
   * @param {unknown} value Token value.
   * @param {number} i Index relative to the chunk currently being written (used for error messages).
   * @throws {Error} If the token is not allowed in the current state.
   */
  onToken(token, value, i) {
    if (this.stack.length === 0) this.seenRootObject = true;

    switch (this.state) {
      case "VALUE":
      case "VALUE_AFTER_COMMA":
        switch (token) {
          case "STRING":
          case "NUMBER":
          case "TRUE":
          case "FALSE":
          case "NULL":
            if (this.value) this.#storeChild(value);
            this.emit(value);
            return;
          case "LEFT_BRACE":
            this.push();
            this.value = this.value ? this.#storeChild({}) : {};
            this.setKey(void 0);
            this.state = "KEY";
            this.mode = "OBJECT";
            return;
          case "LEFT_BRACKET":
            this.push();
            this.value = this.value ? this.#storeChild([]) : [];
            this.setKey(0);
            this.mode = "ARRAY";
            this.state = "VALUE";
            return;
          case "RIGHT_BRACKET":
            // "[]" is the only way to close an array while a value is expected.
            if (this.mode === "ARRAY" && this.state !== "VALUE_AFTER_COMMA") {
              this.pop();
              return;
            }
            return this.parseError(token, value, i);
          default:
            // Includes RIGHT_BRACE: in an object a value is only expected
            // right after a colon, so `{"a":}` must be rejected.
            return this.parseError(token, value, i);
        }

      case "KEY":
      case "KEY_AFTER_COMMA":
        if (token === "STRING") {
          this.setKey(value);
          this.state = "COLON";
        } else if (token === "RIGHT_BRACE" && this.state !== "KEY_AFTER_COMMA") {
          this.pop();
        } else {
          return this.parseError(token, value, i);
        }
        return;

      case "COLON":
        if (token !== "COLON") return this.parseError(token, value, i);
        this.state = "VALUE";
        return;

      case "COMMA":
        if (token === "COMMA") {
          if (this.mode === "ARRAY") {
            this.key++;
            this.state = "VALUE_AFTER_COMMA";
          } else if (this.mode === "OBJECT") {
            this.state = "KEY_AFTER_COMMA";
          }
        } else if (token === "RIGHT_BRACKET" && this.mode === "ARRAY" || token === "RIGHT_BRACE" && this.mode === "OBJECT") {
          this.pop();
        } else {
          return this.parseError(token, value, i);
        }
        return;

      default:
        return this.parseError(token, value, i);
    }
  }

  /**
   * Converts the collected number text into a NUMBER token.
   * @param {string} text Characters collected by the tokenizer.
   * @param {number} i Index relative to the chunk currently being written.
   * @throws {Error} If `text` is not a valid JSON number.
   */
  numberReviver(text, i) {
    let number;
    try {
      number = JSON.parse(text);
    } catch (error) {
      // Malformed numbers such as "1.2.3" or "-": report them like every other tokenizer error.
      return this.charError(text, i, error);
    }
    if (Number.isNaN(number)) return this.charError(text, i);
    this.onToken("NUMBER", number, i);
  }

  /**
   * Must be called after the last `write()`. Flushes a trailing top-level
   * number (numbers have no terminator of their own) and verifies that the
   * input ended at a valid point.
   * @throws {Error} If a container or token is unfinished, or no value was seen at all.
   */
  checkEnd() {
    if (this.stack.length > 0) throw new Error(`Unexpected end of input at position ${this.position} in state ${this.state}`);
    const readingNumber = this.tokenizerState === "NUMBER" || this.tokenizerState === "NUMBER0" || this.tokenizerState === "NUMBER-";
    if (readingNumber && this.string !== void 0) {
      const pending = this.string;
      this.tokenizerState = "START";
      // -1: the number ended on the last character already counted in `position`.
      this.numberReviver(pending, -1);
      this.string = void 0;
    } else if (this.tokenizerState !== "START") {
      // Input stopped in the middle of a string or a true/false/null literal.
      throw new Error(`Unexpected end of input at position ${this.position} in state ${this.tokenizerState}`);
    }
    if (!this.seenRootObject) throw new Error("No data in input");
  }
};
//#endregion
export { JSONParseStreamRaw };