@jswalden/streaming-json
Version:
Streaming JSON parsing and stringification for JavaScript/TypeScript
465 lines • 17.9 kB
JavaScript
import { IsArray, Pop, Push } from "../stdlib/array.js";
import { ThrowError, ThrowSyntaxError } from "../stdlib/error.js";
import { LengthOfArrayLike } from "../stdlib/length.js";
import { Min } from "../stdlib/math.js";
import { ParseDecimalDigits, ParseFloat } from "../stdlib/number.js";
import { CreateDataProperty, DeleteProperty, EnumerableOwnPropertyKeys } from "../stdlib/object.js";
import { ReflectApply } from "../stdlib/reflect.js";
import { StringCharCodeAt, StringFromCharCode, StringSlice, ToString } from "../stdlib/string.js";
import { HexDigitToNumber, IsAsciiDigit } from "../utils/unicode.js";
;
;
/**
 * Report a violated internal invariant.
 *
 * Prefixes the message with "BUG: " and hands it to `ThrowError`, so callers
 * use this for conditions that indicate a parser defect rather than bad input.
 *
 * @param {string} msg Description of the invariant that was violated.
 */
function BUG(msg) {
    const description = `BUG: ${msg}`;
    ThrowError(description);
}
/**
 * Incremental JSON parser that receives its input as a sequence of string
 * fragments.
 *
 * All parsing runs inside the `parseJSON` generator.  Whenever the current
 * fragment is exhausted the generator suspends in `atEOF`; `add` resumes it
 * with the next fragment, and `finish` resumes it with an empty string to
 * signal end of input and collect the parsed value.
 */
export class StreamingJSONParser {
    /**
     * Ask the driver for more input.  Must only be called once the current
     * fragment has been fully consumed.
     *
     * Suspends the generator; the value passed to the next `next()` call
     * becomes the new fragment.  An empty string marks end of input, and that
     * fact is remembered so later calls return immediately.
     *
     * @returns {boolean} `true` if no more input will arrive, `false` if a
     *   new non-empty fragment was installed.
     */
    *atEOF() {
        if (this.current !== this.end)
            BUG("atEOF called when !atEnd()");
        if (this.eof)
            return true;
        const data = yield;
        if (data.length === 0) {
            this.eof = true;
            return true;
        }
        this.fragment = data;
        this.current = 0;
        this.end = data.length;
        return false;
    }

    /**
     * @returns {boolean} Whether every character of the current fragment has
     *   been consumed.
     */
    atEnd() {
        if (this.current > this.end)
            BUG("incremented current past end");
        return this.current === this.end;
    }

    /**
     * Skip spaces, line feeds, carriage returns and tabs, pulling in further
     * fragments as needed.  Returns at the first other character or at end of
     * input.
     */
    *consumeWhitespace() {
        do {
            while (!this.atEnd()) {
                const c = StringCharCodeAt(this.fragment, this.current);
                if (!(c === 32 ||
                    c === 10 ||
                    c === 13 ||
                    c === 9))
                    return;
                this.current++;
            }
            if (yield* this.atEOF())
                return;
        } while (true);
    }

    /**
     * Consume the literal `keyword`, which may be split across fragments.
     *
     * @param {string} keyword The exact text expected at the current position.
     * @throws SyntaxError (via `ThrowSyntaxError`) if input ends early or the
     *   text does not match.
     */
    *consumeKeyword(keyword) {
        let i = 0;
        while (i < keyword.length) {
            if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError(`End of data in middle of '${keyword}' keyword`);
            // Compare as much of the keyword as the current fragment holds.
            const amount = Min(keyword.length - i, this.end - this.current);
            if (StringSlice(keyword, i, i + amount) !==
                StringSlice(this.fragment, this.current, this.current + amount))
                ThrowSyntaxError(`Malformed '${keyword}' keyword`);
            this.current += amount;
            i += amount;
        }
    }

    /**
     * Parse a string literal.  The current character must be the opening
     * double quote.
     *
     * @returns {string} The string contents with escape sequences decoded.
     * @throws SyntaxError (via `ThrowSyntaxError`) on an unterminated string,
     *   a character code below 32, or a malformed escape.
     */
    *jsonString() {
        if (this.atEnd() || StringCharCodeAt(this.fragment, this.current) !== 34)
            BUG("jsonString called while not at start of string");
        this.current++;
        let value = "";
        do {
            if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError("Unterminated string literal");
            let c = this.fragment[this.current++];
            let code = StringCharCodeAt(c, 0);
            // Closing quote.
            if (code === 34)
                return value;
            if (code < 32)
                ThrowSyntaxError("Bad control character in string literal");
            // Backslash: decode the escape that follows.
            if (code === 92) {
                if (this.atEnd() && (yield* this.atEOF()))
                    ThrowSyntaxError("Incomplete escape sequence");
                c = this.fragment[this.current++];
                code = StringCharCodeAt(c, 0);
                switch (code) {
                    // '"', '/' and '\' stand for themselves.
                    case 34:
                    case 47:
                    case 92:
                        break;
                    case 98:
                        c = "\b";
                        break;
                    case 102:
                        c = "\f";
                        break;
                    case 110:
                        c = "\n";
                        break;
                    case 114:
                        c = "\r";
                        break;
                    case 116:
                        c = "\t";
                        break;
                    // 'u': exactly four hex digits, possibly spanning fragments.
                    case 117: {
                        code = 0;
                        let digits = 0;
                        do {
                            if (this.atEnd() && (yield* this.atEOF()))
                                ThrowSyntaxError("Too-short Unicode escape");
                            const amount = Min(4 - digits, this.end - this.current);
                            for (let i = 0; i < amount; i++) {
                                const n = HexDigitToNumber(this.fragment, this.current + i);
                                if (n === null)
                                    ThrowSyntaxError(`Bad Unicode escape digit '${this.fragment[this.current + i]}'`);
                                code = (code << 4) | n;
                            }
                            digits += amount;
                            this.current += amount;
                        } while (digits < 4);
                        c = StringFromCharCode(code);
                        break;
                    }
                    default:
                        ThrowSyntaxError(`Bad escaped character '${c}'`);
                }
            }
            value += c;
        } while (true);
    }

    /**
     * Parse a number.  The current character must be '-' or an ASCII digit.
     *
     * The number's text is accumulated and converted with
     * `ParseDecimalDigits` when it has no fraction or exponent, and with
     * `ParseFloat` otherwise.
     *
     * @returns {number} The parsed value.
     * @throws SyntaxError (via `ThrowSyntaxError`) on a malformed number.
     */
    *jsonNumber() {
        if (this.atEnd())
            BUG("jsonNumber called while at end of fragment");
        let c = this.fragment[this.current];
        let code = StringCharCodeAt(c, 0);
        if (!(code === 45 || IsAsciiDigit(code)))
            BUG("jsonNumber called while not at start of number");
        let numText = "";
        if (code === 45) {
            numText += c;
            this.current++;
            if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError("Missing number after '-'");
            c = this.fragment[this.current];
            code = StringCharCodeAt(c, 0);
            if (!IsAsciiDigit(code))
                ThrowSyntaxError("Unexpected nondigit");
        }
        numText += c;
        this.current++;
        // A leading zero is the whole integer part, so no further digits are
        // consumed after it.  The following character must still be fetched
        // in that case, otherwise the '.'/'e' checks below would inspect the
        // stale '0' and cut numbers like "0.5" short.
        const leadingZero = code === 48;
        do {
            if (this.atEnd() && (yield* this.atEOF()))
                return ParseDecimalDigits(numText);
            c = this.fragment[this.current];
            code = StringCharCodeAt(c, 0);
            if (leadingZero || !IsAsciiDigit(code))
                break;
            numText += c;
            this.current++;
        } while (true);
        // Clearing bit 5 (value 32) folds 'e' onto 'E'.
        if (code !== 46 &&
            (code & ~32) !== 69)
            return ParseDecimalDigits(numText);
        if (code === 46) {
            numText += c;
            this.current++;
            if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError("Missing digits after decimal point");
            c = this.fragment[this.current];
            code = StringCharCodeAt(c, 0);
            if (!IsAsciiDigit(code))
                ThrowSyntaxError("Unterminated fractional number");
            numText += c;
            this.current++;
            do {
                if (this.atEnd() && (yield* this.atEOF()))
                    return ParseFloat(numText);
                c = this.fragment[this.current];
                code = StringCharCodeAt(c, 0);
                if (!IsAsciiDigit(code))
                    break;
                numText += c;
                this.current++;
            } while (true);
        }
        if ((code & ~32) === 69) {
            numText += c;
            this.current++;
            if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError("Missing digits after exponent indicator");
            c = this.fragment[this.current];
            code = StringCharCodeAt(c, 0);
            // Optional exponent sign.
            if (code === 43 || code === 45) {
                numText += c;
                this.current++;
                if (this.atEnd() && (yield* this.atEOF()))
                    ThrowSyntaxError("Missing digits after exponent sign");
            }
            c = this.fragment[this.current];
            code = StringCharCodeAt(c, 0);
            if (!IsAsciiDigit(code))
                ThrowSyntaxError("Exponent part is missing a number");
            numText += c;
            this.current++;
            do {
                if (this.atEnd() && (yield* this.atEOF()))
                    break;
                c = this.fragment[this.current];
                code = StringCharCodeAt(c, 0);
                if (!IsAsciiDigit(code))
                    break;
                numText += c;
                this.current++;
            } while (true);
        }
        return ParseFloat(numText);
    }

    /**
     * Skip whitespace and consume the ':' separating a property name from its
     * value.
     *
     * @throws SyntaxError (via `ThrowSyntaxError`) if input ends or another
     *   character is found.
     */
    *advanceColon() {
        yield* this.consumeWhitespace();
        if (this.atEnd() && (yield* this.atEOF()))
            ThrowSyntaxError("End of data looking for colon in object entry");
        if (StringCharCodeAt(this.fragment, this.current) !== 58)
            ThrowSyntaxError("Expected ':' after property name in object");
        this.current++;
    }

    /**
     * Consume the token that follows a property value inside an object.
     *
     * @returns {boolean} `true` if the object was closed by '}', `false` if a
     *   ',' announces another property.
     * @throws SyntaxError (via `ThrowSyntaxError`) on anything else.
     */
    *advanceObjectEnds() {
        yield* this.consumeWhitespace();
        if (this.atEnd() && (yield* this.atEOF()))
            ThrowSyntaxError("End of data after property value in object");
        const code = StringCharCodeAt(this.fragment, this.current++);
        if (code === 44)
            return false;
        if (code === 125)
            return true;
        ThrowSyntaxError("Expected ',' or '}' after property value in object");
    }

    /**
     * Consume the token that follows an element inside an array.
     *
     * @returns {boolean} `true` if the array was closed by ']', `false` if a
     *   ',' announces another element.
     * @throws SyntaxError (via `ThrowSyntaxError`) on anything else.
     */
    *advanceArrayEnds() {
        yield* this.consumeWhitespace();
        if (this.atEnd() && (yield* this.atEOF()))
            ThrowSyntaxError("End of data when ',' or ']' was expected");
        const code = StringCharCodeAt(this.fragment, this.current++);
        if (code === 44)
            return false;
        if (code === 93)
            return true;
        ThrowSyntaxError("Expected ',' or ']' after array element");
    }

    /**
     * Read the next token.  For strings, numbers, booleans and null the
     * decoded value is stored in `this.tokenValue`.
     *
     * @returns {number} Token code: 0 string, 1 number, 2 boolean, 3 null,
     *   4 '{', 5 '}', 6 '[', 7 ']', 8 ',', 9 ':'.
     * @throws SyntaxError (via `ThrowSyntaxError`) at end of input or on a
     *   character that cannot start a token.
     */
    *advance() {
        yield* this.consumeWhitespace();
        if (this.atEnd() && (yield* this.atEOF()))
            ThrowSyntaxError("Unexpected end of data");
        const code = StringCharCodeAt(this.fragment, this.current);
        switch (code) {
            case 34:
                this.tokenValue = yield* this.jsonString();
                return 0;
            case 116:
                yield* this.consumeKeyword("true");
                this.tokenValue = true;
                return 2;
            case 102:
                yield* this.consumeKeyword("false");
                this.tokenValue = false;
                return 2;
            case 110:
                yield* this.consumeKeyword("null");
                this.tokenValue = null;
                return 3;
            case 91:
                this.current++;
                return 6;
            case 93:
                this.current++;
                return 7;
            case 123:
                this.current++;
                return 4;
            case 125:
                this.current++;
                return 5;
            case 44:
                this.current++;
                return 8;
            case 58:
                this.current++;
                return 9;
        }
        if (code === 45 || IsAsciiDigit(code)) {
            this.tokenValue = yield* this.jsonNumber();
            return 1;
        }
        ThrowSyntaxError("Unexpected character");
    }

    /**
     * Parse one complete JSON text without recursion.
     *
     * `stack` holds one entry per open container: `[0, array]` for an array
     * and `[1, object, pendingKey]` for an object.  `state` selects the next
     * step: 2 parses a value, 1 stores `value` as an object property and
     * reads what follows, 0 appends `value` to an array and reads what
     * follows.
     *
     * @returns The parsed value.
     * @throws SyntaxError (via `ThrowSyntaxError`) on malformed input or on
     *   non-whitespace after the value.
     */
    *parseJSON() {
        const stack = [];
        let value = "ERROR";
        let token;
        let state = 2;
        toParseElementOrPropertyValue: do {
            toFinishValue: switch (state) {
                case 2: {
                    token = yield* this.advance();
                    processValueToken: do {
                        switch (token) {
                            // Scalars: the value is already in tokenValue.
                            case 0:
                            case 1:
                            case 2:
                            case 3:
                                value = this.tokenValue;
                                break toFinishValue;
                            case 6: {
                                value = [];
                                token = yield* this.advance();
                                // Empty array.
                                if (token === 7)
                                    break toFinishValue;
                                Push(stack, [0, value]);
                                // The token just read starts the first element.
                                continue processValueToken;
                            }
                            case 4: {
                                value = {};
                                yield* this.consumeWhitespace();
                                if (this.atEnd() && (yield* this.atEOF()))
                                    ThrowSyntaxError("End of data while reading object contents");
                                const c = StringCharCodeAt(this.fragment, this.current);
                                // Empty object.
                                if (c === 125) {
                                    this.current++;
                                    break toFinishValue;
                                }
                                if (c !== 34)
                                    ThrowSyntaxError("Expected property name or '}'");
                                Push(stack, [1, value, yield* this.jsonString()]);
                                yield* this.advanceColon();
                                // state is still 2: parse the property value next.
                                continue toParseElementOrPropertyValue;
                            }
                            case 7:
                            case 5:
                            case 9:
                            case 8:
                                ThrowSyntaxError(`Encountered token with internal value ${token} in value context`);
                            default: {
                            }
                        }
                    } while (true);
                }
                case 1: {
                    const objectInfo = stack[stack.length - 1];
                    CreateDataProperty(objectInfo[1], objectInfo[2], value);
                    if (yield* this.advanceObjectEnds()) {
                        // Object closed: it becomes the finished value.
                        value = Pop(stack)[1];
                        break toFinishValue;
                    }
                    yield* this.consumeWhitespace();
                    if (this.atEnd() && (yield* this.atEOF()))
                        ThrowSyntaxError("End of data where property name was expected");
                    // Malformed input, so report it as a syntax error like
                    // every other grammar violation.
                    if (StringCharCodeAt(this.fragment, this.current) !== 34)
                        ThrowSyntaxError("Expected property name");
                    objectInfo[2] = yield* this.jsonString();
                    yield* this.advanceColon();
                    state = 2;
                    continue toParseElementOrPropertyValue;
                }
                case 0: {
                    const arrayInfo = stack[stack.length - 1];
                    Push(arrayInfo[1], value);
                    if (yield* this.advanceArrayEnds()) {
                        // Array closed: it becomes the finished value.
                        value = Pop(stack)[1];
                        break toFinishValue;
                    }
                    state = 2;
                    continue toParseElementOrPropertyValue;
                }
            }
            // A value was completed.  With no open container it is the
            // result; otherwise hand it to the innermost container.
            if (stack.length === 0)
                break;
            state = stack[stack.length - 1][0];
        } while (true);
        yield* this.consumeWhitespace();
        if (!this.atEnd())
            ThrowSyntaxError("Unexpected non-whitespace character after JSON data");
        return value;
    }

    /**
     * Create a parser and run it up to its first request for input.
     */
    constructor() {
        this.fragment = "";
        this.current = 0;
        this.end = 0;
        this.eof = false;
        this.tokenValue = null;
        this.parser = this.parseJSON();
        this.complete = false;
        if (this.parser.next().done === true)
            BUG("parsing finished before any fragments added");
    }

    /**
     * Feed the next piece of JSON text to the parser.  Empty fragments are
     * ignored.  If parsing throws, the parser is marked complete and the
     * error is rethrown.
     *
     * @param {string} fragment The next chunk of input.
     * @throws Error (via `ThrowError`) if the parser is already complete;
     *   otherwise whatever error parsing the fragment raises.
     */
    add(fragment) {
        if (this.complete)
            ThrowError("Can't add fragment: parsing already completed");
        if (fragment.length === 0)
            return;
        try {
            if (this.parser.next(fragment).done === true)
                BUG("add(nonempty valid fragment) should never complete parsing");
        }
        catch (e) {
            this.complete = true;
            throw e;
        }
    }

    /**
     * Signal end of input and return the parsed value.  The parser is marked
     * complete whether or not this succeeds.
     *
     * @param {Function} [reviver] If supplied, the result is passed through
     *   `InternalizeJSONProperty` with this function before being returned.
     * @returns The parsed (and, if requested, revived) value.
     * @throws Error (via `ThrowError`) if the parser is already complete;
     *   SyntaxError (via `ThrowSyntaxError`) if the accumulated text is not
     *   valid JSON.
     */
    finish(reviver) {
        if (this.complete) {
            ThrowError("Can't call finish: it was either already called or a syntax error " +
                "was encountered");
        }
        let unfiltered;
        try {
            // The empty string tells atEOF that no more data will arrive.
            const result = this.parser.next("");
            if (result.done === true)
                unfiltered = result.value;
            else
                ThrowSyntaxError("Complete text is not valid JSON");
        }
        finally {
            this.complete = true;
        }
        if (typeof reviver === "undefined")
            return unfiltered;
        const rootName = "";
        const root = { [rootName]: unfiltered };
        return InternalizeJSONProperty(root, rootName, reviver);
    }

    /**
     * @returns {boolean} Whether parsing has ended, either through `finish`
     *   or through an error thrown while parsing.
     */
    done() {
        return this.complete;
    }
}
;
/**
 * Apply `reviver` to `holder[name]`, bottom-up.
 *
 * When the value is an array or other non-null object, each element or
 * enumerable own property is revived first; a child whose revived result is
 * `undefined` is deleted, any other result replaces the child.  Finally
 * `reviver` is called with `holder` as `this` and `(name, value)` as
 * arguments.
 *
 * @param {object} holder Object that contains the value to revive.
 * @param {string} name Key of that value within `holder`.
 * @param {Function} reviver Callback invoked as `reviver.call(holder, name, value)`.
 * @returns Whatever `reviver` returns for this value.
 */
function InternalizeJSONProperty(holder, name, reviver) {
    const val = holder[name];
    const isContainer = typeof val === "object" && val !== null;
    if (isContainer) {
        // Revive the child found under `lookupKey`, then store or remove it
        // under `storeKey`.
        const reviveChild = (lookupKey, storeKey) => {
            const revived = InternalizeJSONProperty(val, lookupKey, reviver);
            if (typeof revived === "undefined")
                DeleteProperty(val, storeKey);
            else
                CreateDataProperty(val, storeKey, revived);
        };
        if (IsArray(val)) {
            const count = LengthOfArrayLike(val);
            for (let index = 0; index < count; index++)
                reviveChild(ToString(index), index);
        }
        else {
            const ownKeys = EnumerableOwnPropertyKeys(val);
            const total = ownKeys.length;
            for (let k = 0; k < total; k++) {
                const key = ownKeys[k];
                reviveChild(key, key);
            }
        }
    }
    return ReflectApply(reviver, holder, [name, val]);
}
//# sourceMappingURL=parser.js.map