UNPKG

@jswalden/streaming-json

Version:

Streaming JSON parsing and stringification for JavaScript/TypeScript

465 lines 17.9 kB
import { IsArray, Pop, Push } from "../stdlib/array.js";
import { ThrowError, ThrowSyntaxError } from "../stdlib/error.js";
import { LengthOfArrayLike } from "../stdlib/length.js";
import { Min } from "../stdlib/math.js";
import { ParseDecimalDigits, ParseFloat } from "../stdlib/number.js";
import { CreateDataProperty, DeleteProperty, EnumerableOwnPropertyKeys } from "../stdlib/object.js";
import { ReflectApply } from "../stdlib/reflect.js";
import { StringCharCodeAt, StringFromCharCode, StringSlice, ToString } from "../stdlib/string.js";
import { HexDigitToNumber, IsAsciiDigit } from "../utils/unicode.js";

// Token kinds returned by `StreamingJSONParser#advance`.
const TOKEN_STRING = 0;
const TOKEN_NUMBER = 1;
const TOKEN_BOOLEAN = 2;
const TOKEN_NULL = 3;
const TOKEN_OBJECT_OPEN = 4;
const TOKEN_OBJECT_CLOSE = 5;
const TOKEN_ARRAY_OPEN = 6;
const TOKEN_ARRAY_CLOSE = 7;
const TOKEN_COMMA = 8;
const TOKEN_COLON = 9;

// States of the `parseJSON` state machine.  The two "finish" states are also
// stored as the first element of each stack entry, identifying its container.
const STATE_FINISH_ARRAY_ELEMENT = 0;
const STATE_FINISH_OBJECT_MEMBER = 1;
const STATE_PARSE_VALUE = 2;

/**
 * Report an internal invariant violation (as opposed to malformed input).
 *
 * @param {string} msg Description of the violated invariant.
 */
function BUG(msg) {
  ThrowError(`BUG: ${msg}`);
}

/**
 * Incremental JSON parser: text is supplied in fragments through `add`, and
 * the parsed value is obtained from `finish`.
 *
 * Parsing is written as a set of generators.  Whenever the current fragment
 * is exhausted, the active generator yields (see `atEOF`) and is resumed with
 * the next fragment; an empty fragment signals end of input.
 */
export class StreamingJSONParser {
  /**
   * Create a parser and run it up to its first request for data.
   */
  constructor() {
    this.fragment = "";
    this.current = 0;
    this.end = 0;
    this.eof = false;
    this.tokenValue = null;
    this.parser = this.parseJSON();
    this.complete = false;
    if (this.parser.next().done === true)
      BUG("parsing finished before any fragments added");
  }

  /**
   * Called only when the current fragment is exhausted: suspend until more
   * data is supplied, then install it as the current fragment.
   *
   * @returns {boolean} `true` if end of input was reached (now or earlier),
   *   `false` if a new nonempty fragment was installed.
   */
  *atEOF() {
    if (this.current !== this.end)
      BUG("atEOF called when !atEnd()");
    if (this.eof)
      return true;

    // Resumed by `add(fragment)` or by `finish()`, which passes "".
    const data = yield;
    if (data.length === 0) {
      this.eof = true;
      return true;
    }

    this.fragment = data;
    this.current = 0;
    this.end = data.length;
    return false;
  }

  /**
   * @returns {boolean} Whether the current fragment has been fully consumed.
   */
  atEnd() {
    if (this.current > this.end)
      BUG("incremented current past end");
    return this.current === this.end;
  }

  /**
   * Skip spaces, line feeds, carriage returns and tabs, pulling in further
   * fragments as needed.  Returns at the first other character or at end of
   * input.
   */
  *consumeWhitespace() {
    while (true) {
      while (!this.atEnd()) {
        const c = StringCharCodeAt(this.fragment, this.current);
        const isWhitespace =
          c === 32 /* ' ' */ || c === 10 /* '\n' */ || c === 13 /* '\r' */ || c === 9 /* '\t' */;
        if (!isWhitespace)
          return;
        this.current++;
      }
      if (yield* this.atEOF())
        return;
    }
  }

  /**
   * Consume exactly the text of `keyword`, which may be split across
   * fragments.
   *
   * @param {string} keyword The keyword expected at the current position.
   */
  *consumeKeyword(keyword) {
    let matched = 0;
    while (matched < keyword.length) {
      if (this.atEnd() && (yield* this.atEOF()))
        ThrowSyntaxError(`End of data in middle of '${keyword}' keyword`);

      // Compare as much of the keyword as this fragment can supply.
      const amount = Min(keyword.length - matched, this.end - this.current);
      const expected = StringSlice(keyword, matched, matched + amount);
      const actual = StringSlice(this.fragment, this.current, this.current + amount);
      if (expected !== actual)
        ThrowSyntaxError(`Malformed '${keyword}' keyword`);

      this.current += amount;
      matched += amount;
    }
  }

  /**
   * Parse a string literal starting at the opening quote under the cursor.
   *
   * @returns {string} The decoded string contents.
   */
  *jsonString() {
    if (this.atEnd() || StringCharCodeAt(this.fragment, this.current) !== 34 /* '"' */)
      BUG("jsonString called while not at start of string");
    this.current++;

    let value = "";
    while (true) {
      if (this.atEnd() && (yield* this.atEOF()))
        ThrowSyntaxError("Unterminated string literal");

      let c = this.fragment[this.current++];
      let code = StringCharCodeAt(c, 0);
      if (code === 34 /* '"' */)
        return value;
      if (code < 32)
        ThrowSyntaxError("Bad control character in string literal");

      if (code === 92 /* '\\' */) {
        if (this.atEnd() && (yield* this.atEOF()))
          ThrowSyntaxError("Incomplete escape sequence");

        c = this.fragment[this.current++];
        code = StringCharCodeAt(c, 0);
        switch (code) {
          case 34: // '"'
          case 47: // '/'
          case 92: // '\\'
            // These escapes stand for the escaped character itself.
            break;
          case 98: // 'b'
            c = "\b";
            break;
          case 102: // 'f'
            c = "\f";
            break;
          case 110: // 'n'
            c = "\n";
            break;
          case 114: // 'r'
            c = "\r";
            break;
          case 116: // 't'
            c = "\t";
            break;
          case 117: { // 'u': four hex digits, possibly split across fragments
            code = 0;
            let digits = 0;
            do {
              if (this.atEnd() && (yield* this.atEOF()))
                ThrowSyntaxError("Too-short Unicode escape");

              const amount = Min(4 - digits, this.end - this.current);
              for (let i = 0; i < amount; i++) {
                const n = HexDigitToNumber(this.fragment, this.current + i);
                if (n === null)
                  ThrowSyntaxError(`Bad Unicode escape digit '${this.fragment[this.current + i]}'`);
                code = (code << 4) | n;
              }
              digits += amount;
              this.current += amount;
            } while (digits < 4);
            c = StringFromCharCode(code);
            break;
          }
          default:
            ThrowSyntaxError(`Bad escaped character '${c}'`);
        }
      }

      value += c;
    }
  }

  /**
   * Parse a number starting at the '-' or digit under the cursor.
   *
   * The number's text is accumulated across fragments and converted with
   * `ParseDecimalDigits` when it has no fraction or exponent, otherwise with
   * `ParseFloat`.
   *
   * @returns {number} The parsed number.
   */
  *jsonNumber() {
    if (this.atEnd())
      BUG("jsonNumber called while at end of fragment");

    let c = this.fragment[this.current];
    let code = StringCharCodeAt(c, 0);
    if (!(code === 45 /* '-' */ || IsAsciiDigit(code)))
      BUG("jsonNumber called while not at start of number");

    let numText = "";
    if (code === 45 /* '-' */) {
      numText += c;
      this.current++;
      if (this.atEnd() && (yield* this.atEOF()))
        ThrowSyntaxError("Missing number after '-'");
      c = this.fragment[this.current];
      code = StringCharCodeAt(c, 0);
      if (!IsAsciiDigit(code))
        ThrowSyntaxError("Unexpected nondigit");
    }

    // First integer digit.
    numText += c;
    this.current++;

    if (code === 48 /* '0' */) {
      // A leading zero is the whole integer part.  Load the following
      // character so the fraction/exponent checks below examine it instead
      // of the '0' just consumed (otherwise "0.5" would end after "0").
      if (this.atEnd() && (yield* this.atEOF()))
        return ParseDecimalDigits(numText);
      c = this.fragment[this.current];
      code = StringCharCodeAt(c, 0);
    } else {
      // Remaining integer digits.
      while (true) {
        if (this.atEnd() && (yield* this.atEOF()))
          return ParseDecimalDigits(numText);
        c = this.fragment[this.current];
        code = StringCharCodeAt(c, 0);
        if (!IsAsciiDigit(code))
          break;
        numText += c;
        this.current++;
      }
    }

    // Fast path: no fraction and no exponent.  `code & ~32` folds 'e' (101)
    // onto 'E' (69) so either spelling of the exponent indicator is accepted.
    if (code !== 46 /* '.' */ && (code & ~32) !== 69 /* 'E' */)
      return ParseDecimalDigits(numText);

    if (code === 46 /* '.' */) {
      numText += c;
      this.current++;
      if (this.atEnd() && (yield* this.atEOF()))
        ThrowSyntaxError("Missing digits after decimal point");
      c = this.fragment[this.current];
      code = StringCharCodeAt(c, 0);
      if (!IsAsciiDigit(code))
        ThrowSyntaxError("Unterminated fractional number");
      numText += c;
      this.current++;

      while (true) {
        if (this.atEnd() && (yield* this.atEOF()))
          return ParseFloat(numText);
        c = this.fragment[this.current];
        code = StringCharCodeAt(c, 0);
        if (!IsAsciiDigit(code))
          break;
        numText += c;
        this.current++;
      }
    }

    if ((code & ~32) === 69 /* 'E' or 'e' */) {
      numText += c;
      this.current++;
      if (this.atEnd() && (yield* this.atEOF()))
        ThrowSyntaxError("Missing digits after exponent indicator");
      c = this.fragment[this.current];
      code = StringCharCodeAt(c, 0);

      if (code === 43 /* '+' */ || code === 45 /* '-' */) {
        numText += c;
        this.current++;
        if (this.atEnd() && (yield* this.atEOF()))
          ThrowSyntaxError("Missing digits after exponent sign");
      }

      c = this.fragment[this.current];
      code = StringCharCodeAt(c, 0);
      if (!IsAsciiDigit(code))
        ThrowSyntaxError("Exponent part is missing a number");
      numText += c;
      this.current++;

      while (true) {
        if (this.atEnd() && (yield* this.atEOF()))
          break;
        c = this.fragment[this.current];
        code = StringCharCodeAt(c, 0);
        if (!IsAsciiDigit(code))
          break;
        numText += c;
        this.current++;
      }
    }

    return ParseFloat(numText);
  }

  /**
   * Skip whitespace and consume the ':' that must follow a property name.
   */
  *advanceColon() {
    yield* this.consumeWhitespace();
    if (this.atEnd() && (yield* this.atEOF()))
      ThrowSyntaxError("End of data looking for colon in object entry");
    if (StringCharCodeAt(this.fragment, this.current) !== 58 /* ':' */)
      ThrowSyntaxError("Expected ':' after property name in object");
    this.current++;
  }

  /**
   * Skip whitespace and consume the ',' or '}' that must follow a property
   * value.
   *
   * @returns {boolean} `true` if the object was closed by '}', `false` if a
   *   ',' was consumed and another member must follow.
   */
  *advanceObjectEnds() {
    yield* this.consumeWhitespace();
    if (this.atEnd() && (yield* this.atEOF()))
      ThrowSyntaxError("End of data after property value in object");
    const code = StringCharCodeAt(this.fragment, this.current++);
    if (code === 44 /* ',' */)
      return false;
    if (code === 125 /* '}' */)
      return true;
    ThrowSyntaxError("Expected ',' or '}' after property value in object");
  }

  /**
   * Skip whitespace and consume the ',' or ']' that must follow an array
   * element.
   *
   * @returns {boolean} `true` if the array was closed by ']', `false` if a
   *   ',' was consumed and another element must follow.
   */
  *advanceArrayEnds() {
    yield* this.consumeWhitespace();
    if (this.atEnd() && (yield* this.atEOF()))
      ThrowSyntaxError("End of data when ',' or ']' was expected");
    const code = StringCharCodeAt(this.fragment, this.current++);
    if (code === 44 /* ',' */)
      return false;
    if (code === 93 /* ']' */)
      return true;
    ThrowSyntaxError("Expected ',' or ']' after array element");
  }

  /**
   * Skip whitespace and consume the next token.  For strings, numbers,
   * booleans and null, the token's value is stored in `this.tokenValue`.
   *
   * @returns {number} One of the `TOKEN_*` constants.
   */
  *advance() {
    yield* this.consumeWhitespace();
    if (this.atEnd() && (yield* this.atEOF()))
      ThrowSyntaxError("Unexpected end of data");

    const code = StringCharCodeAt(this.fragment, this.current);
    switch (code) {
      case 34: // '"'
        this.tokenValue = yield* this.jsonString();
        return TOKEN_STRING;
      case 116: // 't'
        yield* this.consumeKeyword("true");
        this.tokenValue = true;
        return TOKEN_BOOLEAN;
      case 102: // 'f'
        yield* this.consumeKeyword("false");
        this.tokenValue = false;
        return TOKEN_BOOLEAN;
      case 110: // 'n'
        yield* this.consumeKeyword("null");
        this.tokenValue = null;
        return TOKEN_NULL;
      case 91: // '['
        this.current++;
        return TOKEN_ARRAY_OPEN;
      case 93: // ']'
        this.current++;
        return TOKEN_ARRAY_CLOSE;
      case 123: // '{'
        this.current++;
        return TOKEN_OBJECT_OPEN;
      case 125: // '}'
        this.current++;
        return TOKEN_OBJECT_CLOSE;
      case 44: // ','
        this.current++;
        return TOKEN_COMMA;
      case 58: // ':'
        this.current++;
        return TOKEN_COLON;
    }

    if (code === 45 /* '-' */ || IsAsciiDigit(code)) {
      this.tokenValue = yield* this.jsonNumber();
      return TOKEN_NUMBER;
    }

    ThrowSyntaxError("Unexpected character");
  }

  /**
   * Top-level parsing generator: parse one complete JSON value followed only
   * by whitespace.
   *
   * Nesting is tracked with an explicit stack rather than recursion.  Each
   * stack entry is `[state, container]` for arrays and
   * `[state, container, pendingKey]` for objects.
   *
   * @returns {*} The parsed value.
   */
  *parseJSON() {
    const stack = [];
    let value = "ERROR";
    let token;
    let state = STATE_PARSE_VALUE;

    toParseElementOrPropertyValue: do {
      // Breaking out of this switch means `value` holds a finished value.
      toFinishValue: switch (state) {
        case STATE_PARSE_VALUE: {
          token = yield* this.advance();
          processValueToken: do {
            switch (token) {
              case TOKEN_STRING:
              case TOKEN_NUMBER:
              case TOKEN_BOOLEAN:
              case TOKEN_NULL:
                value = this.tokenValue;
                break toFinishValue;

              case TOKEN_ARRAY_OPEN: {
                value = [];
                token = yield* this.advance();
                if (token === TOKEN_ARRAY_CLOSE)
                  break toFinishValue;
                // Nonempty array: the token just read starts its first
                // element, so process it as a value token.
                Push(stack, [STATE_FINISH_ARRAY_ELEMENT, value]);
                continue processValueToken;
              }

              case TOKEN_OBJECT_OPEN: {
                value = {};
                yield* this.consumeWhitespace();
                if (this.atEnd() && (yield* this.atEOF()))
                  ThrowSyntaxError("End of data while reading object contents");
                const c = StringCharCodeAt(this.fragment, this.current);
                if (c === 125 /* '}' */) {
                  this.current++;
                  break toFinishValue;
                }
                if (c !== 34 /* '"' */)
                  ThrowSyntaxError("Expected property name or '}'");
                Push(stack, [STATE_FINISH_OBJECT_MEMBER, value, yield* this.jsonString()]);
                yield* this.advanceColon();
                // `state` is still STATE_PARSE_VALUE: parse the member value.
                continue toParseElementOrPropertyValue;
              }

              case TOKEN_ARRAY_CLOSE:
              case TOKEN_OBJECT_CLOSE:
              case TOKEN_COLON:
              case TOKEN_COMMA:
                ThrowSyntaxError(`Encountered token with internal value ${token} in value context`);

              default: {
              }
            }
          } while (true);
        }

        case STATE_FINISH_OBJECT_MEMBER: {
          const objectInfo = stack[stack.length - 1];
          CreateDataProperty(objectInfo[1], objectInfo[2], value);
          if (yield* this.advanceObjectEnds()) {
            value = Pop(stack)[1];
            break toFinishValue;
          }

          // A ',' was consumed: another `"name": value` member must follow.
          yield* this.consumeWhitespace();
          if (this.atEnd() && (yield* this.atEOF()))
            ThrowSyntaxError("End of data where property name was expected");
          if (StringCharCodeAt(this.fragment, this.current) !== 34 /* '"' */)
            ThrowSyntaxError("Expected property name");
          objectInfo[2] = yield* this.jsonString();
          yield* this.advanceColon();
          state = STATE_PARSE_VALUE;
          continue toParseElementOrPropertyValue;
        }

        case STATE_FINISH_ARRAY_ELEMENT: {
          const arrayInfo = stack[stack.length - 1];
          Push(arrayInfo[1], value);
          if (yield* this.advanceArrayEnds()) {
            value = Pop(stack)[1];
            break toFinishValue;
          }
          state = STATE_PARSE_VALUE;
          continue toParseElementOrPropertyValue;
        }
      }

      // A value was finished: either it is the root value, or it must be
      // added to the container on top of the stack.
      if (stack.length === 0)
        break;
      state = stack[stack.length - 1][0];
    } while (true);

    yield* this.consumeWhitespace();
    if (!this.atEnd())
      ThrowSyntaxError("Unexpected non-whitespace character after JSON data");
    return value;
  }

  /**
   * Feed the next fragment of JSON text to the parser.  Empty fragments are
   * ignored.  If parsing throws, the parser is marked complete and the error
   * is rethrown.
   *
   * @param {string} fragment The next piece of JSON text.
   */
  add(fragment) {
    if (this.complete)
      ThrowError("Can't add fragment: parsing already completed");
    if (fragment.length === 0)
      return;

    try {
      if (this.parser.next(fragment).done === true)
        BUG("add(nonempty valid fragment) should never complete parsing");
    } catch (e) {
      this.complete = true;
      throw e;
    }
  }

  /**
   * Signal end of input and return the parsed value.  The parser is marked
   * complete whether or not this succeeds.
   *
   * @param {Function} [reviver] If supplied, applied bottom-up to the parsed
   *   value via `InternalizeJSONProperty`.
   * @returns {*} The parsed (and, if requested, revived) value.
   */
  finish(reviver) {
    if (this.complete)
      ThrowError("Can't call finish: it was either already called or a syntax error was encountered");

    let unfiltered;
    try {
      // An empty fragment is how `atEOF` learns that input has ended.
      const result = this.parser.next("");
      if (result.done === true)
        unfiltered = result.value;
      else
        ThrowSyntaxError("Complete text is not valid JSON");
    } finally {
      this.complete = true;
    }

    if (typeof reviver === "undefined")
      return unfiltered;

    const rootName = "";
    const root = { [rootName]: unfiltered };
    return InternalizeJSONProperty(root, rootName, reviver);
  }

  /**
   * @returns {boolean} Whether parsing has completed, either through
   *   `finish` or because an error was thrown while adding a fragment.
   */
  done() {
    return this.complete;
  }
}

/**
 * Apply `reviver` to `holder[name]` after first applying it recursively to
 * every array element or enumerable own property of that value.  A child is
 * deleted when the reviver returns `undefined` for it and redefined with the
 * returned value otherwise.
 *
 * @param {object} holder Object containing the property being revived.
 * @param {string} name Key of that property within `holder`.
 * @param {Function} reviver Called with `holder` as `this` and `(name, value)`
 *   as arguments.
 * @returns {*} The reviver's result for `holder[name]`.
 */
function InternalizeJSONProperty(holder, name, reviver) {
  const val = holder[name];
  if (val !== null && typeof val === "object") {
    if (IsArray(val)) {
      const len = LengthOfArrayLike(val);
      for (let i = 0; i < len; i++) {
        const newElement = InternalizeJSONProperty(val, ToString(i), reviver);
        if (typeof newElement === "undefined")
          DeleteProperty(val, i);
        else
          CreateDataProperty(val, i, newElement);
      }
    } else {
      const keys = EnumerableOwnPropertyKeys(val);
      for (let i = 0, len = keys.length; i < len; i++) {
        const p = keys[i];
        const newElement = InternalizeJSONProperty(val, p, reviver);
        if (typeof newElement === "undefined")
          DeleteProperty(val, p);
        else
          CreateDataProperty(val, p, newElement);
      }
    }
  }
  return ReflectApply(reviver, holder, [name, val]);
}
//# sourceMappingURL=parser.js.map