UNPKG

@xtao-org/jsonhilo

Version:

Pure JavaScript minimal lossless JSON parse event streaming, akin to SAX. Fast, modular, and dependency-free.

431 lines (423 loc) 15.5 kB
/**
 * Code points of every character the tokenizer has to recognize,
 * computed once at module load via `String.prototype.codePointAt`.
 */
export const CodePoint = {
  _0_: '0'.codePointAt(0),
  _1_: '1'.codePointAt(0),
  _9_: '9'.codePointAt(0),
  _a_: 'a'.codePointAt(0),
  _f_: 'f'.codePointAt(0),
  _A_: 'A'.codePointAt(0),
  _F_: 'F'.codePointAt(0),

  _openCurly_: '{'.codePointAt(0),
  _openSquare_: '['.codePointAt(0),
  _closeCurly_: '}'.codePointAt(0),
  _closeSquare_: ']'.codePointAt(0),
  _quoteMark_: '"'.codePointAt(0),
  _plus_: '+'.codePointAt(0),
  _minus_: '-'.codePointAt(0),
  _space_: ' '.codePointAt(0),
  _newline_: '\n'.codePointAt(0),
  _tab_: '\t'.codePointAt(0),
  _return_: '\r'.codePointAt(0),
  _backslash_: '\\'.codePointAt(0),
  _slash_: '/'.codePointAt(0),
  _comma_: ','.codePointAt(0),
  _colon_: ':'.codePointAt(0),
  _t_: 't'.codePointAt(0),
  _n_: 'n'.codePointAt(0),
  _b_: 'b'.codePointAt(0),
  _r_: 'r'.codePointAt(0),
  _u_: 'u'.codePointAt(0),
  _dot_: '.'.codePointAt(0),
  _e_: 'e'.codePointAt(0),
  _E_: 'E'.codePointAt(0),
  _l_: 'l'.codePointAt(0),
  _s_: 's'.codePointAt(0),
}

const {
  _0_, _1_, _9_, _A_, _E_, _F_, _a_, _b_,
  _backslash_, _closeCurly_, _closeSquare_, _colon_, _comma_, _dot_,
  _e_, _f_, _l_, _minus_, _n_, _newline_,
  _openCurly_, _openSquare_, _plus_, _quoteMark_,
  _r_, _return_, _s_, _slash_, _space_, _t_, _tab_, _u_,
} = CodePoint

/** Tag stored in the `type` field of every feedback object built in this module. */
export const JsonFeedbackType = {
  error: 'JsonFeedbackType.error',
}

/** Tags stored in the `errorType` field of `unexpected` / `unexpectedEnd` feedback. */
export const JsonErrorType = {
  unexpected: 'JsonErrorType.unexpected',
  unexpectedEnd: 'JsonErrorType.unexpectedEnd',
}

/**
 * Builds a generic error feedback object.
 * @param {string} message - description of the problem
 * @returns {{type: string, message: string}}
 */
export const error = (message) => {
  return {
    type: JsonFeedbackType.error,
    message,
  }
}

/**
 * Tells whether a value is an error feedback object, i.e. a non-null object
 * whose `type` equals `JsonFeedbackType.error`.
 * @param {*} value
 * @returns {boolean}
 */
export const isError = (value) => {
  return value !== null &&
    typeof value === 'object' &&
    value.type === JsonFeedbackType.error
}

/**
 * Builds the feedback object for a code point that is not allowed
 * in the current position.
 * @param {number} code - the offending code point
 * @param {string} context - where in the input the code point was seen
 * @param {Array} [expected] - what would have been accepted instead
 */
export const unexpected = (code, context, expected) => {
  return {
    type: JsonFeedbackType.error,
    errorType: JsonErrorType.unexpected,
    codePoint: code,
    context,
    expected,
  }
}

/**
 * Builds the feedback object for input that ended too early.
 * @param {string} [context] - what was left unfinished
 * @param {Array} [expected] - what would have been accepted instead
 */
export const unexpectedEnd = (context, expected) => {
  return {
    type: JsonFeedbackType.error,
    errorType: JsonErrorType.unexpectedEnd,
    context,
    expected,
  }
}

/** True for the code points of the digits '0' through '9'. */
export const isZeroNine = (code) => code >= _0_ && code <= _9_

/** True for the code points of the digits '1' through '9'. */
export const isOneNine = (code) => code >= _1_ && code <= _9_

/** True for space, line feed, tab and carriage return. */
export const isWhitespace = (code) => (
  code === _space_ ||
  code === _newline_ ||
  code === _tab_ ||
  code === _return_
)

/**
 * Creates a low-level JSON tokenizer that is fed one code point at a time
 * and forwards parse events to `next`.
 *
 * Every handler on `next` is optional (invoked through `?.`). Whatever the
 * invoked handler returns is returned from `codePoint()` / `end()`; problems
 * are reported by returning a feedback object (see `isError`), never by
 * throwing.
 *
 * Once a value is complete at the top level the tokenizer goes back to
 * expecting a value, so several top-level values may follow one another.
 *
 * @param {object} next - event handlers: `openObject`, `closeObject`,
 *   `openArray`, `closeArray`, `openKey`, `closeKey`, `openString`,
 *   `closeString`, `openNumber`, `closeNumber`, `openTrue`, `closeTrue`,
 *   `openFalse`, `closeFalse`, `openNull`, `closeNull`, `openHex`, `closeHex`,
 *   `escape`, `codePoint`, `whitespace`, `comma`, `colon`, `end`, and the
 *   pass-through queries `state` and `config`.
 * @param {object} [initialState] - optional starting state: `mode`, `parents`,
 *   `hexIndex`, `maxDepth`, `isKey`. `parents` is copied, never mutated.
 * @returns {{codePoint: Function, end: Function, depth: Function,
 *   state: Function, config: Function}}
 */
export const JsonLow = (next, initialState = {}) => {
  let mode = initialState.mode ?? 'Mode._value'
  // Copy the stack: it is pushed to and popped from below, and the caller's
  // array must not change (nor be shared between two parsers).
  const parents = [...(initialState.parents ?? ['Parent.top'])]
  let hexIndex = initialState.hexIndex ?? 0
  const maxDepth = initialState.maxDepth ?? 65536
  let isKey = initialState.isKey ?? false

  // Mode to enter once a value is complete: at the top level another value
  // may start right away, inside a container a comma or a close must follow.
  const modeAfterValue = () => (
    parents[parents.length - 1] === 'Parent.top' ? 'Mode._value' : 'Mode.value_'
  )

  const fraction = (code) => {
    if (code === _dot_) {
      mode = 'Mode.dot_'
      return next.codePoint?.(code)
    }
    return exponent(code)
  }

  const exponent = (code) => {
    if (code === _e_ || code === _E_) {
      mode = 'Mode.exponent_'
      return next.codePoint?.(code)
    }
    return number(code)
  }

  /**
   * numbers are delimited by any token that comes after:
   * whitespace, comma, parent close
   * so we have to emit an event and handle the token
   */
  const number = (code) => {
    mode = modeAfterValue()
    next.closeNumber?.()
    return self.codePoint(code)
  }

  const maxDepthExceeded = () => error(
    `Invalid parser state! Max depth of ${maxDepth} exceeded!`
  )

  // Handles a code point that can only be legal if it closes the innermost
  // container. The stack is inspected first and popped only on a match, so a
  // rejected code point leaves the parser state untouched.
  const closeParent = (code) => {
    const parent = parents[parents.length - 1]

    if (code === _closeCurly_ && parent === 'Parent.object') {
      parents.pop()
      mode = modeAfterValue()
      return next.closeObject?.(code)
    }

    if (code === _closeSquare_ && parent === 'Parent.array') {
      parents.pop()
      mode = modeAfterValue()
      return next.closeArray?.(code)
    }

    return unexpected(code, `in ${parentToString(parent)}`)
  }

  const self = {
    /**
     * Consumes one code point, updates the state machine and emits the
     * matching event on `next`.
     * @param {number} code
     * @returns the handler's return value, or an error feedback object
     */
    codePoint: (code) => {
      switch (mode) {
        // a value may start here
        case 'Mode._value':
          switch (code) {
            case _openCurly_: {
              // `parents` includes the top-level entry
              if (parents.length >= maxDepth) return maxDepthExceeded()
              parents.push('Parent.object')
              isKey = true
              mode = 'Mode._key'
              return next.openObject?.(code)
            }
            case _openSquare_: {
              if (parents.length >= maxDepth) return maxDepthExceeded()
              parents.push('Parent.array')
              mode = 'Mode._value'
              return next.openArray?.(code)
            }
            case _quoteMark_:
              mode = 'Mode.string_'
              return next.openString?.(code)
            case _t_:
              mode = 'Mode.t_rue'
              return next.openTrue?.(code)
            case _f_:
              mode = 'Mode.f_alse'
              return next.openFalse?.(code)
            case _n_:
              mode = 'Mode.n_ull'
              return next.openNull?.(code)
            case _minus_:
              mode = 'Mode.minus_'
              return next.openNumber?.(code)
            case _0_:
              mode = 'Mode.zero_'
              return next.openNumber?.(code)
            default:
              if (isOneNine(code)) {
                mode = 'Mode.onenine_'
                return next.openNumber?.(code)
              }
              if (isWhitespace(code)) return next.whitespace?.(code)
              return closeParent(code)
          }

        // a value inside a container has just been completed
        case 'Mode.value_':
          if (code === _comma_) {
            const parent = parents[parents.length - 1]
            if (parent === 'Parent.object') {
              isKey = true
              mode = 'Mode._key'
              return next.comma?.(code)
            }
            if (parent === 'Parent.array') {
              mode = 'Mode._value'
              return next.comma?.(code)
            }
            return error(`Invalid parser state! Unexpected parent ${parent}.`)
          }
          if (isWhitespace(code)) return next.whitespace?.(code)
          return closeParent(code)

        // a key may start here
        case 'Mode._key':
          if (code === _quoteMark_) {
            mode = 'Mode.string_'
            return next.openKey?.(code)
          }
          if (code === _closeCurly_) {
            parents.pop()
            isKey = false
            mode = modeAfterValue()
            return next.closeObject?.(code)
          }
          if (isWhitespace(code)) return next.whitespace?.(code)
          return unexpected(code, 'in an object', ['"', '}', 'whitespace'])

        // a key has just been completed
        case 'Mode.key_':
          if (code === _colon_) {
            isKey = false
            mode = 'Mode._value'
            return next.colon?.(code)
          }
          if (isWhitespace(code)) return next.whitespace?.(code)
          return unexpected(code, 'after key', [':', 'whitespace'])

        // inside a string; `isKey` tells whether it is an object key
        case 'Mode.string_':
          if (code === _quoteMark_) {
            if (isKey) {
              mode = 'Mode.key_'
              return next.closeKey?.(code)
            }
            mode = modeAfterValue()
            return next.closeString?.(code)
          }
          if (code === _backslash_) {
            mode = 'Mode.escape_'
            return next.escape?.(code)
          }
          if (code >= 0x0020 && code <= 0x10ffff) return next.codePoint?.(code)
          return unexpected(code, 'in a string', ['"', '\\', 'a code point 0x0020 thru 0x10ffff'])

        case 'Mode.escape_':
          if (
            code === _quoteMark_ ||
            code === _n_ ||
            code === _backslash_ ||
            code === _t_ ||
            code === _slash_ ||
            code === _b_ ||
            code === _f_ ||
            code === _r_
          ) {
            mode = 'Mode.string_'
            return next.codePoint?.(code)
          }
          if (code === _u_) {
            mode = 'Mode.hex_'
            return next.openHex?.(code)
          }
          return unexpected(code, 'after escape', ['"', 'n', '\\', 't', '/', 'b', 'f', 'r', 'u'])

        // the first three hex digits go to `codePoint`, the fourth to `closeHex`
        case 'Mode.hex_':
          if (
            (code >= _0_ && code <= _9_) ||
            (code >= _a_ && code <= _f_) ||
            (code >= _A_ && code <= _F_)
          ) {
            if (hexIndex < 3) {
              hexIndex += 1
              return next.codePoint?.(code)
            }
            hexIndex = 0
            mode = 'Mode.string_'
            return next.closeHex?.(code)
          }
          return unexpected(
            code,
            `at index ${hexIndex} of a hexadecimal escape sequence`,
            [['a', 'f'], ['A', 'F'], ['0', '9']],
          )

        case 'Mode.minus_':
          if (code === _0_) {
            mode = 'Mode.zero_'
            return next.codePoint?.(code)
          }
          if (isOneNine(code)) {
            mode = 'Mode.onenine_'
            return next.codePoint?.(code)
          }
          return unexpected(code, `after '-'`, [['0', '9']])

        case 'Mode.zero_':
          return fraction(code)

        case 'Mode.onenine_':
          if (isZeroNine(code)) {
            mode = 'Mode.onenineDigit_'
            return next.codePoint?.(code)
          }
          return fraction(code)

        case 'Mode.dot_':
          if (isZeroNine(code)) {
            mode = 'Mode.digitDotDigit_'
            return next.codePoint?.(code)
          }
          return unexpected(code, `after '.'`, [['0', '9']])

        case 'Mode.exponent_':
          if (code === _plus_ || code === _minus_) {
            mode = 'Mode.exponentSign_'
            return next.codePoint?.(code)
          }
          if (isZeroNine(code)) {
            mode = 'Mode.exponentSignDigit_'
            return next.codePoint?.(code)
          }
          return unexpected(code, `after exponent`, ['+', '-', ['0', '9']])

        case 'Mode.exponentSign_':
          if (isZeroNine(code)) {
            mode = 'Mode.exponentSignDigit_'
            return next.codePoint?.(code)
          }
          return unexpected(code, `after exponent sign`, [['0', '9']])

        case 'Mode.onenineDigit_':
          if (isZeroNine(code)) return next.codePoint?.(code)
          return fraction(code)

        case 'Mode.digitDotDigit_':
          if (isZeroNine(code)) return next.codePoint?.(code)
          return exponent(code)

        case 'Mode.exponentSignDigit_':
          if (isZeroNine(code)) return next.codePoint?.(code)
          return number(code)

        case 'Mode.t_rue':
          if (code === _r_) {
            mode = 'Mode.tr_ue'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the second position in true`, ['r'])

        case 'Mode.tr_ue':
          if (code === _u_) {
            mode = 'Mode.tru_e'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the third position in true`, ['u'])

        case 'Mode.tru_e':
          if (code === _e_) {
            mode = modeAfterValue()
            return next.closeTrue?.(code)
          }
          return unexpected(code, `at the fourth position in true`, ['e'])

        case 'Mode.f_alse':
          if (code === _a_) {
            mode = 'Mode.fa_lse'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the second position in false`, ['a'])

        case 'Mode.fa_lse':
          if (code === _l_) {
            mode = 'Mode.fal_se'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the third position in false`, ['l'])

        case 'Mode.fal_se':
          if (code === _s_) {
            mode = 'Mode.fals_e'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the fourth position in false`, ['s'])

        case 'Mode.fals_e':
          if (code === _e_) {
            mode = modeAfterValue()
            return next.closeFalse?.(code)
          }
          return unexpected(code, `at the fifth position in false`, ['e'])

        case 'Mode.n_ull':
          if (code === _u_) {
            mode = 'Mode.nu_ll'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the second position in null`, ['u'])

        case 'Mode.nu_ll':
          if (code === _l_) {
            mode = 'Mode.nul_l'
            return next.codePoint?.(code)
          }
          return unexpected(code, `at the third position in null`, ['l'])

        case 'Mode.nul_l':
          if (code === _l_) {
            mode = modeAfterValue()
            return next.closeNull?.(code)
          }
          return unexpected(code, `at the fourth position in null`, ['l'])

        default:
          return error(`Invalid parser mode: ${mode}`)
      }
    },

    /**
     * Signals the end of input. A number still being read is closed first
     * (`closeNumber`), then `next.end` is called; any other unfinished
     * construct yields an `unexpectedEnd` feedback object.
     */
    end: () => {
      if (isKey) return unexpectedEnd(`a key/object left unclosed!`)

      const parent = parents[parents.length - 1]
      if (parent !== 'Parent.top') return unexpectedEnd(
        `${parentToString(parent)} left unclosed!`
      )

      switch (mode) {
        case 'Mode._value':
          return next.end?.()
        case 'Mode.key_':
          return error('a key/object left unclosed!')
        case 'Mode._key':
          return unexpectedEnd('an object left unclosed!')
        // modes in which the digits read so far already form a complete number
        case 'Mode.exponentSignDigit_':
        case 'Mode.onenine_':
        case 'Mode.onenineDigit_':
        case 'Mode.digitDotDigit_':
        case 'Mode.zero_':
          mode = modeAfterValue()
          next.closeNumber?.()
          return next.end?.()
        case 'Mode.minus_':
        case 'Mode.dot_':
        case 'Mode.exponent_':
        case 'Mode.exponentSign_':
          return unexpectedEnd(`incomplete number!`)
        case 'Mode.hex_':
          return unexpectedEnd('after hexadecimal escape in string!')
        case 'Mode.escape_':
          return unexpectedEnd('after escape in string!')
        case 'Mode.string_':
          return unexpectedEnd('a string left unclosed!')
        case 'Mode.t_rue':
          return unexpectedEnd(`before the second position in true!`, ['r'])
        case 'Mode.tr_ue':
          return unexpectedEnd(`before the third position in true!`, ['u'])
        case 'Mode.tru_e':
          return unexpectedEnd(`before the fourth position in true!`, ['e'])
        case 'Mode.f_alse':
          return unexpectedEnd(`before the second position in false!`, ['a'])
        case 'Mode.fa_lse':
          return unexpectedEnd(`before the third position in false!`, ['l'])
        case 'Mode.fal_se':
          return unexpectedEnd(`before the fourth position in false!`, ['s'])
        case 'Mode.fals_e':
          return unexpectedEnd(`before the fifth position in false!`, ['e'])
        case 'Mode.n_ull':
          return unexpectedEnd(`before the second position in null!`, ['u'])
        case 'Mode.nu_ll':
          return unexpectedEnd(`before the third position in null!`, ['l'])
        case 'Mode.nul_l':
          return unexpectedEnd(`before the fourth position in null!`, ['l'])
        default:
          return unexpectedEnd()
      }
    },

    /** Number of currently open containers (the top-level entry is not counted). */
    depth: () => {
      return parents.length - 1
    },

    /**
     * Snapshot of the tokenizer state (with a copy of the parent stack) plus
     * whatever `next.state` reports as `downstream`.
     */
    state: () => {
      const downstream = next.state?.()
      return {mode, parents: [...parents], isKey, hexIndex, downstream}
    },

    /** The configured `maxDepth` plus whatever `next.config` reports as `downstream`. */
    config: () => {
      const downstream = next.config?.()
      return {maxDepth, downstream}
    },
  }

  return self
}

// Human-readable name of a parent-stack entry, used in error contexts.
// Returns undefined for any value other than the three known entries.
const parentToString = (parent) => {
  switch (parent) {
    case 'Parent.array': return 'an array'
    case 'Parent.object': return 'an object'
    case 'Parent.top': return 'the top-level value'
  }
}