UNPKG

@anchan828/json-ast

Version:

JSON parser AST utilities

github.com/anchan828/json-ast

anchan828/json-ast

487 lines (486 loc) • 15.4 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.tokenize = tokenize;
const error_js_1 = require("./error.js");
const position_js_1 = require("./position.js");
const tokenizeErrorTypes_js_1 = require("./tokenizeErrorTypes.js");
const types_js_1 = require("./types.js");

// Single-character punctuation -> token type.
const charTokens = {
  "{": types_js_1.JsonTokenTypes.LEFT_BRACE,
  "}": types_js_1.JsonTokenTypes.RIGHT_BRACE,
  "[": types_js_1.JsonTokenTypes.LEFT_BRACKET,
  "]": types_js_1.JsonTokenTypes.RIGHT_BRACKET,
  ":": types_js_1.JsonTokenTypes.COLON,
  ",": types_js_1.JsonTokenTypes.COMMA,
};

// Literal keywords -> token type.
const keywordsTokens = {
  true: types_js_1.JsonTokenTypes.TRUE,
  false: types_js_1.JsonTokenTypes.FALSE,
  null: types_js_1.JsonTokenTypes.NULL,
};

// Escape letters whose decoded form is a control character.
const symbolSubstitutes = {
  b: "\b", // Backspace
  f: "\f", // Form feed
  n: "\n", // New line
  r: "\r", // Carriage return
  t: "\t", // Horizontal tab
};

// Characters accepted after a backslash inside a string. Only key membership
// is consulted (via `in`); the numeric values are never read.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#escape_sequences
const escapes = {
  '"': 0, // Quotation mark
  "\\": 1, // Reverse solidus
  "/": 2, // Solidus
  b: 3, // Backspace
  f: 4, // Form feed
  n: 5, // New line
  r: 6, // Carriage return
  t: 7, // Horizontal tab
  u: 8, // 4 hexadecimal digits
};
// Also accept regex-style character-class escapes (\d, \D, \w, \W, \s, \S).
["d", "D", "w", "W", "s", "S"].forEach((letter, position) => {
  escapes[letter] = position;
});

// States of the number-scanning state machine used by parseNumber.
const numberStates = {
  _START_: 0,
  MINUS: 1,
  ZERO: 2,
  DIGIT: 3,
  POINT: 4,
  DIGIT_FRACTION: 5,
  EXP: 6,
  EXP_DIGIT_OR_SIGN: 7,
};

// Exactly four hexadecimal digits, as required after `\u`.
const FOUR_HEX_DIGITS = /^[0-9a-fA-F]{4}$/;

// HELPERS

/** @returns {boolean} true when `char` is one of "1".."9". */
function isDigit1to9(char) {
  return char >= "1" && char <= "9";
}

/** @returns {boolean} true when `char` is one of "0".."9". */
function isDigit(char) {
  return char >= "0" && char <= "9";
}

/** @returns {boolean} true when `char` is an ASCII letter. */
function isLetter(char) {
  return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z");
}

/** @returns {boolean} true when `char` introduces an exponent ("e" or "E"). */
function isExp(char) {
  return char === "e" || char === "E";
}

// PARSERS
//
// Every parser receives the source text plus the current offset (`index`),
// `line` and `column`, and returns either null (no match at this offset) or
// an object carrying the position immediately after the consumed text.

/**
 * Consumes ONE whitespace unit: a line terminator (CR, LF or CRLF), a tab or
 * a space.
 *
 * @param {string} source - Full source text.
 * @param {number} index - Offset to inspect.
 * @param {number} line - Current line number.
 * @param {number} column - Current column number.
 * @returns {{index: number, line: number, column: number} | null} Position
 *   after the whitespace, or null when the character is not whitespace.
 */
function parseWhitespace(source, index, line, column) {
  const char = source.charAt(index);
  if (char === "\r") {
    // CR (classic Mac OS) or CRLF (Windows): both count as a single line break.
    const width = source.charAt(index + 1) === "\n" ? 2 : 1;
    return { index: index + width, line: line + 1, column: 1 };
  }
  if (char === "\n") {
    // LF (Unix / macOS)
    return { index: index + 1, line: line + 1, column: 1 };
  }
  if (char === "\t" || char === " ") {
    return { index: index + 1, line: line, column: column + 1 };
  }
  return null;
}

/**
 * Consumes a `// ...` comment up to and including its line terminator.
 * `index` must point at the first "/" of the "//" marker.
 */
function parseLineComment(source, index, line, column) {
  const sourceLength = source.length;
  const textStart = index + 2;
  let cursor = textStart;
  while (cursor < sourceLength) {
    const char = source.charAt(cursor);
    if (char === "\r" || char === "\n") {
      break;
    }
    cursor++;
  }
  const textEnd = cursor;
  if (cursor < sourceLength) {
    // Consume the terminator; CRLF is one line break, not two. The LF must be
    // looked up directly after the CR (the previous code looked one character
    // too far, double-counting lines on CRLF input).
    const isCrLf = source.charAt(cursor) === "\r" && source.charAt(cursor + 1) === "\n";
    cursor += isCrLf ? 2 : 1;
    line++;
    column = 1;
  } else {
    // Comment ran to end of input: stay on this line and advance the column.
    column += cursor - index;
  }
  return {
    type: types_js_1.JsonTokenTypes.COMMENT,
    // The text stops before the first CR/LF, so it never contains a line break.
    value: source.substring(textStart, textEnd),
    index: cursor,
    line: line,
    column: column,
  };
}

/**
 * Consumes a block comment through its closing marker, tracking line and
 * column along the way. `index` must point at the "/" of the opening marker.
 * Returns null when the comment is never closed.
 */
function parseBlockComment(source, index, line, column) {
  const sourceLength = source.length;
  const textStart = index + 2;
  let cursor = textStart;
  column += 2; // the opening marker
  while (cursor < sourceLength) {
    const char = source.charAt(cursor);
    if (char === "*" && source.charAt(cursor + 1) === "/") {
      return {
        type: types_js_1.JsonTokenTypes.COMMENT,
        value: source.substring(textStart, cursor),
        index: cursor + 2,
        line: line,
        column: column + 2, // the closing marker
      };
    }
    if (char === "\r") {
      if (source.charAt(cursor + 1) === "\n") {
        cursor++; // CRLF is a single line break
      }
      line++;
      column = 1;
    } else if (char === "\n") {
      line++;
      column = 1;
    } else {
      column++;
    }
    cursor++;
  }
  return null;
}

/**
 * Recognises a line comment or a block comment starting at `index`.
 *
 * @param {string} source - Full source text.
 * @param {number} index - Offset to inspect.
 * @param {number} line - Current line number.
 * @param {number} column - Current column number.
 * @returns {{type: *, value: string, index: number, line: number, column: number} | null}
 *   COMMENT token data (value excludes the delimiters), or null when no
 *   complete comment starts here.
 */
function parseComment(source, index, line, column) {
  if (source.charAt(index) !== "/") {
    return null;
  }
  const marker = source.charAt(index + 1);
  if (marker === "/") {
    return parseLineComment(source, index, line, column);
  }
  if (marker === "*") {
    return parseBlockComment(source, index, line, column);
  }
  return null;
}

/**
 * Recognises one structural character: { } [ ] : ,
 *
 * @returns {{type: *, line: number, column: number, index: number, value: undefined} | null}
 */
function parseChar(source, index, line, column) {
  const char = source.charAt(index);
  if (!(char in charTokens)) {
    return null;
  }
  return {
    type: charTokens[char],
    line: line,
    column: column + 1,
    index: index + 1,
    value: undefined,
  };
}

/**
 * Recognises the literals `true`, `false` and `null`.
 *
 * NOTE(review): this is a pure prefix match, so input such as `nullable`
 * yields a NULL token followed by the rest; confirm whether a word-boundary
 * check is wanted before changing it.
 *
 * @returns {{type: *, line: number, column: number, index: number, value: null} | null}
 */
function parseKeyword(source, index, line, column) {
  const matched = Object.keys(keywordsTokens).find((name) => source.startsWith(name, index));
  if (!matched) {
    return null;
  }
  return {
    type: keywordsTokens[matched],
    line: line,
    column: column + matched.length,
    index: index + matched.length,
    value: null,
  };
}

/**
 * Recognises an identifier: an ASCII letter or underscore followed by any
 * number of ASCII letters, digits or underscores.
 *
 * @returns {{type: *, line: number, column: number, index: number, value: string} | null}
 */
function parseIdentifier(source, index, line, column) {
  const firstChar = source.charAt(index);
  if (!(isLetter(firstChar) || firstChar === "_")) {
    return null;
  }
  const sourceLength = source.length;
  let end = index + 1;
  while (end < sourceLength) {
    const char = source.charAt(end);
    if (!(isLetter(char) || char === "_" || isDigit(char))) {
      break;
    }
    end++;
  }
  return {
    type: types_js_1.JsonTokenTypes.IDENTIFIER,
    line: line,
    column: column + end - index,
    index: end,
    value: source.substring(index, end),
  };
}

/**
 * Recognises a double-quoted string.
 *
 * `value` is the raw text between the quotes (escape sequences kept verbatim).
 * `decoded` is the text with escapes resolved, or null when it is identical
 * to `value`.
 *
 * @returns {{type: *, value: string, decoded: (string|null), line: number, index: number, column: number} | null}
 *   STRING token data, or null when there is no opening quote, the string is
 *   unterminated, or it contains an unsupported/incomplete escape.
 */
function parseString(source, index, line, column) {
  if (source.charAt(index) !== '"') {
    return null;
  }
  const sourceLength = source.length;
  const startIndex = index;
  let buffer = "";
  let decoded = "";
  index++; // opening quote
  while (index < sourceLength) {
    const char = source.charAt(index);
    index++;
    if (char === '"') {
      return {
        type: types_js_1.JsonTokenTypes.STRING,
        value: buffer,
        decoded: decoded !== buffer ? decoded : null,
        line: line,
        index: index,
        column: column + index - startIndex,
      };
    }
    if (char !== "\\") {
      buffer += char;
      decoded += char;
      continue;
    }
    // Escape sequence: a backslash followed by a recognised escape character.
    const escaped = source.charAt(index);
    if (!(escaped in escapes)) {
      return null;
    }
    buffer += char + escaped;
    index++;
    if (escaped === "u") {
      const hex = source.substring(index, index + 4);
      if (!FOUR_HEX_DIGITS.test(hex)) {
        return null;
      }
      buffer += hex;
      decoded += String.fromCodePoint(Number.parseInt(hex, 16));
      index += 4;
    } else if (escaped in symbolSubstitutes) {
      decoded += symbolSubstitutes[escaped];
    } else {
      decoded += escaped;
    }
  }
  return null; // ran out of input before the closing quote
}

/**
 * Recognises a JSON number (optional minus, integer part, optional fraction,
 * optional exponent) and returns the longest valid prefix.
 *
 * @returns {{type: *, value: string, line: number, index: number, column: number} | null}
 *   NUMBER token data, or null when no digit was consumed.
 */
function parseNumber(source, index, line, column) {
  const sourceLength = source.length;
  const startIndex = index;
  // Offset just past the last character known to end a valid number.
  let passedValueIndex = index;
  let state = numberStates._START_;
  scan: while (index < sourceLength) {
    const char = source.charAt(index);
    switch (state) {
      case numberStates._START_:
        if (char === "-") {
          state = numberStates.MINUS;
        } else if (char === "0") {
          passedValueIndex = index + 1;
          state = numberStates.ZERO;
        } else if (isDigit1to9(char)) {
          passedValueIndex = index + 1;
          state = numberStates.DIGIT;
        } else {
          return null;
        }
        break;
      case numberStates.MINUS:
        if (char === "0") {
          passedValueIndex = index + 1;
          state = numberStates.ZERO;
        } else if (isDigit1to9(char)) {
          passedValueIndex = index + 1;
          state = numberStates.DIGIT;
        } else {
          return null;
        }
        break;
      case numberStates.ZERO:
        if (char === ".") {
          state = numberStates.POINT;
        } else if (isExp(char)) {
          state = numberStates.EXP;
        } else {
          break scan;
        }
        break;
      case numberStates.DIGIT:
        if (isDigit(char)) {
          passedValueIndex = index + 1;
        } else if (char === ".") {
          state = numberStates.POINT;
        } else if (isExp(char)) {
          state = numberStates.EXP;
        } else {
          break scan;
        }
        break;
      case numberStates.POINT:
        if (isDigit(char)) {
          passedValueIndex = index + 1;
          state = numberStates.DIGIT_FRACTION;
        } else {
          break scan;
        }
        break;
      case numberStates.DIGIT_FRACTION:
        if (isDigit(char)) {
          passedValueIndex = index + 1;
        } else if (isExp(char)) {
          state = numberStates.EXP;
        } else {
          break scan;
        }
        break;
      case numberStates.EXP:
        if (char === "+" || char === "-") {
          state = numberStates.EXP_DIGIT_OR_SIGN;
        } else if (isDigit(char)) {
          passedValueIndex = index + 1;
          state = numberStates.EXP_DIGIT_OR_SIGN;
        } else {
          break scan;
        }
        break;
      case numberStates.EXP_DIGIT_OR_SIGN:
        if (isDigit(char)) {
          passedValueIndex = index + 1;
        } else {
          break scan;
        }
        break;
    }
    index++;
  }
  // Compare against the START offset, not 0: a lone "-" at the end of the
  // input consumes no digit, and must not produce an empty token whose index
  // equals the start (the caller would then never advance).
  if (passedValueIndex <= startIndex) {
    return null;
  }
  return {
    type: types_js_1.JsonTokenTypes.NUMBER,
    value: source.substring(startIndex, passedValueIndex),
    line: line,
    index: passedValueIndex,
    column: column + passedValueIndex - startIndex,
  };
}

const defaultSettings = {
  verbose: true,
};

/**
 * Splits `source` into a flat list of tokens. Whitespace is skipped; comments
 * are emitted as COMMENT tokens.
 *
 * @param {string} source - Text to tokenize.
 * @param {{verbose?: boolean}} [settings] - When `verbose` is true (the
 *   default) each token gets a `position` built from its start and end
 *   line/column/offset.
 * @returns {Array<{type: *, value: *, decoded: *, position?: *}>} Tokens in
 *   source order.
 */
function tokenize(source, settings) {
  const options = Object.assign({}, defaultSettings, settings);
  const tokens = [];
  const sourceLength = source.length;
  let line = 1;
  let column = 1;
  let index = 0;
  while (index < sourceLength) {
    const whitespace = parseWhitespace(source, index, line, column);
    if (whitespace) {
      index = whitespace.index;
      line = whitespace.line;
      column = whitespace.column;
      continue;
    }
    // Order matters: keywords are tried before identifiers, and comments
    // before anything else that could start with "/".
    const matched =
      parseComment(source, index, line, column) ||
      parseChar(source, index, line, column) ||
      parseKeyword(source, index, line, column) ||
      parseIdentifier(source, index, line, column) ||
      parseString(source, index, line, column) ||
      parseNumber(source, index, line, column);
    if (matched) {
      const token = { type: matched.type, value: matched.value, decoded: matched.decoded };
      if (options.verbose) {
        token.position = new position_js_1.JsonPosition(line, column, index, matched.line, matched.column, matched.index);
      }
      tokens.push(token);
      index = matched.index;
      line = matched.line;
      column = matched.column;
    } else {
      // NOTE(review): presumably error() throws; if it returned, this loop
      // would not advance. Confirm against ./error.js.
      (0, error_js_1.error)((0, tokenizeErrorTypes_js_1.cannotTokenizeSymbol)(source.charAt(index), line, column), source, line, column);
    }
  }
  return tokens;
}