UNPKG

mdsf

Version:

Metarhia Data Serialization Format

697 lines (582 loc) 16.3 kB
'use strict'; const stringify = require('./stringify'); // Deserialize a string into a JavaScript value and return it. // data - a string or Buffer to parse // const parse = data => { if (Buffer.isBuffer(data)) { data = data.toString(); } const parser = new Parser(data); return parser.parse(); }; // Parse a buffer of JSTP network messages. // data - buffer contents // messages - target array // Returns the part of the message that has not been received yet // const parseJSTPMessages = (data, messages) => { const chunks = data.split('\u0000'); const readyMessagesCount = chunks.length - 1; for (let i = 0; i < readyMessagesCount; i++) { const parser = new Parser(chunks[i]); const message = parser.parseObject(); parser.ensureEndOfData(); messages.push(message); } return chunks[readyMessagesCount]; }; // Internal parser class // string - a string to parse // function Parser(string) { this.string = string; this.lookaheadIndex = 0; } // Start parsing // Parser.prototype.parse = function() { const value = this.parseValue(); this.ensureEndOfData(); return value; }; // Check that there's no data left except for comments and whitespace and throw // an error otherwise // Parser.prototype.ensureEndOfData = function() { this.skipClutter(); if (this.lookaheadIndex < this.string.length) { this.throwUnexpected(); } }; // Return the current lookahead character // Parser.prototype.lookahead = function() { return this.string[this.lookaheadIndex]; }; // Advance to the next character and return the character // that used to be lookahead // Parser.prototype.advance = function() { const character = this.string[this.lookaheadIndex++]; if (this.lookaheadIndex > this.string.length) { throw new SyntaxError('Unexpected end of data'); } return character; }; // Step back to the previous character // Parser.prototype.retreat = function() { this.lookaheadIndex--; if (this.lookaheadIndex < 0) { throw new SyntaxError('Parse error'); } }; // Advances one character ensuring the equality of the passed // one to the given one // token - a character to match // Parser.prototype.match = function(token) { if (this.advance() !== token) { this.throwExpected(token); } }; // Throw a generic parsing error // message - error message // Parser.prototype.throwError = function(message) { throw new SyntaxError(message + ' at position ' + this.lookaheadIndex); }; // Throw a 'smth expected' error // token - what has been expected // Parser.prototype.throwExpected = function(token) { this.throwError(token + ' expected'); }; // Throw a 'smth unexpected' error // token - what has been unexpected // Parser.prototype.throwUnexpected = function(token) { token = token || this.lookahead(); if (token === undefined) { token = 'end of data'; } this.throwError('Unexpected ' + token); }; // Check if a given character is a whitespace character // character - a character to check // Parser.prototype.isWhitespace = function(character) { return ' \f\n\r\t\v'.includes(character); }; // Check if a given character is a newline character // character - a character to check // Parser.prototype.isNewline = function(character) { return character === '\n' || character === '\r'; }; // Check if a given character is a lowercase letter character // character - a character to check // Parser.prototype.isLetter = function(character) { return character >= 'a' && character <= 'z'; }; // Check if a given character can be the first character in a number // character - a character to check // Parser.prototype.isInitialDigit = function(character) { return ( this.isDecimalDigit(character) || character === '+' || character === '-' || character === '.' ); }; // Check if a given character is one of that are allowed in numbers // character - a character to check // Parser.prototype.isDigit = function(character) { return ( this.isInitialDigit(character) || character === 'e' || character === 'E' ); }; // Check if a given character is a binary digit // character - a character to check // Parser.prototype.isBinaryDigit = function(character) { return character === '0' || character === '1'; }; // Check if a given character is an octal digit // character - a character to check // Parser.prototype.isOctalDigit = function(character) { return character >= '0' && character <= '7'; }; // Check if a given character is a decimal digit // character - a character to check // Parser.prototype.isDecimalDigit = function(character) { return character >= '0' && character <= '9'; }; // Check if a given character is a hexadecimal digit // character - a character to check // Parser.prototype.isHexadecimalDigit = function(character) { return ( (character >= '0' && character <= '9') || (character >= 'a' && character <= 'f') || (character >= 'A' && character <= 'F') ); }; // Check if a given character is a quote character // character - a character to check // Parser.prototype.isQuoteCharacter = function(character) { return character === "'" || character === '"'; }; // Check if a given character can be the first character of an identifier // character - a character to check // Parser.prototype.isInitialIdentifierCharacter = function(character) { return ( (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z') || (character === '_' || character === '$') ); }; // Check if a given character can be a part of an identifier // character - a character to check // Parser.prototype.isIdentifierCharacter = function(character) { return ( this.isInitialIdentifierCharacter(character) || this.isDecimalDigit(character) ); }; // Skip whitespace and comments // Parser.prototype.skipClutter = function() { this.skipWhitespace(); this.skipComments(); this.skipWhitespace(); }; // Skip whitespace // Parser.prototype.skipWhitespace = function() { while (this.isWhitespace(this.lookahead())) { this.advance(); } }; // Skip comments // Parser.prototype.skipComments = function() { if (this.lookahead() !== '/') { return; } this.advance(); if (this.lookahead() === '/') { this.advance(); this.skipLineCommentBody(); } else if (this.lookahead() === '*') { this.advance(); this.skipMultilineCommentBody(); } else { this.retreat(); } }; // Skip the body of a single-line comment (i.e. the part after //) // Parser.prototype.skipLineCommentBody = function() { let ahead = this.lookahead(); while (!this.isNewline(ahead) && ahead !== undefined) { this.advance(); ahead = this.lookahead(); } }; // Skip the body of a multiline comment (i.e. the part after /*) // Parser.prototype.skipMultilineCommentBody = function() { let done = false; while (!done) { while (this.lookahead() !== '*') { this.advance(); } this.advance(); if (this.lookahead() === '/') { this.advance(); done = true; } } }; // Parse a JavaScript value // Parser.prototype.parseValue = function() { this.skipClutter(); const look = this.lookahead(); if (this.isInitialDigit(look)) { return this.parseNumber(); } else if (this.isLetter(look)) { return this.parseIdentifier(); } else if (this.isQuoteCharacter(look)) { return this.parseString(); } else if (look === '[') { return this.parseArray(); } else if (look === '{') { return this.parseObject(); } else { return this.throwUnexpected(); } }; // Parse a number // Parser.prototype.parseNumber = function() { this.skipClutter(); let negateResult = false; let look = this.lookahead(); if (look === '+' || look === '-') { negateResult = look === '-'; this.advance(); } let base = 10; if (this.lookahead() === '0') { this.advance(); look = this.lookahead(); if (this.isDecimalDigit(look)) { this.throwError('Use new octal literals syntax'); } else if (look === 'b') { base = 2; this.advance(); } else if (look === 'o') { base = 8; this.advance(); } else if (look === 'x') { base = 16; this.advance(); } else { this.retreat(); } } const value = base === 10 ? this.parseDecimal() : this.parseMachineInteger(base); if (isNaN(value)) { this.throwError('Invalid number format'); } return negateResult ? -value : value; }; // Parse a decimal number, either integer or float. // May return NaN when the number is incorrect. // Parser.prototype.parseDecimal = function() { let number = ''; const encountered = { // parseFloat ignores unparsed part of dot: false, // the string so we must handle it manually sign: false, exponent: false, }; while (this.isDigit(this.lookahead())) { this.checkNumberPartEncountered(encountered, 'dot', '.'); this.checkNumberPartEncountered(encountered, 'sign', '+', '-'); this.checkNumberPartEncountered(encountered, 'exponent', 'e', 'E'); number += this.advance(); } return parseFloat(number); }; // Check whether a part of a decimal number that can be used only once // has been encountered before and throw SyntaxError if it indeed has. // encounterContext - an object with boolean flags // name - name of the key of the object which value represents the // entitity being checked // character - a character to match against the lookahead character // altCharacter - optional alternative character related to the // same enitity // Parser.prototype.checkNumberPartEncountered = function( encounterContext, name, character, altCharacter ) { const look = this.lookahead(); if (look === character || (altCharacter && look === altCharacter)) { if (encounterContext[name]) { this.throwUnexpected(); } encounterContext[name] = true; } }; // Parse an integer number in binary, octal or hexadecimal representation. // The prefix is not included and must be parsed before invocation. // The function may return NaN when the number is incorrect. // base - 2, 8 or 16 // Parser.prototype.parseMachineInteger = function(base) { const self = this; const checkers = { 2: self.isBinaryDigit, 8: self.isOctalDigit, 16: self.isHexadecimalDigit, }; const checkDigit = checkers[base]; let number = ''; while (checkDigit(this.lookahead())) { number += this.advance(); } return parseInt(number, base); }; // Parse null, undefined, true or false // Parser.prototype.parseIdentifier = function() { this.skipClutter(); let identifier = ''; while (this.isLetter(this.lookahead())) { identifier += this.advance(); } const matching = { undefined, null: null, true: true, false: false, }; if (matching.hasOwnProperty(identifier)) { return matching[identifier]; } else { return this.throwUnexpected(); } }; // Parse a single-quoted or double-quoted string // Parser.prototype.parseString = function() { this.skipClutter(); const quoteStyle = this.lookahead(); if (!this.isQuoteCharacter(quoteStyle)) { this.throwExpected('String'); } let string = ''; let escapeMode = false; this.advance(); while (escapeMode || this.lookahead() !== quoteStyle) { const look = this.advance(); if (escapeMode) { const controlCharacters = { b: '\b', f: '\f', n: '\n', r: '\r', t: '\t', v: '\v', }; const controlCharacter = controlCharacters[look]; if (controlCharacter) { string += controlCharacter; } else if (this.isOctalDigit(look)) { this.retreat(); string += this.parseOctalEncodedStringCharacter(); } else if (look === 'x') { this.retreat(); string += this.parseOneByteHexEncodedCharacter(); } else if (look === 'u') { this.retreat(); string += this.parseUnicodeCharacter(); } else { string += look; } escapeMode = false; } else if (look === '\\') { escapeMode = true; } else { string += look; } } this.match(quoteStyle); return string; }; // Parse the part of an octal escape sequence after backslash and // return the corresponding character // Parser.prototype.parseOctalEncodedStringCharacter = function() { let digits = ''; for (let count = 0; count < 3; count++) { const look = this.advance(); if (!this.isOctalDigit(look)) { this.retreat(); break; } digits += look; } if (digits.length === 0) { this.throwExpected('Octal number'); } const code = parseInt(digits, 8); return String.fromCharCode(code); }; // Parse the part of a hexadecimal escape sequence after backslash and `x` or // `u` character and return the character corresponding to that code // nibblesCount - count of half-bytes in the escape sequence // Parser.prototype.parseHexEncodedStringCharacter = function(nibblesCount) { let digits = ''; for (let i = 0; i < nibblesCount; i++) { const look = this.advance(); if (!this.isHexadecimalDigit(look)) { this.throwExpected('Hexadecimal digit'); } digits += look; } const code = parseInt(digits, 16); return String.fromCharCode(code); }; // Parse the part of a one-byte hexadecimal escape sequence after backslash // and return the corresponding character // Parser.prototype.parseOneByteHexEncodedCharacter = function() { this.match('x'); return this.parseHexEncodedStringCharacter(2); }; // Parse the part of a two-byte hexadecimal escape sequence after backslash // and return the corresponding character // Parser.prototype.parseTwoByteHexEncodedCharacter = function() { this.match('u'); return this.parseHexEncodedStringCharacter(4); }; // Parse a Unicode escape sequence after backslash and return the // corresponding character // Parser.prototype.parseUnicodeCharacter = function() { this.match('u'); if (this.lookahead() === '{') { return this.parseEs6UnicodeLiteral(); } else { this.retreat(); return this.parseTwoByteHexEncodedCharacter(); } }; // Parse an ES2015 multibyte Unicode escape sequence after backslash and `u` // character // Parser.prototype.parseEs6UnicodeLiteral = function() { this.match('{'); let hexDigits = ''; while (this.lookahead() !== '}') { const digit = this.advance(); if (!this.isHexadecimalDigit(digit)) { this.throwExpected('Hexadecimal digit'); } hexDigits += digit; } if (hexDigits.length === 0 || hexDigits.length > 8) { this.throwError('Invalid code point'); } this.match('}'); const code = parseInt(hexDigits, 16); return String.fromCodePoint(code); }; // Parse an array // Parser.prototype.parseArray = function() { this.skipClutter(); this.match('['); const array = []; while (this.lookahead() !== ']') { this.skipClutter(); if (this.lookahead() === ',') { array.push(undefined); } else if (this.lookahead() === ']') { break; } else { const value = this.parseValue(); array.push(value); } this.skipClutter(); if (this.lookahead() !== ']') { this.match(','); } } this.match(']'); return array; }; // Parse an object // Parser.prototype.parseObject = function() { this.skipClutter(); const object = {}; this.match('{'); while (this.lookahead() !== '}') { const key = this.parseObjectKey(); this.match(':'); const value = this.parseValue(); if (value !== undefined) { object[key] = value; } this.skipClutter(); if (this.lookahead() !== '}') { this.match(','); this.skipClutter(); } } this.skipClutter(); this.match('}'); return object; }; // Parse a key of an object // Parser.prototype.parseObjectKey = function() { this.skipClutter(); if (this.isQuoteCharacter(this.lookahead())) { return this.parseString(); } if (!this.isInitialIdentifierCharacter(this.lookahead())) { this.throwExpected('String or identifier'); } let key = ''; while (this.isIdentifierCharacter(this.lookahead())) { key += this.advance(); } return key; }; module.exports = { stringify, parse, parseJSTPMessages, };