UNPKG

@creditkarma/thrift-parser

Version:

A parser for Thrift written in TypeScript

380 lines 11.5 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.createScanner = void 0;
const debugger_1 = require("./debugger");
const factory_1 = require("./factory");
const keywords_1 = require("./keywords");
const types_1 = require("./types");

/** True when `value` is a single ASCII decimal digit. The empty string (end of input) is not a digit. */
function isDigit(value) {
    return value >= '0' && value <= '9';
}

/** True when `value` is an ASCII letter. */
function isAlpha(value) {
    return (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z');
}

// The first character of an Identifier can be a letter or underscore
function isAlphaOrUnderscore(value) {
    return isAlpha(value) || value === '_';
}

/** True when `value` may appear after the first character of an identifier (letters, digits, '_', '.', '-'). */
function isValidIdentifier(value) {
    return (isAlphaOrUnderscore(value) ||
        isDigit(value) ||
        value === '.' ||
        value === '-');
}

/** True when `value` is a hexadecimal digit (0-9, a-f, A-F). */
function isHexDigit(value) {
    return ((value >= '0' && value <= '9') ||
        (value >= 'A' && value <= 'F') ||
        (value >= 'a' && value <= 'f'));
}

/** True for the whitespace characters the scanner recognises: space, CR, tab and LF. */
function isWhiteSpace(char) {
    switch (char) {
        case ' ':
        case '\r':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}

/**
 * Error thrown internally by the scanner. `loc` is the source span
 * (see currentLocation) of the offending text; scan() catches the error and
 * forwards `message` and `loc` to the reporter.
 */
class ScanError extends Error {
    constructor(msg, loc) {
        super(msg);
        this.name = 'ScanError';
        this.loc = loc;
    }
}

/**
 * Creates a scanner (lexer) over `src`.
 *
 * @param src    The source text to tokenize.
 * @param report Callback that receives the scan errors built by
 *               `createScanError`. Defaults to `noopReporter`.
 * @returns An object with `scan()`, which tokenizes the whole input and
 *          returns the token array (always ending with an EOF token), and
 *          `syncronize()`, which skips ahead to the next whitespace character.
 */
function createScanner(src, report = debugger_1.noopReporter) {
    const source = src;
    const tokens = [];
    // Position of the scanner head. `currentIndex` is the index of the next
    // unread character; `line` and `column` are 1-based.
    let line = 1;
    let column = 1;
    // Position at which the token currently being scanned started.
    let startLine = 1;
    let startColumn = 1;
    let startIndex = 0;
    let currentIndex = 0;

    /**
     * Tokenizes the whole input. Errors thrown while scanning a token are
     * passed to `report` and scanning resumes at the current position, so a
     * single bad token does not abort the scan.
     */
    function scan() {
        while (!isAtEnd()) {
            try {
                startIndex = currentIndex;
                startLine = line;
                startColumn = column;
                scanToken();
            }
            catch (e) {
                report((0, factory_1.createScanError)(e.message, e.loc));
            }
        }
        // NOTE: only the start index is refreshed for the EOF token; its start
        // line/column are those of the last scanned token.
        startIndex = currentIndex;
        addToken(types_1.SyntaxType.EOF);
        return tokens;
    }

    // Find the beginning of the next word to restart parse after error
    function syncronize() {
        while (!isAtEnd() && !isWhiteSpace(current())) {
            advance();
        }
    }

    /** Consumes one character and dispatches on it to produce at most one token. */
    function scanToken() {
        const next = advance();
        switch (next) {
            case ' ':
            case '\r':
            case '\t':
                // Ignore whitespace.
                break;
            case '\n':
                nextLine();
                break;
            case '&':
                // Thrift supports (undocumented by the grammar) a syntax for c-style pointers
                // Pointers are indicated by the '&' token. As these are not relevant to JavaScript we
                // drop them here. This may not be the best thing to do, perhaps should leave them in
                // the parse tree and allow consumers to deal.
                break;
            case '=':
                addToken(types_1.SyntaxType.EqualToken);
                break;
            case '(':
                addToken(types_1.SyntaxType.LeftParenToken);
                break;
            case ')':
                addToken(types_1.SyntaxType.RightParenToken);
                break;
            case '{':
                addToken(types_1.SyntaxType.LeftBraceToken);
                break;
            case '}':
                addToken(types_1.SyntaxType.RightBraceToken);
                break;
            case '[':
                addToken(types_1.SyntaxType.LeftBracketToken);
                break;
            case ']':
                addToken(types_1.SyntaxType.RightBracketToken);
                break;
            case ';':
                addToken(types_1.SyntaxType.SemicolonToken);
                break;
            case ',':
                addToken(types_1.SyntaxType.CommaToken);
                break;
            // Strings can use single or double quotes
            case '"':
            case "'":
                string(next);
                break;
            case ':':
                addToken(types_1.SyntaxType.ColonToken);
                break;
            case '#':
                singleLineComment();
                break;
            case '/':
                if (peek() === '/') {
                    singleLineComment();
                }
                else if (peek() === '*') {
                    multilineComment();
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
                break;
            case '<':
                addToken(types_1.SyntaxType.LessThanToken);
                break;
            case '>':
                addToken(types_1.SyntaxType.GreaterThanToken);
                break;
            case '-':
                if (isDigit(peek())) {
                    number();
                }
                else {
                    addToken(types_1.SyntaxType.MinusToken);
                }
                break;
            default:
                if (isDigit(next)) {
                    number();
                }
                else if (isAlphaOrUnderscore(next)) {
                    identifier();
                }
                else if (isValidIdentifier(next)) {
                    reportError(`Invalid identifier '${next}': Identifiers must begin with a letter or underscore`);
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
        }
    }

    /**
     * Scans the rest of an identifier whose first character is already
     * consumed. Emits the keyword token type when the text is a key of
     * KEYWORDS, otherwise an Identifier token.
     */
    function identifier() {
        while (!isAtEnd() && peek() !== '\n' && isValidIdentifier(peek())) {
            advance();
        }
        const literal = source.substring(startIndex, currentIndex);
        const type = keywords_1.KEYWORDS[literal];
        if (type == null) {
            addToken(types_1.SyntaxType.Identifier, literal);
        }
        else {
            addToken(type, literal);
        }
    }

    /**
     * Scans a numeric literal whose first character ('-' or a digit) is
     * already consumed.
     *
     * NOTE: the hexadecimal prefix is only recognised when the consumed
     * character is '0', so a literal entered through the '-' branch is never
     * scanned as hexadecimal.
     */
    function number() {
        if (current() === '0' && (consume('x') || consume('X'))) {
            hexadecimal();
        }
        else {
            integer();
            if (peek() === 'e' || peek() === 'E') {
                enotation();
            }
            else if (peek() === '.' && isDigit(peekNext())) {
                float();
            }
            else {
                commitToken(types_1.SyntaxType.IntegerLiteral);
            }
        }
    }

    /** Consumes hexadecimal digits and emits a HexLiteral token. */
    function hexadecimal() {
        while (!isAtEnd() && peek() !== '\n' && isHexDigit(peek())) {
            advance();
        }
        commitToken(types_1.SyntaxType.HexLiteral);
    }

    /**
     * Consumes an exponent ('e' or 'E', optional sign, digits) and emits an
     * ExponentialLiteral token. Throws a ScanError when no digit follows.
     */
    function enotation() {
        consume('e') || consume('E');
        consume('-') || consume('+');
        if (isDigit(peek())) {
            integer();
            commitToken(types_1.SyntaxType.ExponentialLiteral);
        }
        else {
            reportError(`Invalid use of e-notation`);
        }
    }

    /** Consumes the fractional part of a number and emits a FloatLiteral, or defers to enotation(). */
    function float() {
        consume('.');
        integer();
        if (peek() === 'e' || peek() === 'E') {
            enotation();
        }
        else {
            commitToken(types_1.SyntaxType.FloatLiteral);
        }
    }

    /** Consumes a run of decimal digits without emitting a token. */
    function integer() {
        while (!isAtEnd() && peek() !== '\n' && isDigit(peek())) {
            advance();
        }
    }

    /**
     * Scans a '#' or '//' comment and emits a CommentLine token holding the
     * trimmed comment text.
     *
     * The terminating newline is never consumed here: it is left for
     * scanToken() so the line counter is advanced exactly once, including for
     * empty comments.
     */
    function singleLineComment() {
        // Skip the remaining comment markers and leading spaces.
        while (!isAtEnd() &&
            (peek() === '/' || peek() === '#' || peek() === ' ')) {
            advance();
        }
        // A comment goes until the end of the line.
        const textStart = currentIndex;
        while (!isAtEnd() && peek() !== '\n') {
            advance();
        }
        const comment = source.substring(textStart, currentIndex);
        addToken(types_1.SyntaxType.CommentLine, comment.trim());
    }

    /**
     * Scans a block comment and emits a CommentBlock token holding the
     * trimmed comment text. Called after scanToken() consumed the leading '/'
     * and saw '*' as the next character.
     *
     * A comment that reaches the end of the input without a terminator is
     * still emitted with whatever text was collected.
     */
    function multilineComment() {
        // Consume the '*' of the opener first so it can never be mistaken for
        // the start of the terminator.
        advance();
        const atTerminator = () => peek() === '*' && peekNext() === '/';
        // Skip leading decoration ('/', '*', ' ') but never run over the
        // terminator, so empty comments end where they should.
        while (!isAtEnd() &&
            !atTerminator() &&
            (peek() === '/' || peek() === '*' || peek() === ' ')) {
            advance();
        }
        let comment = '';
        // A comment goes until we find a comment terminator (*/).
        while (!isAtEnd() && !atTerminator()) {
            const char = advance();
            if (char === '\n') {
                nextLine();
            }
            const atLineStart = comment.charAt(comment.length - 1) === '\n';
            if (atLineStart && (peek() === ' ' || peek() === '*')) {
                /**
                 * We ignore stars and spaces after a new line to normalize comment formatting.
                 * We're only keeping the text of the comment without the extraneous formatting.
                 * The decision looks at the character that follows, so the single space
                 * separating the decoration from the text is kept.
                 */
                continue;
            }
            comment += char;
        }
        if (!isAtEnd()) {
            // Consume the closing "*/".
            advance();
            advance();
        }
        addToken(types_1.SyntaxType.CommentBlock, comment.trim());
    }

    /**
     * Scans a string literal whose opening quote is already consumed and
     * emits a StringLiteral token with the quotes removed and escaped quotes
     * unescaped.
     *
     * @param terminator The quote character that opened the string.
     * @throws ScanError when the input ends before the closing quote.
     */
    function string(terminator) {
        while (!isAtEnd() && peek() !== terminator) {
            if (peek() === '\\') {
                // Skip the backslash so the escaped character (possibly the
                // terminator) is consumed below without ending the string.
                advance();
            }
            if (isAtEnd()) {
                break;
            }
            // Count the line only after the newline is consumed so the column
            // bookkeeping matches scanToken().
            if (advance() === '\n') {
                nextLine();
            }
        }
        // The loop stops either on the terminator (still unread) or at the end
        // of the input, so reaching the end always means the string is open.
        if (isAtEnd()) {
            reportError(`String must be terminated with ${terminator}`);
        }
        else {
            // advance past closing "
            advance();
            // We use "+ 1" and "- 1" to remove the quote marks from the string and unescape escaped terminators
            const literal = source
                .substring(startIndex + 1, currentIndex - 1)
                .replace(/\\(\"|\')/g, '$1');
            addToken(types_1.SyntaxType.StringLiteral, literal);
        }
    }

    /** Consumes the next character when it equals `text`; returns whether it did. */
    function consume(text) {
        if (peek() === text) {
            advance();
            return true;
        }
        return false;
    }

    /** Consumes one character, bumps the column and returns the consumed character ('' past the end). */
    function advance() {
        currentIndex++;
        column++;
        return source.charAt(currentIndex - 1);
    }

    /** The character before the most recently consumed one. */
    function previous() {
        return source.charAt(currentIndex - 2);
    }

    /** The most recently consumed character. */
    function current() {
        return source.charAt(currentIndex - 1);
    }

    /** The next unread character, or '' at the end of the input. */
    function peek() {
        return source.charAt(currentIndex);
    }

    /** The character after the next unread one, or '' past the end of the input. */
    function peekNext() {
        return source.charAt(currentIndex + 1);
    }

    /** Records a line break: increments the line and resets the column. */
    function nextLine() {
        line++;
        column = 1;
    }

    /** Emits a token of `type` whose value is the source text scanned since the token started. */
    function commitToken(type) {
        const literal = source.substring(startIndex, currentIndex);
        addToken(type, literal);
    }

    /** Builds the location spanning from the start of the current token to the scanner head. */
    function currentLocation() {
        return {
            start: {
                line: startLine,
                column: startColumn,
                index: startIndex,
            },
            end: {
                line,
                column,
                index: currentIndex,
            },
        };
    }

    /** Appends a token of `type` with `value` and the current location to the token list. */
    function addToken(type, value = '') {
        const loc = currentLocation();
        tokens.push((0, factory_1.createToken)(type, value, loc));
    }

    /** True once every character of the source has been consumed. */
    function isAtEnd() {
        return currentIndex >= source.length;
    }

    /** Throws a ScanError carrying `msg` and the current location. */
    function reportError(msg) {
        throw new ScanError(msg, currentLocation());
    }

    return {
        scan,
        syncronize,
    };
}
exports.createScanner = createScanner;
//# sourceMappingURL=scanner.js.map