UNPKG

@creditkarma/thrift-parser

Version:

A parser for Thrift written in TypeScript

367 lines 11.3 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const debugger_1 = require("./debugger");
const factory_1 = require("./factory");
const keywords_1 = require("./keywords");

/** True when `value` is a single ASCII decimal digit. */
function isDigit(value) {
    return value >= '0' && value <= '9';
}

/** True when `value` is a single ASCII letter (either case). */
function isAlpha(value) {
    return (value >= 'a' && value <= 'z') || (value >= 'A' && value <= 'Z');
}

// The first character of an Identifier can be a letter or underscore
function isAlphaOrUnderscore(value) {
    return isAlpha(value) || value === '_';
}

/** True when `value` may appear after the first character of an identifier. */
function isValidIdentifier(value) {
    return (isAlphaOrUnderscore(value) ||
        isDigit(value) ||
        value === '.' ||
        value === '-');
}

/** True when `value` is a single hexadecimal digit (0-9, a-f, A-F). */
function isHexDigit(value) {
    return ((value >= '0' && value <= '9') ||
        (value >= 'A' && value <= 'F') ||
        (value >= 'a' && value <= 'f'));
}

/** True for space, carriage return, tab and line feed. */
function isWhiteSpace(char) {
    switch (char) {
        case ' ':
        case '\r':
        case '\t':
        case '\n':
            return true;
        default:
            return false;
    }
}

/**
 * Error thrown internally by the scanner. Carries the location of the
 * offending token so that `scan` can forward it to the reporter.
 */
class ScanError extends Error {
    constructor(msg, loc) {
        super(msg);
        this.message = msg;
        this.loc = loc;
    }
}

/**
 * Creates a scanner (lexer) over `src`.
 *
 * @param src - The text to tokenize. It is read with `charAt`/`substring`.
 * @param report - Callback that receives the value of
 *     `factory_1.createScanError(message, loc)` for every scan error.
 *     Defaults to `debugger_1.noopReporter`.
 * @returns An object with `scan()`, which tokenizes the whole input and
 *     returns the token array (always terminated by an "EOF" token), and
 *     `syncronize()`, which advances the cursor to the next whitespace
 *     character (or the end of input).
 */
function createScanner(src, report = debugger_1.noopReporter) {
    const source = src;
    const tokens = [];
    // Position of the cursor (1-based line/column, 0-based index).
    let line = 1;
    let column = 1;
    let currentIndex = 0;
    // Position at which the token currently being scanned started.
    let startLine = 1;
    let startColumn = 1;
    let startIndex = 0;

    /**
     * Tokenizes the whole input. Scan errors are passed to `report` and
     * scanning continues with the next character.
     */
    function scan() {
        while (!isAtEnd()) {
            try {
                startIndex = currentIndex;
                startLine = line;
                startColumn = column;
                scanToken();
            }
            catch (e) {
                report(factory_1.createScanError(e.message, e.loc));
            }
        }
        startIndex = currentIndex;
        addToken("EOF" /* EOF */);
        return tokens;
    }

    // Find the beginning of the next word to restart parse after error
    function syncronize() {
        while (!isAtEnd() && !isWhiteSpace(current())) {
            advance();
        }
    }

    /** Consumes one character and dispatches on it to produce at most one token. */
    function scanToken() {
        const next = advance();
        switch (next) {
            case ' ':
            case '\r':
            case '\t':
                // Ignore whitespace.
                break;
            case '\n':
                nextLine();
                break;
            case '&':
                // Thrift supports (undocumented by the grammar) a syntax for c-style pointers
                // Pointers are indicated by the '&' token. As these are not relevant to JavaScript we
                // drop them here. This may not be the best thing to do, perhaps should leave them in
                // the parse tree and allow consumers to deal.
                break;
            case '=':
                addToken("EqualToken" /* EqualToken */);
                break;
            case '(':
                addToken("LeftParenToken" /* LeftParenToken */);
                break;
            case ')':
                addToken("RightParenToken" /* RightParenToken */);
                break;
            case '{':
                addToken("LeftBraceToken" /* LeftBraceToken */);
                break;
            case '}':
                addToken("RightBraceToken" /* RightBraceToken */);
                break;
            case '[':
                addToken("LeftBracketToken" /* LeftBracketToken */);
                break;
            case ']':
                addToken("RightBracketToken" /* RightBracketToken */);
                break;
            case ';':
                addToken("SemicolonToken" /* SemicolonToken */);
                break;
            case ',':
                addToken("CommaToken" /* CommaToken */);
                break;
            // Strings can use single or double quotes
            case '"':
            case "'":
                // Pass the opening quote so that only the same quote closes the literal.
                string(next);
                break;
            case ':':
                addToken("ColonToken" /* ColonToken */);
                break;
            case '#':
                singleLineComment();
                break;
            case '/':
                if (peek() === '/') {
                    singleLineComment();
                }
                else if (peek() === '*') {
                    multilineComment();
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
                break;
            case '<':
                addToken("LessThanToken" /* LessThanToken */);
                break;
            case '>':
                addToken("GreaterThanToken" /* GreaterThanToken */);
                break;
            case '-':
                if (isDigit(peek())) {
                    number();
                }
                else {
                    addToken("MinusToken" /* MinusToken */);
                }
                break;
            default:
                if (isDigit(next)) {
                    number();
                }
                else if (isAlphaOrUnderscore(next)) {
                    identifier();
                }
                else if (isValidIdentifier(next)) {
                    reportError(`Invalid identifier '${next}': Identifiers must begin with a letter or underscore`);
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
        }
    }

    /** Scans the rest of an identifier and emits a keyword or Identifier token. */
    function identifier() {
        while (!isAtEnd() && peek() !== '\n' && isValidIdentifier(peek())) {
            advance();
        }
        const literal = source.substring(startIndex, currentIndex);
        // Look the word up as an OWN property only. With a plain-object keyword
        // table, a direct `KEYWORDS[literal]` would resolve identifiers such as
        // `constructor` or `toString` to inherited Object.prototype members.
        const type = Object.prototype.hasOwnProperty.call(keywords_1.KEYWORDS, literal)
            ? keywords_1.KEYWORDS[literal]
            : undefined;
        if (type == null) {
            addToken("Identifier" /* Identifier */, literal);
        }
        else {
            addToken(type, literal);
        }
    }

    /** Scans a numeric literal whose first character has already been consumed. */
    function number() {
        if (current() === '0' && (consume('x') || consume('X'))) {
            hexadecimal();
        }
        else {
            integer();
            if (peek() === 'e' || peek() === 'E') {
                enotation();
            }
            else if (peek() === '.' && isDigit(peekNext())) {
                float();
            }
            else {
                commitToken("IntegerLiteral" /* IntegerLiteral */);
            }
        }
    }

    /** Consumes hex digits following a `0x`/`0X` prefix and emits a HexLiteral. */
    function hexadecimal() {
        while (!isAtEnd() && peek() !== '\n' && isHexDigit(peek())) {
            advance();
        }
        commitToken("HexLiteral" /* HexLiteral */);
    }

    /** Scans an exponent (`e`/`E`, optional sign, digits) and emits an ExponentialLiteral. */
    function enotation() {
        consume('e') || consume('E');
        consume('-') || consume('+');
        if (isDigit(peek())) {
            integer();
            commitToken("ExponentialLiteral" /* ExponentialLiteral */);
        }
        else {
            reportError(`Invalid use of e-notation`);
        }
    }

    /** Scans the fractional part of a number and emits a Float or Exponential literal. */
    function float() {
        consume('.');
        integer();
        if (peek() === 'e' || peek() === 'E') {
            enotation();
        }
        else {
            commitToken("FloatLiteral" /* FloatLiteral */);
        }
    }

    /** Consumes a run of decimal digits. */
    function integer() {
        while (!isAtEnd() && peek() !== '\n' && isDigit(peek())) {
            advance();
        }
    }

    /**
     * Scans a `#` or `//` comment and emits a CommentLine token holding the
     * trimmed comment text.
     *
     * The terminating newline is never consumed here; it is left for
     * `scanToken` so that `nextLine()` runs and line numbers stay correct even
     * for empty comments.
     */
    function singleLineComment() {
        // Skip what is left of the comment marker plus any leading spaces.
        while (!isAtEnd() && (peek() === '/' || peek() === '#' || peek() === ' ')) {
            advance();
        }
        // A comment goes until the end of the line.
        const textStart = currentIndex;
        while (!isAtEnd() && peek() !== '\n') {
            advance();
        }
        const comment = source.substring(textStart, currentIndex);
        addToken("CommentLine" /* CommentLine */, comment.trim());
    }

    /** Scans a block comment and emits a CommentBlock token with the trimmed text. */
    function multilineComment() {
        let comment = '';
        let cursor = 0;
        // Skip the opening marker: leading '/', '*' and spaces.
        while (true) {
            if (current() === '\n' ||
                isAtEnd() ||
                (current() !== '/' && current() !== '*' && current() !== ' ')) {
                break;
            }
            else {
                advance();
            }
        }
        while (true) {
            if (current() === '\n') {
                nextLine();
            }
            if (comment.charAt(cursor - 1) === '\n' &&
                (peek() === ' ' || peek() === '*')) {
                /**
                 * We ignore stars and spaces after a new line to normalize comment formatting.
                 * We're only keeping the text of the comment without the extraneous formatting.
                 */
            }
            else {
                comment += current();
                cursor += 1;
            }
            advance();
            // A comment goes until we find a comment terminator (*/).
            if ((peek() === '*' && peekNext() === '/') || isAtEnd()) {
                advance();
                advance();
                break;
            }
        }
        addToken("CommentBlock" /* CommentBlock */, comment.trim());
    }

    /**
     * Scans a string literal and emits a StringLiteral token without the
     * surrounding quotes.
     *
     * @param quote - The quote character that opened the literal. Only this
     *     character closes it, so `"it's"` is a single literal.
     * @throws ScanError when the input ends before the closing quote.
     */
    function string(quote) {
        while (!isAtEnd() && peek() !== quote) {
            if (peek() === '\n') {
                nextLine();
            }
            advance();
        }
        if (isAtEnd()) {
            // The loop only stops early on the closing quote, so reaching the
            // end of the input means the literal was never terminated.
            const shown = quote === '"' ? `'"'` : `"'"`;
            reportError(`Strings must be terminated with ${shown}`);
        }
        else {
            // advance past closing quote
            advance();
            // We use "+ 1" and "- 1" to remove the quote marks from the string
            const literal = source.substring(startIndex + 1, currentIndex - 1);
            addToken("StringLiteral" /* StringLiteral */, literal);
        }
    }

    /** Consumes the next character only if it equals `text`; reports whether it did. */
    function consume(text) {
        if (peek() === text) {
            advance();
            return true;
        }
        return false;
    }

    /** Moves the cursor forward one character and returns the character consumed. */
    function advance() {
        currentIndex++;
        column++;
        return source.charAt(currentIndex - 1);
    }

    /** The character before the most recently consumed one. */
    function previous() {
        return source.charAt(currentIndex - 2);
    }

    /** The most recently consumed character. */
    function current() {
        return source.charAt(currentIndex - 1);
    }

    /** The next unconsumed character ('' at end of input). */
    function peek() {
        return source.charAt(currentIndex);
    }

    /** The character after the next unconsumed one ('' past end of input). */
    function peekNext() {
        return source.charAt(currentIndex + 1);
    }

    /** Records that a line feed has been consumed. */
    function nextLine() {
        line++;
        column = 1;
    }

    /** Emits a token whose value is the raw text scanned since the token start. */
    function commitToken(type) {
        const literal = source.substring(startIndex, currentIndex);
        addToken(type, literal);
    }

    /** Location spanning from the current token's start to the cursor. */
    function currentLocation() {
        return {
            start: {
                line: startLine,
                column: startColumn,
                index: startIndex,
            },
            end: {
                line,
                column,
                index: currentIndex,
            },
        };
    }

    /** Appends a token of `type` with `value` at the current location. */
    function addToken(type, value = '') {
        const loc = currentLocation();
        tokens.push(factory_1.createToken(type, value, loc));
    }

    /** True once every character of the source has been consumed. */
    function isAtEnd() {
        return currentIndex >= source.length;
    }

    /** Aborts the current token by throwing a ScanError at the current location. */
    function reportError(msg) {
        throw new ScanError(msg, currentLocation());
    }

    return {
        scan,
        syncronize,
    };
}
exports.createScanner = createScanner;
//# sourceMappingURL=scanner.js.map