// @creditkarma/thrift-parser
// Version: (not captured)
// A parser for Thrift written in TypeScript
// 367 lines • 11.3 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const debugger_1 = require("./debugger");
const factory_1 = require("./factory");
const keywords_1 = require("./keywords");
/**
 * Tell whether a character is an ASCII decimal digit.
 *
 * @param {string} value - The character to classify.
 * @returns {boolean} True when `value` sorts between '0' and '9' inclusive.
 */
function isDigit(value) {
    const atLeastZero = '0' <= value;
    return atLeastZero && '9' >= value;
}
/**
 * Tell whether a character is an ASCII letter (either case).
 *
 * @param {string} value - The character to classify.
 * @returns {boolean} True for 'a'-'z' and 'A'-'Z'.
 */
function isAlpha(value) {
    const isLowerCase = 'a' <= value && 'z' >= value;
    const isUpperCase = 'A' <= value && 'Z' >= value;
    return isLowerCase || isUpperCase;
}
/**
 * Tell whether a character may start an identifier: the first character of
 * an identifier can be a letter or an underscore.
 *
 * @param {string} value - The character to classify.
 * @returns {boolean} True for '_' and for anything `isAlpha` accepts.
 */
function isAlphaOrUnderscore(value) {
    if (value === '_') {
        return true;
    }
    return isAlpha(value);
}
/**
 * Tell whether a character may appear inside an identifier.
 *
 * Letters, underscore, digits, '.' and '-' are accepted.
 *
 * @param {string} value - The character to classify.
 * @returns {boolean} True when the character is allowed in an identifier.
 */
function isValidIdentifier(value) {
    if (isAlphaOrUnderscore(value) || isDigit(value)) {
        return true;
    }
    return value === '.' || value === '-';
}
/**
 * Tell whether a character is a hexadecimal digit.
 *
 * @param {string} value - The character to classify.
 * @returns {boolean} True for '0'-'9', 'A'-'F' and 'a'-'f'.
 */
function isHexDigit(value) {
    const isDecimal = '0' <= value && '9' >= value;
    const isUpperHex = 'A' <= value && 'F' >= value;
    const isLowerHex = 'a' <= value && 'f' >= value;
    return isDecimal || isUpperHex || isLowerHex;
}
/**
 * Tell whether a character is one of the whitespace characters the scanner
 * recognizes: space, carriage return, tab or line feed.
 *
 * @param {string} char - The character to classify.
 * @returns {boolean} True for ' ', '\r', '\t' and '\n'; false otherwise.
 */
function isWhiteSpace(char) {
    return char === ' ' || char === '\r' || char === '\t' || char === '\n';
}
/**
 * Error raised while scanning, carrying the source location at which the
 * problem was detected.
 */
class ScanError extends Error {
    /**
     * @param {string} msg - Human-readable description of the problem.
     * @param {object} loc - Location attached to the error as `loc`.
     */
    constructor(msg, loc) {
        // The Error constructor already stores `msg` as `message`.
        super(msg);
        // Give the error a distinguishable name instead of the generic "Error".
        this.name = 'ScanError';
        this.loc = loc;
    }
}
function createScanner(src, report = debugger_1.noopReporter) {
const source = src;
const tokens = [];
let line = 1;
let column = 1;
let startLine = 1;
let startColumn = 1;
let startIndex = 0;
let currentIndex = 0;
/**
 * Scan the whole source and return the resulting token list.
 *
 * Before each token the start markers are reset to the current position.
 * Anything thrown by `scanToken` is forwarded to `report` as a scan error
 * and scanning continues with the next character. A final EOF token is
 * always appended.
 *
 * @returns {Array} The accumulated `tokens` array, ending with the EOF token.
 */
function scan() {
    while (!isAtEnd()) {
        try {
            startIndex = currentIndex;
            startLine = line;
            startColumn = column;
            scanToken();
        }
        catch (e) {
            report(factory_1.createScanError(e.message, e.loc));
        }
    }
    // Reset every start marker (not only the index) so the EOF token's start
    // line/column describe the end of input rather than the previous token.
    startIndex = currentIndex;
    startLine = line;
    startColumn = column;
    addToken("EOF" /* EOF */);
    return tokens;
}
/**
 * Find the beginning of the next word so scanning can restart after an error.
 *
 * Characters are consumed until the most recently consumed character is
 * whitespace or the input is exhausted. A consumed line feed is registered
 * with `nextLine()` so line/column tracking stays correct for later tokens.
 */
function syncronize() {
    while (!isAtEnd() && !isWhiteSpace(current())) {
        if (advance() === '\n') {
            nextLine();
        }
    }
}
/**
 * Consume one character and dispatch on it, emitting at most one token.
 *
 * Whitespace is skipped, line feeds advance the line counter, punctuation
 * maps directly to a token, and everything else is delegated to the
 * specialised scanners (strings, comments, numbers, identifiers).
 * Unrecognised input is reported through `reportError`.
 */
function scanToken() {
    const next = advance();
    const punctuation = punctuationTokenType(next);
    if (punctuation !== null) {
        addToken(punctuation);
        return;
    }
    // Plain whitespace is ignored. Thrift also supports (undocumented by the
    // grammar) a syntax for c-style pointers, indicated by the '&' token. As
    // these are not relevant to JavaScript they are dropped here. This may not
    // be the best thing to do; perhaps they should stay in the parse tree so
    // consumers can deal with them.
    if (next === ' ' || next === '\r' || next === '\t' || next === '&') {
        return;
    }
    if (next === '\n') {
        nextLine();
        return;
    }
    // Strings can use single or double quotes
    if (next === '"' || next === "'") {
        string();
        return;
    }
    if (next === '#') {
        singleLineComment();
        return;
    }
    if (next === '/') {
        const following = peek();
        if (following === '/') {
            singleLineComment();
        }
        else if (following === '*') {
            multilineComment();
        }
        else {
            reportError(`Unexpected token: ${next}`);
        }
        return;
    }
    if (next === '-') {
        // A minus directly followed by a digit starts a negative number.
        if (isDigit(peek())) {
            number();
        }
        else {
            addToken("MinusToken" /* MinusToken */);
        }
        return;
    }
    if (isDigit(next)) {
        number();
    }
    else if (isAlphaOrUnderscore(next)) {
        identifier();
    }
    else if (isValidIdentifier(next)) {
        reportError(`Invalid identifier '${next}': Identifiers must begin with a letter or underscore`);
    }
    else {
        reportError(`Unexpected token: ${next}`);
    }
}
/**
 * Map a single punctuation character to its token type.
 *
 * @param {string} char - The character just consumed.
 * @returns {?string} The token type, or null when `char` is not punctuation.
 */
function punctuationTokenType(char) {
    switch (char) {
        case '=': return "EqualToken" /* EqualToken */;
        case '(': return "LeftParenToken" /* LeftParenToken */;
        case ')': return "RightParenToken" /* RightParenToken */;
        case '{': return "LeftBraceToken" /* LeftBraceToken */;
        case '}': return "RightBraceToken" /* RightBraceToken */;
        case '[': return "LeftBracketToken" /* LeftBracketToken */;
        case ']': return "RightBracketToken" /* RightBracketToken */;
        case ';': return "SemicolonToken" /* SemicolonToken */;
        case ',': return "CommaToken" /* CommaToken */;
        case ':': return "ColonToken" /* ColonToken */;
        case '<': return "LessThanToken" /* LessThanToken */;
        case '>': return "GreaterThanToken" /* GreaterThanToken */;
        default: return null;
    }
}
/**
 * Scan the remainder of an identifier or keyword and emit its token.
 *
 * Consumes characters accepted by `isValidIdentifier`, then looks the text
 * up in the keyword table. Only the table's own properties count as
 * keywords: a plain property read would also find inherited members, so
 * words such as `constructor` or `toString` could otherwise be emitted with
 * a non-token type instead of "Identifier".
 */
function identifier() {
    while (!isAtEnd() && peek() !== '\n' && isValidIdentifier(peek())) {
        advance();
    }
    const literal = source.substring(startIndex, currentIndex);
    const keywords = keywords_1.KEYWORDS;
    const type = Object.prototype.hasOwnProperty.call(keywords, literal)
        ? keywords[literal]
        : undefined;
    if (type == null) {
        addToken("Identifier" /* Identifier */, literal);
    }
    else {
        addToken(type, literal);
    }
}
/**
 * Scan a numeric literal whose first character has already been consumed.
 *
 * A leading '0' followed by 'x' or 'X' is handed to `hexadecimal`.
 * Otherwise the integer part is consumed and the following character decides
 * between e-notation, a float (a '.' followed by a digit) and a plain
 * integer literal.
 */
function number() {
    const hasHexPrefix = current() === '0' && (consume('x') || consume('X'));
    if (hasHexPrefix) {
        hexadecimal();
        return;
    }
    integer();
    const following = peek();
    if (following === 'e' || following === 'E') {
        enotation();
        return;
    }
    if (following === '.' && isDigit(peekNext())) {
        float();
        return;
    }
    commitToken("IntegerLiteral" /* IntegerLiteral */);
}
/**
 * Consume the run of hexadecimal digits that follows the prefix and emit
 * the text scanned so far as a hex literal token.
 */
function hexadecimal() {
    for (;;) {
        if (isAtEnd()) {
            break;
        }
        const upcoming = peek();
        if (upcoming === '\n' || !isHexDigit(upcoming)) {
            break;
        }
        advance();
    }
    commitToken("HexLiteral" /* HexLiteral */);
}
/**
 * Scan the exponent part of a number: an optional 'e'/'E', an optional
 * sign, then at least one digit. Emits an exponential literal token, or
 * reports an error when no digit follows.
 */
function enotation() {
    if (!consume('e')) {
        consume('E');
    }
    if (!consume('-')) {
        consume('+');
    }
    if (!isDigit(peek())) {
        reportError(`Invalid use of e-notation`);
        return;
    }
    integer();
    commitToken("ExponentialLiteral" /* ExponentialLiteral */);
}
/**
 * Scan the fractional part of a number: the '.' and the digits after it.
 * Continues into e-notation when an 'e' or 'E' follows; otherwise emits a
 * float literal token.
 */
function float() {
    consume('.');
    integer();
    const marker = peek();
    const hasExponent = marker === 'e' || marker === 'E';
    if (hasExponent) {
        enotation();
        return;
    }
    commitToken("FloatLiteral" /* FloatLiteral */);
}
/**
 * Consume consecutive decimal digits, stopping at the end of input, at a
 * line feed, or at the first non-digit. Emits no token.
 */
function integer() {
    for (;;) {
        if (isAtEnd()) {
            return;
        }
        const upcoming = peek();
        if (upcoming === '\n' || !isDigit(upcoming)) {
            return;
        }
        advance();
    }
}
/**
 * Scan a single-line comment ('#' or '//') and emit a comment-line token
 * holding its trimmed text.
 *
 * Expects the first marker character to be consumed already. Any further
 * '/', '#' and ' ' characters directly after it are skipped as decoration.
 * The comment then runs up to, but not including, the next line feed, so the
 * line feed is left for `scanToken` to count. This keeps line numbers
 * correct after empty comments, and a comment made only of markers yields
 * an empty string.
 */
function singleLineComment() {
    // Skip the rest of the marker and any padding in front of the text.
    while (!isAtEnd() && (peek() === '/' || peek() === '#' || peek() === ' ')) {
        advance();
    }
    const textStart = currentIndex;
    // A comment goes until the end of the line.
    while (!isAtEnd() && peek() !== '\n') {
        advance();
    }
    const comment = source.substring(textStart, currentIndex);
    addToken("CommentLine" /* CommentLine */, comment.trim());
}
/**
 * Scan a block comment and emit a comment-block token holding its
 * normalized, trimmed text.
 *
 * Expects the opening '/' to be consumed and '*' to be the next character.
 * Leading decoration ('/', '*', ' ') after the opener is skipped. Inside the
 * comment, stars and spaces after a line feed are dropped to normalize
 * formatting, keeping only the text without the extraneous decoration.
 *
 * The terminator is checked before every character is consumed, so:
 *  - empty comments such as "/**" + "/" end where they should,
 *  - the character right before the terminator is kept,
 *  - every line feed inside the comment is counted.
 * An unterminated comment runs to the end of input without moving past it.
 */
function multilineComment() {
    const atTerminator = () => peek() === '*' && peekNext() === '/';
    const isDecoration = (char) => char === '/' || char === '*' || char === ' ';
    // Consume the '*' that completes the opener.
    advance();
    while (!isAtEnd() && !atTerminator() && isDecoration(peek())) {
        advance();
    }
    let comment = '';
    // A comment goes until we find a comment terminator (*/).
    while (!isAtEnd() && !atTerminator()) {
        const char = advance();
        if (char === '\n') {
            nextLine();
        }
        // After a line feed, a character is dropped while the one following
        // it is still a space or star; the last character of such a run is kept.
        const followsLineFeed = comment.endsWith('\n');
        if (followsLineFeed && (peek() === ' ' || peek() === '*')) {
            continue;
        }
        comment += char;
    }
    if (!isAtEnd()) {
        // Step over the two terminator characters.
        advance();
        advance();
    }
    addToken("CommentBlock" /* CommentBlock */, comment.trim());
}
/**
 * Scan a string literal and emit a string token holding the text between
 * the quotes.
 *
 * The token is assumed to start at the opening quote (at `startIndex`). The
 * string ends at the next occurrence of that same quote character, so the
 * other kind of quote may appear inside it. Line feeds inside the string
 * are counted. Reaching the end of input before the closing quote is
 * reported as an error.
 */
function string() {
    const quote = source.charAt(startIndex);
    while (!isAtEnd() && peek() !== quote) {
        // Consume first, then register the new line, as scanToken does.
        if (advance() === '\n') {
            nextLine();
        }
    }
    if (isAtEnd()) {
        // The loop only stops before the end of input when it sees the
        // closing quote, so getting here means the string never closed.
        const expected = quote === "'" ? `"'"` : `'"'`;
        reportError(`Strings must be terminated with ${expected}`);
    }
    else {
        // advance past the closing quote
        advance();
        // We use "+ 1" and "- 1" to remove the quote marks from the string
        const literal = source.substring(startIndex + 1, currentIndex - 1);
        addToken("StringLiteral" /* StringLiteral */, literal);
    }
}
/**
 * Consume the next character only if it equals `text`.
 *
 * @param {string} text - The character expected next.
 * @returns {boolean} True when the character matched and was consumed.
 */
function consume(text) {
    const matches = peek() === text;
    if (matches) {
        advance();
    }
    return matches;
}
/**
 * Consume one character, moving the index and the column forward by one.
 *
 * @returns {string} The character just consumed ('' past the end of input).
 */
function advance() {
    const consumed = source.charAt(currentIndex);
    currentIndex += 1;
    column += 1;
    return consumed;
}
/**
 * Look at the character consumed before the most recent one.
 *
 * @returns {string} The character two positions behind the index, or ''
 *     when that position is outside the source.
 */
function previous() {
    const position = currentIndex - 2;
    return source.charAt(position);
}
/**
 * Look at the most recently consumed character.
 *
 * @returns {string} The character just behind the index, or '' when that
 *     position is outside the source.
 */
function current() {
    const position = currentIndex - 1;
    return source.charAt(position);
}
/**
 * Look at the next character without consuming it.
 *
 * @returns {string} The character at the index, or '' at the end of input.
 */
function peek() {
    const position = currentIndex;
    return source.charAt(position);
}
/**
 * Look one character past the next one without consuming anything.
 *
 * @returns {string} The character after the index, or '' when that position
 *     is outside the source.
 */
function peekNext() {
    const position = currentIndex + 1;
    return source.charAt(position);
}
/**
 * Register a line break: move to the next line and reset the column to 1.
 */
function nextLine() {
    line += 1;
    column = 1;
}
/**
 * Emit a token whose value is the source text scanned since the token
 * started.
 *
 * @param {string} type - The token type to emit.
 */
function commitToken(type) {
    addToken(type, source.substring(startIndex, currentIndex));
}
/**
 * Build the location of the token being scanned.
 *
 * @returns {{start: object, end: object}} `start` holds the recorded start
 *     line, column and index; `end` holds the current line, column and index.
 */
function currentLocation() {
    const start = { line: startLine, column: startColumn, index: startIndex };
    const end = { line: line, column: column, index: currentIndex };
    return { start: start, end: end };
}
/**
 * Create a token at the current location and append it to the token list.
 *
 * @param {string} type - The token type.
 * @param {string} [value=''] - The token text.
 */
function addToken(type, value = '') {
    const token = factory_1.createToken(type, value, currentLocation());
    tokens.push(token);
}
/**
 * Tell whether every character of the source has been consumed.
 *
 * @returns {boolean} True once the index has reached the source length.
 */
function isAtEnd() {
    const total = source.length;
    return total <= currentIndex;
}
/**
 * Abort the current token by throwing a ScanError tagged with the current
 * location.
 *
 * @param {string} msg - Description of the problem.
 * @throws {ScanError} Always.
 */
function reportError(msg) {
    const failure = new ScanError(msg, currentLocation());
    throw failure;
}
return {
scan,
syncronize,
};
}
exports.createScanner = createScanner;
//# sourceMappingURL=scanner.js.map