// @creditkarma/thrift-parser
// Version:
// A parser for Thrift written in TypeScript
// 380 lines • 11.5 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.createScanner = void 0;
const debugger_1 = require("./debugger");
const factory_1 = require("./factory");
const keywords_1 = require("./keywords");
const types_1 = require("./types");
/**
 * Tells whether a character is an ASCII decimal digit.
 *
 * @param {string} value - The character to test.
 * @returns {boolean} `true` when `value` sorts between '0' and '9' inclusive.
 */
function isDigit(value) {
    const notBelowZero = value >= '0';
    const notAboveNine = value <= '9';
    return notBelowZero && notAboveNine;
}
/**
 * Tells whether a character is an ASCII letter.
 *
 * @param {string} value - The character to test.
 * @returns {boolean} `true` for 'a'-'z' and 'A'-'Z', otherwise `false`.
 */
function isAlpha(value) {
    const isLowerCase = value >= 'a' && value <= 'z';
    const isUpperCase = value >= 'A' && value <= 'Z';
    return isLowerCase || isUpperCase;
}
/**
 * Tells whether a character may start an identifier: a letter or an underscore.
 *
 * @param {string} value - The character to test.
 * @returns {boolean} `true` for '_' and for anything `isAlpha` accepts.
 */
function isAlphaOrUnderscore(value) {
    if (value === '_') {
        return true;
    }
    return isAlpha(value);
}
/**
 * Tells whether a character may appear inside an identifier.
 *
 * Accepted characters are letters, underscore, digits, '.' and '-'.
 *
 * @param {string} value - The character to test.
 * @returns {boolean} `true` when the character is allowed in an identifier.
 */
function isValidIdentifier(value) {
    if (value === '.' || value === '-') {
        return true;
    }
    return isAlphaOrUnderscore(value) || isDigit(value);
}
/**
 * Tells whether a character is a hexadecimal digit.
 *
 * @param {string} value - The character to test.
 * @returns {boolean} `true` for '0'-'9', 'A'-'F' and 'a'-'f'.
 */
function isHexDigit(value) {
    const isDecimal = value >= '0' && value <= '9';
    const isUpperHex = value >= 'A' && value <= 'F';
    const isLowerHex = value >= 'a' && value <= 'f';
    return isDecimal || isUpperHex || isLowerHex;
}
/**
 * Tells whether a character is one of the whitespace characters the scanner
 * recognises: space, carriage return, tab or line feed.
 *
 * @param {string} char - The character to test.
 * @returns {boolean} `true` for ' ', '\r', '\t' and '\n', otherwise `false`.
 */
function isWhiteSpace(char) {
    return char === ' ' || char === '\r' || char === '\t' || char === '\n';
}
/**
 * Error raised by the scanner when it meets text it cannot tokenize.
 *
 * Besides the message it carries the source location at which scanning failed.
 */
class ScanError extends Error {
    /**
     * @param {string} msg - Human-readable description of the problem.
     * @param {object} loc - Location of the offending text (start/end positions).
     */
    constructor(msg, loc) {
        super(msg);
        // Without an explicit name the error prints as a generic "Error".
        this.name = 'ScanError';
        // Assigned explicitly so the property holds `msg` exactly as it was passed.
        this.message = msg;
        this.loc = loc;
    }
}
/**
 * Creates a scanner (lexer) for Thrift source text.
 *
 * The scanner walks `src` one character at a time and groups the characters
 * into tokens built with `factory_1.createToken(type, text, loc)`. A scan
 * error does not abort the scan: it is handed to `report` (wrapped by
 * `factory_1.createScanError`) and scanning carries on from the current
 * position.
 *
 * Position bookkeeping: `currentIndex` is the index of the next unread
 * character, `line`/`column` are the 1-based position of that character, and
 * the `start*` variables mark where the token being scanned began.
 *
 * @param {string} src - The Thrift source text to tokenize.
 * @param {Function} [report] - Called with every scan error; defaults to
 *   `debugger_1.noopReporter`.
 * @returns {{scan: Function, syncronize: Function}} `scan()` tokenizes the
 *   whole source and returns the token array (always ending in an EOF token);
 *   `syncronize()` skips ahead to the next whitespace character.
 */
function createScanner(src, report = debugger_1.noopReporter) {
    const source = src;
    const tokens = [];
    let line = 1;
    let column = 1;
    let startLine = 1;
    let startColumn = 1;
    let startIndex = 0;
    let currentIndex = 0;

    /**
     * Tokenizes the entire source.
     *
     * @returns {Array} Every token produced, terminated by an EOF token.
     */
    function scan() {
        while (!isAtEnd()) {
            try {
                startIndex = currentIndex;
                startLine = line;
                startColumn = column;
                scanToken();
            }
            catch (e) {
                report((0, factory_1.createScanError)(e.message, e.loc));
            }
        }
        // The EOF token is zero-width, so its start must be moved to the final
        // position as a whole (index, line and column), not just the index.
        startIndex = currentIndex;
        startLine = line;
        startColumn = column;
        addToken(types_1.SyntaxType.EOF);
        return tokens;
    }

    // Find the beginning of the next word to restart parse after error.
    // (The spelling of the name is part of the returned interface.)
    function syncronize() {
        while (!isAtEnd() && !isWhiteSpace(current())) {
            advance();
        }
    }

    /** Consumes one character and scans the token (if any) that starts with it. */
    function scanToken() {
        const next = advance();
        switch (next) {
            case ' ':
            case '\r':
            case '\t':
                // Ignore whitespace.
                break;
            case '\n':
                nextLine();
                break;
            case '&':
                // Thrift supports (undocumented by the grammar) a syntax for c-style pointers.
                // Pointers are indicated by the '&' token. As these are not relevant to JavaScript we
                // drop them here. This may not be the best thing to do, perhaps should leave them in
                // the parse tree and allow consumers to deal.
                break;
            case '=':
                addToken(types_1.SyntaxType.EqualToken);
                break;
            case '(':
                addToken(types_1.SyntaxType.LeftParenToken);
                break;
            case ')':
                addToken(types_1.SyntaxType.RightParenToken);
                break;
            case '{':
                addToken(types_1.SyntaxType.LeftBraceToken);
                break;
            case '}':
                addToken(types_1.SyntaxType.RightBraceToken);
                break;
            case '[':
                addToken(types_1.SyntaxType.LeftBracketToken);
                break;
            case ']':
                addToken(types_1.SyntaxType.RightBracketToken);
                break;
            case ';':
                addToken(types_1.SyntaxType.SemicolonToken);
                break;
            case ',':
                addToken(types_1.SyntaxType.CommaToken);
                break;
            // Strings can use single or double quotes
            case '"':
            case "'":
                string(next);
                break;
            case ':':
                addToken(types_1.SyntaxType.ColonToken);
                break;
            case '#':
                singleLineComment();
                break;
            case '/':
                if (peek() === '/') {
                    singleLineComment();
                }
                else if (peek() === '*') {
                    multilineComment();
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
                break;
            case '<':
                addToken(types_1.SyntaxType.LessThanToken);
                break;
            case '>':
                addToken(types_1.SyntaxType.GreaterThanToken);
                break;
            case '-':
                if (isDigit(peek())) {
                    number();
                }
                else {
                    addToken(types_1.SyntaxType.MinusToken);
                }
                break;
            default:
                if (isDigit(next)) {
                    number();
                }
                else if (isAlphaOrUnderscore(next)) {
                    identifier();
                }
                else if (isValidIdentifier(next)) {
                    reportError(`Invalid identifier '${next}': Identifiers must begin with a letter or underscore`);
                }
                else {
                    reportError(`Unexpected token: ${next}`);
                }
        }
    }

    /** Scans the rest of an identifier and emits it as a keyword or Identifier token. */
    function identifier() {
        while (!isAtEnd() && peek() !== '\n' && isValidIdentifier(peek())) {
            advance();
        }
        const literal = source.substring(startIndex, currentIndex);
        // Only own entries of the keyword table count. A plain property read would
        // also find inherited members such as `constructor` or `toString`.
        const type = Object.prototype.hasOwnProperty.call(keywords_1.KEYWORDS, literal)
            ? keywords_1.KEYWORDS[literal]
            : undefined;
        if (type == null) {
            addToken(types_1.SyntaxType.Identifier, literal);
        }
        else {
            addToken(type, literal);
        }
    }

    /** Scans a numeric literal: hexadecimal, integer, float or e-notation. */
    function number() {
        if (current() === '0' && (consume('x') || consume('X'))) {
            hexadecimal();
        }
        else {
            integer();
            if (peek() === 'e' || peek() === 'E') {
                enotation();
            }
            else if (peek() === '.' && isDigit(peekNext())) {
                float();
            }
            else {
                commitToken(types_1.SyntaxType.IntegerLiteral);
            }
        }
    }

    /** Consumes the hex digits that follow a "0x"/"0X" prefix and emits a HexLiteral. */
    function hexadecimal() {
        while (!isAtEnd() && peek() !== '\n' && isHexDigit(peek())) {
            advance();
        }
        commitToken(types_1.SyntaxType.HexLiteral);
    }

    /** Consumes an exponent ("e", optional sign, digits) and emits an ExponentialLiteral. */
    function enotation() {
        consume('e') || consume('E');
        consume('-') || consume('+');
        if (isDigit(peek())) {
            integer();
            commitToken(types_1.SyntaxType.ExponentialLiteral);
        }
        else {
            reportError(`Invalid use of e-notation`);
        }
    }

    /** Consumes the fractional part of a number; emits a FloatLiteral unless an exponent follows. */
    function float() {
        consume('.');
        integer();
        if (peek() === 'e' || peek() === 'E') {
            enotation();
        }
        else {
            commitToken(types_1.SyntaxType.FloatLiteral);
        }
    }

    /** Consumes a run of decimal digits. */
    function integer() {
        while (!isAtEnd() && peek() !== '\n' && isDigit(peek())) {
            advance();
        }
    }

    /**
     * Scans a '#' or '//' comment and emits a CommentLine token holding the
     * trimmed comment text. The newline that ends the comment is left unread so
     * that scanToken() sees it and advances the line counter.
     */
    function singleLineComment() {
        // Skip what is left of the comment marker plus any padding after it.
        // peek() yields '' at the end of the source, which ends the loop.
        while (peek() === '/' || peek() === '#' || peek() === ' ') {
            advance();
        }
        const textStart = currentIndex;
        // A comment goes until the end of the line.
        while (!isAtEnd() && peek() !== '\n') {
            advance();
        }
        const comment = source.substring(textStart, currentIndex);
        addToken(types_1.SyntaxType.CommentLine, comment.trim());
    }

    /** True when the next two unread characters are the block-comment terminator. */
    function isBlockCommentEnd() {
        return peek() === '*' && peekNext() === '/';
    }

    /**
     * Scans a block comment and emits a CommentBlock token holding the trimmed
     * comment text. An unterminated comment runs to the end of the source.
     */
    function multilineComment() {
        let comment = '';
        // scanToken() consumed the '/'; this consumes the '*' that completes the opener.
        advance();
        // Skip decoration directly after the opener (as in "/** " or "/*** "), but
        // stop at the terminator so that an empty comment such as "/**/" ends here.
        while (!isBlockCommentEnd() &&
            (peek() === '/' || peek() === '*' || peek() === ' ')) {
            advance();
        }
        // A comment goes until we find a comment terminator (*/).
        while (!isAtEnd() && !isBlockCommentEnd()) {
            const char = advance();
            if (char === '\n') {
                nextLine();
            }
            const isDecoration = char === '*' || isWhiteSpace(char);
            const followedByDecoration = peek() === ' ' || peek() === '*';
            if (isDecoration && followedByDecoration && comment.endsWith('\n')) {
                /**
                 * We ignore stars and spaces after a new line to normalize comment formatting.
                 * We're only keeping the text of the comment without the extraneous formatting.
                 * Only decoration characters are dropped; real text at the start of a line is kept.
                 */
                continue;
            }
            if (char === '*' && isBlockCommentEnd()) {
                // A star glued to the terminator (as in "**/") is closing decoration.
                continue;
            }
            comment += char;
        }
        // Step over the terminator, unless the source ended before one was found.
        if (!isAtEnd()) {
            advance();
            advance();
        }
        addToken(types_1.SyntaxType.CommentBlock, comment.trim());
    }

    /**
     * Scans a string literal whose opening quote has already been consumed and
     * emits a StringLiteral token with the quotes removed.
     *
     * @param {string} terminator - The quote character that opened the string.
     * @throws {ScanError} When the source ends before the closing quote.
     */
    function string(terminator) {
        while (!isAtEnd() && peek() !== terminator) {
            if (peek() === '\\') {
                // A backslash escapes the next character, so an escaped
                // terminator does not close the string.
                advance();
                if (isAtEnd()) {
                    break;
                }
            }
            // Count the newline after consuming it, the same order scanToken() uses.
            if (advance() === '\n') {
                nextLine();
            }
        }
        // The loop stops either in front of the terminator or at the end of the
        // source; in the latter case there is no closing quote.
        if (isAtEnd()) {
            reportError(`String must be terminated with ${terminator}`);
        }
        else {
            // advance past closing "
            advance();
            // We use "+ 1" and "- 1" to remove the quote marks from the string and unescape escaped terminators
            const literal = source
                .substring(startIndex + 1, currentIndex - 1)
                .replace(/\\(\"|\')/g, '$1');
            addToken(types_1.SyntaxType.StringLiteral, literal);
        }
    }

    /** Consumes the next character if it equals `text`; reports whether it did. */
    function consume(text) {
        if (peek() === text) {
            advance();
            return true;
        }
        return false;
    }

    /** Consumes one character and returns it ('' when past the end of the source). */
    function advance() {
        currentIndex++;
        column++;
        return source.charAt(currentIndex - 1);
    }

    /** The character before the most recently consumed one. */
    function previous() {
        return source.charAt(currentIndex - 2);
    }

    /** The most recently consumed character. */
    function current() {
        return source.charAt(currentIndex - 1);
    }

    /** The next unread character ('' at the end of the source). */
    function peek() {
        return source.charAt(currentIndex);
    }

    /** The character after the next unread one. */
    function peekNext() {
        return source.charAt(currentIndex + 1);
    }

    /** Moves the position bookkeeping to the start of the next line. */
    function nextLine() {
        line++;
        column = 1;
    }

    /** Emits a token whose text is everything consumed since the token started. */
    function commitToken(type) {
        const literal = source.substring(startIndex, currentIndex);
        addToken(type, literal);
    }

    /** Builds the location object spanning the token currently being scanned. */
    function currentLocation() {
        return {
            start: {
                line: startLine,
                column: startColumn,
                index: startIndex,
            },
            end: {
                line,
                column,
                index: currentIndex,
            },
        };
    }

    /** Appends a token of the given type and text at the current location. */
    function addToken(type, value = '') {
        const loc = currentLocation();
        tokens.push((0, factory_1.createToken)(type, value, loc));
    }

    /** True once every character of the source has been consumed. */
    function isAtEnd() {
        return currentIndex >= source.length;
    }

    /** Throws a ScanError for the current location; scan() catches and reports it. */
    function reportError(msg) {
        throw new ScanError(msg, currentLocation());
    }

    return {
        scan,
        syncronize,
    };
}
exports.createScanner = createScanner;
//# sourceMappingURL=scanner.js.map