UNPKG

ebnf-railroad-visualizer

Version:
151 lines 4.91 kB
/*
 * This work © 2024 by Alexander Voglsperger is licensed under CC BY 4.0.
 * To view a copy of this license, see the provided LICENSE file or visit https://creativecommons.org/licenses/by/4.0/
 */
import { Token, Kind } from './Token.js';

const LF = "\n";
// Characters that may form an identifier (ASCII letters only). No `g` flag,
// so `.test()` is stateless and the literal can safely be shared.
const LETTER = /[a-zA-Z]/;

/**
 * Character-level scanner that turns an input string into `Token`s.
 *
 * The scanner keeps a one-character lookahead in `ch`; `pos` is the index of
 * the character that will be read next, i.e. it is always one past `ch`.
 * Text between quote characters is returned as a single literal token.
 */
export class Scanner {
    /**
     * @param input The complete text to scan.
     */
    constructor(input) {
        this.input = input;
        this.pos = 0;
        this.ch = "";
        this.isLiteral = false;
        // Position state must exist before the first nextChar() call, which
        // advances the column and counts a leading line feed.
        this.line = 1;
        this.column = 0;
        this.nextChar();
    }

    /**
     * Scans and returns the next token.
     *
     * @returns a `Token` whose `kind` is one of the `Kind` values; identifier
     *          and literal tokens additionally carry their text in `str`.
     *          Once the input is exhausted every call returns an eof token.
     * @throws {Error} if a character outside of a literal is neither
     *          whitespace, a quote, a known symbol nor an ASCII letter.
     */
    next() {
        if (this.isAtEnd()) {
            // Return eof token if end of input is reached
            return new Token(Kind.eof);
        }

        const token = new Token(Kind.unknown);

        // Special handling for literals, where almost every character is
        // valid while inside quotes.
        if (this.isLiteral && !Scanner.isQuote(this.ch)) {
            let chars = "";
            // Collect until the closing quote or the end of the input. The
            // guard is isAtEnd() rather than hasNext() because hasNext() is
            // already false while `ch` holds the last character, which would
            // drop that character and stall the scanner.
            while (!this.isAtEnd() && !Scanner.isQuote(this.ch)) {
                // Make space explicitly visible
                chars += this.ch === " " ? "␣" : this.ch;
                this.nextChar();
            }
            token.kind = Kind.literal;
            token.str = chars;
            return token;
        }

        // Skip whitespace characters
        while (!this.isAtEnd() && this.isWhitespace(this.ch)) {
            this.nextChar();
        }
        if (this.isAtEnd()) {
            // Only whitespace was left
            return new Token(Kind.eof);
        }

        // A quote toggles literal mode for the following call(s).
        if (Scanner.isQuote(this.ch)) {
            token.kind = Kind.quote;
            this.nextChar();
            this.isLiteral = !this.isLiteral;
            return token;
        }

        const symbolKind = Scanner.punctuationKind(this.ch);
        // Compare against undefined explicitly: a Kind value may be falsy.
        if (symbolKind !== undefined) {
            token.kind = symbolKind;
            this.nextChar();
            return token;
        }

        if (!LETTER.test(this.ch)) {
            throw new Error(`(line ${this.line}, column ${this.column}) - Unknown character '${this.ch}'`);
        }

        // letter { letter }
        let chars = "";
        while (!this.isAtEnd() && LETTER.test(this.ch)) {
            chars += this.ch;
            this.nextChar();
        }
        token.kind = Kind.ident;
        token.str = chars;
        return token;
    }

    /**
     * Get the next character from the input and skip LF symbol.
     *
     * A single LF is consumed silently and the character after it becomes
     * the current character. When two LFs follow each other, the second one
     * is handed out as a regular character; it is counted as a line break
     * when the scanner moves past it.
     *
     * NOTE(review): because a lone LF is skipped here, it does not separate
     * two identifiers that are divided only by that LF - confirm that this
     * is intended.
     */
    nextChar() {
        if (this.ch === LF) {
            // Leaving a line feed that was handed out as a regular character.
            this.line++;
            this.column = 0;
        }
        this.ch = this.input[this.pos++];
        this.column++;
        if (this.ch === LF) {
            this.line++;
            this.column = 1;
            // Skip LF symbol
            this.ch = this.input[this.pos++];
        }
    }

    /**
     * Check if there are more characters to read after the current one.
     * @returns true if there are more characters to read, otherwise false
     */
    hasNext() {
        return this.pos < this.input.length;
    }

    /**
     * Check if the scanner has moved past the last character of the input,
     * i.e. the current character is no longer part of the input.
     * @returns true if no current character is available, otherwise false
     */
    isAtEnd() {
        return this.pos > this.input.length;
    }

    /**
     * Check if a string consists solely of whitespace.
     * @param ch The character to check.
     * @returns true for whitespace (and for the empty string), false for
     *          anything else, including non-string values.
     */
    isWhitespace(ch) {
        return typeof ch === "string" && ch.replace(/\s/g, '').length === 0;
    }

    /**
     * Returns the position of the scanner.
     * @returns the line and column of the scanner
     */
    getPosition() {
        return [this.line, this.column];
    }

    /**
     * Maps a single-character symbol to its token kind.
     * @param ch The character to look up.
     * @returns the matching `Kind`, or `undefined` if `ch` is not a symbol.
     */
    static punctuationKind(ch) {
        switch (ch) {
            case "(": return Kind.lpar;
            case ")": return Kind.rpar;
            case "[": return Kind.lbrack;
            case "]": return Kind.rbrack;
            case "{": return Kind.lbrace;
            case "}": return Kind.rbrace;
            case ".": return Kind.period;
            case "|": return Kind.pipe;
            case "=": return Kind.assign;
            default: return undefined;
        }
    }

    /**
     * Checks if char is some type of quote character.
     *
     * Because iOS (and probably other OS) uses different quotes.
     * @param char The character to check.
     * @returns `true` if the character is a quote character, `false` otherwise.
     */
    static isQuote(char) {
        return char === '"' || char === '„' || char === '“';
    }
}
//# sourceMappingURL=Scanner.js.map