UNPKG

lkml

Version:

A speedy LookML parser & serializer implemented in TypeScript (port of joshtemple/lkml)

199 lines (172 loc) 6.61 kB
// Splits a LookML string into a sequence of tokens.
import * as tokens from "./tokens"
import { CHARACTER_TO_TOKEN, EXPR_BLOCK_KEYS } from "./keys"

/**
 * Splits a LookML string into a sequence of tokens.
 *
 * Attributes:
 *   text: Raw LookML to parse, padded with a null character to denote end of stream
 *   index: Position of lexer in characters as it traverses the text
 *   tokens: Sequence of tokens that contain the relevant chunks of text
 *   line_number: Position of lexer in lines as it traverses the text
 */
export class Lexer {
  text: string
  index: number
  tokens: Array<object>
  line_number: number

  /**
   * Initializes the Lexer with a LookML string and sets the index.
   *
   * @param text LookML string to be lexed
   */
  constructor(text: string) {
    // The trailing "\0" is the end-of-stream sentinel that scan() looks for.
    this.text = text + "\0"
    this.index = 0
    this.tokens = []
    this.line_number = 1
  }

  /** Returns the character at the current index of the text being lexed. */
  peek(): string {
    return this.text.charAt(this.index)
  }

  /**
   * Returns the next n characters from the current index in the text being lexed.
   *
   * @param length The number of characters to return
   */
  peek_multiple(length: number): string {
    return this.text.substring(this.index, this.index + length)
  }

  /**
   * Moves the index forward by n characters.
   *
   * @param length The number of positions forward to move the index
   */
  advance(length: number = 1): void {
    this.index += length
  }

  /** Returns the character at the current index and advances the index by one. */
  consume(): string {
    this.advance()
    return this.text.charAt(this.index - 1)
  }

  /**
   * Returns true once the index has reached (or passed) the "\0" sentinel that
   * the constructor appended, i.e. there is no more real input to read.
   */
  private at_end(): boolean {
    return this.index >= this.text.length - 1
  }

  /**
   * Skips through the text being lexed to the next tokenizable character,
   * passing over spaces, tabs, newlines and `#` comments (which run to the end
   * of the line). Newlines that are skipped increment line_number.
   */
  scan_until_token(): void {
    let found = false
    while (!found) {
      while (this.peek() === "\n" || this.peek() === "\t" || this.peek() === " ") {
        if (this.peek() === "\n") {
          this.line_number += 1
        }
        this.advance()
      }
      if (this.peek() === "#") {
        // Skip the comment body; the newline itself is handled on the next pass.
        while (this.peek() !== "\0" && this.peek() !== "\n") {
          this.advance()
        }
      } else {
        found = true
      }
    }
  }

  /**
   * Tokenizes LookML into a sequence of tokens.
   *
   * This method skips through the text being lexed until it finds a character
   * that indicates the start of a new token. It consumes the relevant
   * characters and adds the tokens to a sequence until it reaches the end of
   * the text.
   *
   * @returns The accumulated token sequence (the same array as this.tokens)
   * @throws Error if an expression block or quoted literal is never terminated
   */
  scan(): Array<object> {
    this.tokens.push(new tokens.StreamStartToken(this.line_number))
    while (true) {
      this.scan_until_token()
      const ch = this.peek()
      if (ch === "\0") {
        this.tokens.push(new CHARACTER_TO_TOKEN[ch](this.line_number))
        break
      } else if (ch === ";") {
        if (this.peek_multiple(2) === ";;") {
          this.advance(2)
          this.tokens.push(new CHARACTER_TO_TOKEN[ch](this.line_number))
        } else {
          // A lone ";" is not a terminator. Previously nothing was consumed
          // here, so the loop never made progress. Lex it as (the start of) a
          // literal instead: ";" is not a stop character for scan_literal(),
          // so at least one character is always consumed.
          this.tokens.push(this.scan_literal())
        }
      } else if (ch === '"') {
        this.advance()
        this.tokens.push(this.scan_quoted_literal())
      } else if (Object.prototype.hasOwnProperty.call(CHARACTER_TO_TOKEN, ch)) {
        this.advance()
        this.tokens.push(new CHARACTER_TO_TOKEN[ch](this.line_number))
      } else if (ch === "+") {
        this.tokens.push(new tokens.RefinementToken(this.line_number))
        this.advance()
      } else if (this.check_for_expression_block(this.peek_multiple(25))) {
        // Key literal, then the ":" separator, then the raw expression text.
        this.tokens.push(this.scan_literal())
        this.scan_until_token()
        this.advance()
        this.tokens.push(new tokens.ValueToken(this.line_number))
        this.scan_until_token()
        this.tokens.push(this.scan_expression_block())
      } else {
        this.tokens.push(this.scan_literal())
      }
    }
    return this.tokens
  }

  /**
   * Returns true if the input string starts with one of EXPR_BLOCK_KEYS
   * immediately followed by a colon, false otherwise.
   *
   * @param str The upcoming characters to inspect
   */
  check_for_expression_block(str: string): boolean {
    for (const entry of EXPR_BLOCK_KEYS) {
      if (str.startsWith(entry + ":")) {
        return true
      }
    }
    return false
  }

  /**
   * Returns a token from an expression block string.
   *
   * Consumes characters up to (but not including) the `;;` terminal. The
   * surrounding whitespace is stripped from the expression string, since
   * Looker usually adds an extra space before the `;;` terminal.
   *
   * @throws Error if the end of the text is reached before `;;` is found
   */
  scan_expression_block(): object {
    const start_line = this.line_number
    let chars = ""
    while (this.peek_multiple(2) !== ";;") {
      if (this.at_end()) {
        // Without this guard consume() would keep returning "" forever.
        throw new Error(
          `Unterminated expression block starting on line ${start_line}: expected ";;"`
        )
      }
      if (this.peek() === "\n") {
        this.line_number += 1
      }
      chars += this.consume()
    }
    // TODO: this was an rtrim... could it cause a bug?
    // NOTE: when called from scan(), leading spaces, tabs and newlines have
    // already been skipped by scan_until_token().
    chars = chars.trim()
    return new tokens.ExpressionBlockToken(chars, this.line_number)
  }

  /**
   * Returns a token from a literal string.
   *
   * Consumes characters until the end-of-stream sentinel, whitespace
   * (space, tab, newline) or one of `:`, `}`, `{`, `,`, `]` is reached.
   */
  scan_literal(): object {
    const stop_chars = "\0 \n\t:}{,]"
    let chars = ""
    // peek() always yields exactly one character here: the loop stops at the
    // "\0" sentinel before the index can run past the end of the text.
    while (stop_chars.indexOf(this.peek()) === -1) {
      chars += this.consume()
    }
    return new tokens.LiteralToken(chars, this.line_number)
  }

  /**
   * Returns a token from a quoted literal string.
   *
   * The initial double quote character is consumed in the scan method, so
   * this method only scans for the trailing quote to indicate the end of the
   * token. Backslash escapes are kept verbatim (backslash plus the escaped
   * character) in the token text.
   *
   * @throws Error if the end of the text is reached before the closing quote
   */
  scan_quoted_literal(): object {
    const start_line = this.line_number
    let chars = ""
    while (true) {
      if (this.at_end()) {
        // Without this guard peek() would keep returning "" forever.
        throw new Error(
          `Unterminated quoted literal starting on line ${start_line}: expected '"'`
        )
      }
      const ch = this.peek()
      if (ch === '"') {
        break
      } else if (ch === "\\") {
        chars += this.consume() // Extra consume to skip the escaped character
        if (this.peek() === "\n") {
          // An escaped newline still moves the lexer to the next line.
          this.line_number += 1
        }
      } else if (ch === "\n") {
        this.line_number += 1
      }
      chars += this.consume()
    }
    this.advance() // Skip the closing quote
    return new tokens.QuotedLiteralToken(chars, this.line_number)
  }
}