UNPKG

quis

Version:

A simple DSL for data sorting and filtering

322 lines (288 loc) 10.2 kB
import { Token, TokenType } from './ast-types.js';

/** Matches one ASCII decimal digit. */
const DIGIT_PATTERN = /[0-9]/;

/** Matches one character that may start a keyword or identifier. */
const LETTER_PATTERN = /[a-zA-Z_]/;

/** Matches one character that may continue an identifier or variable name. */
const ALPHANUMERIC_PATTERN = /[a-zA-Z0-9_]/;

/** Matches one whitespace character. */
const WHITESPACE_PATTERN = /\s/;

/** Symbolic operators made of exactly two characters. */
const TWO_CHAR_OPERATORS = new Map<string, TokenType>([
  ['==', TokenType.EQUALS],
  ['!=', TokenType.NOT_EQUALS],
  ['>=', TokenType.GREATER_THAN_EQUAL],
  ['<=', TokenType.LESS_THAN_EQUAL],
  ['&&', TokenType.AND],
  ['||', TokenType.OR],
]);

/** Operators and punctuation made of exactly one character. */
const SINGLE_CHAR_TOKENS = new Map<string, TokenType>([
  ['>', TokenType.GREATER_THAN],
  ['<', TokenType.LESS_THAN],
  ['(', TokenType.LPAREN],
  [')', TokenType.RPAREN],
  ['.', TokenType.DOT],
  ['[', TokenType.LBRACKET],
  [']', TokenType.RBRACKET],
  [':', TokenType.COLON],
  ['!', TokenType.NOT],
  ['+', TokenType.PLUS],
  ['-', TokenType.MINUS],
  ['*', TokenType.MULTIPLY],
  ['/', TokenType.DIVIDE],
]);

/**
 * Case-insensitive keywords, keyed by their lowercase spelling. The emitted
 * token value is always that lowercase spelling. `is` is handled separately
 * because it may combine with a following `not`.
 */
const KEYWORDS = new Map<string, TokenType>([
  ['true', TokenType.BOOLEAN],
  ['false', TokenType.BOOLEAN],
  ['null', TokenType.NULL],
  ['and', TokenType.AND],
  ['or', TokenType.OR],
  ['not', TokenType.NOT],
  ['gt', TokenType.GT],
  ['gte', TokenType.GTE],
  ['lt', TokenType.LT],
  ['lte', TokenType.LTE],
  ['custom', TokenType.CUSTOM],
]);

/**
 * Token types that can end an operand. A `-` directly after one of these is
 * a binary minus, never the sign of a numeric literal.
 */
const OPERAND_END_TOKENS = new Set<TokenType>([
  TokenType.NUMBER,
  TokenType.STRING,
  TokenType.VARIABLE,
  TokenType.IDENTIFIER,
  TokenType.BOOLEAN,
  TokenType.NULL,
  TokenType.RPAREN,
  TokenType.RBRACKET,
]);

/**
 * Tokenizer for Quis expressions.
 * Converts an input string into an array of tokens terminated by an EOF token.
 *
 * The input is trimmed on construction, so every reported token position is
 * an index into the trimmed string.
 */
export class Tokenizer {
  private input: string;
  private position: number;
  private tokens: Token[];

  /**
   * @param input Expression source text; surrounding whitespace is dropped.
   */
  constructor(input: string) {
    this.input = input.trim();
    this.position = 0;
    this.tokens = [];
  }

  /**
   * Scans the whole input from the beginning and returns its tokens.
   * Calling it again restarts the scan and rebuilds the token list.
   *
   * @returns The tokens in source order, always ending with an EOF token.
   * @throws Error on an unterminated string or an unexpected character.
   */
  tokenize(): Token[] {
    this.tokens = [];
    this.position = 0;

    for (;;) {
      this.skipWhitespace();
      if (this.position >= this.input.length) break;

      const char = this.peek();

      if (this.isDigit(char) || this.startsNegativeNumber()) {
        this.tokenizeNumber();
      } else if (char === '"' || char === "'") {
        this.tokenizeString();
      } else if (char === '$') {
        this.tokenizeVariable();
      } else if (
        // Handles two-character operators and every keyword/identifier.
        !this.tokenizeTwoCharOperator(this.input.slice(this.position, this.position + 2))
      ) {
        this.tokenizeSingleChar(char);
      }
    }

    this.addToken(TokenType.EOF, '');
    return this.tokens;
  }

  /** Returns the current character, or '' at end of input. */
  private peek(): string {
    return this.input.charAt(this.position);
  }

  /** Returns the character after the current one, or '' if there is none. */
  private peekNext(): string {
    return this.input.charAt(this.position + 1);
  }

  /** Consumes and returns the current character, or returns '' at end of input. */
  private advance(): string {
    if (this.position >= this.input.length) return '';
    return this.input.charAt(this.position++);
  }

  /** Moves past any run of whitespace at the current position. */
  private skipWhitespace(): void {
    while (this.position < this.input.length && WHITESPACE_PATTERN.test(this.peek())) {
      this.position++;
    }
  }

  private isDigit(char: string): boolean {
    return DIGIT_PATTERN.test(char);
  }

  private isLetter(char: string): boolean {
    return LETTER_PATTERN.test(char);
  }

  private isAlphaNumeric(char: string): boolean {
    return ALPHANUMERIC_PATTERN.test(char);
  }

  /**
   * Reports whether the current `-` is the sign of a numeric literal.
   *
   * It is a sign only when a digit follows and the previous token cannot end
   * an operand. This keeps `3-5` and `$a -5` as subtractions while `-5`,
   * `(-5` and `== -5` still produce a negative number.
   */
  private startsNegativeNumber(): boolean {
    if (this.peek() !== '-' || !this.isDigit(this.peekNext())) return false;

    const previous: Token | undefined = this.tokens[this.tokens.length - 1];
    return previous === undefined || !OPERAND_END_TOKENS.has(previous.type);
  }

  /**
   * Appends a token.
   *
   * @param type Token type.
   * @param value Token text as stored on the token.
   * @param start Index where the token begins. Defaults to the current
   *   position minus `value.length`, which is only correct when the scanner
   *   has consumed exactly `value`; callers that consumed more (quotes,
   *   interior whitespace) must pass the start explicitly.
   */
  private addToken(
    type: TokenType,
    value: string,
    start: number = this.position - value.length,
  ): void {
    this.tokens.push({ type, value, position: start });
  }

  /**
   * Reads an optional leading `-`, digits, and at most one decimal point.
   * The token value is the exact source text that was consumed.
   */
  private tokenizeNumber(): void {
    const start = this.position;
    let hasDecimal = false;

    if (this.peek() === '-') {
      this.advance();
    }

    while (this.position < this.input.length) {
      const char = this.peek();
      if (char === '.' && !hasDecimal) {
        hasDecimal = true;
      } else if (!this.isDigit(char)) {
        break;
      }
      this.advance();
    }

    this.addToken(TokenType.NUMBER, this.input.slice(start, this.position));
  }

  /**
   * Reads a single- or double-quoted string. There are no escape sequences:
   * the string ends at the next occurrence of the opening quote character.
   * The token value excludes the quotes; its position is the opening quote.
   *
   * @throws Error if the closing quote is missing.
   */
  private tokenizeString(): void {
    const openingQuoteIndex = this.position;
    const quote = this.advance();
    const contentStart = this.position;

    while (this.position < this.input.length && this.peek() !== quote) {
      this.advance();
    }

    if (this.position >= this.input.length) {
      throw new Error(`Unterminated string starting at position ${openingQuoteIndex}`);
    }

    const value = this.input.slice(contentStart, this.position);
    this.advance(); // consume closing quote
    // An explicit start is required: the default would land two characters
    // past the opening quote because both quotes were consumed.
    this.addToken(TokenType.STRING, value, openingQuoteIndex);
  }

  /**
   * Reads `$` followed by a run of alphanumeric/underscore characters.
   * The token value excludes the `$`, and its position is the index just
   * after the `$`.
   */
  private tokenizeVariable(): void {
    this.advance(); // consume '$'
    this.addToken(TokenType.VARIABLE, this.tokenizeIdentifier());
  }

  /** Consumes a run of alphanumeric/underscore characters and returns it. */
  private tokenizeIdentifier(): string {
    const start = this.position;
    while (this.position < this.input.length && this.isAlphaNumeric(this.peek())) {
      this.advance();
    }
    return this.input.slice(start, this.position);
  }

  /**
   * Reads a word and emits the matching keyword token (case-insensitive) or,
   * failing that, an IDENTIFIER token carrying the word as written.
   */
  private tokenizeKeywordOrIdentifier(): void {
    const start = this.position;
    const identifier = this.tokenizeIdentifier();
    const keyword = identifier.toLowerCase();

    if (keyword === 'is') {
      this.tokenizeIs(start);
      return;
    }

    const keywordType = KEYWORDS.get(keyword);
    if (keywordType !== undefined) {
      this.addToken(keywordType, keyword, start);
    } else {
      this.addToken(TokenType.IDENTIFIER, identifier, start);
    }
  }

  /**
   * Emits IS_NOT when `is` is followed by the whole word `not`, else IS.
   *
   * The lookahead consumes nothing unless `not` matches, and `not` must end
   * at a word boundary so that `is nothing` stays IS followed by an
   * identifier instead of becoming IS_NOT plus `hing`.
   *
   * @param start Index of the `i` in `is`, used as the token position.
   */
  private tokenizeIs(start: number): void {
    let lookahead = this.position;
    while (
      lookahead < this.input.length &&
      WHITESPACE_PATTERN.test(this.input.charAt(lookahead))
    ) {
      lookahead++;
    }

    const wordEnd = lookahead + 3;
    const followedByNot =
      this.input.slice(lookahead, wordEnd).toLowerCase() === 'not' &&
      !this.isAlphaNumeric(this.input.charAt(wordEnd));

    if (followedByNot) {
      this.position = wordEnd;
      this.addToken(TokenType.IS_NOT, 'is not', start);
    } else {
      this.addToken(TokenType.IS, 'is', start);
    }
  }

  /**
   * Tries to consume a two-character operator or, if the text starts with a
   * letter, a keyword/identifier.
   *
   * @param twoChar Up to two characters starting at the current position.
   * @returns true if a token was emitted, false if the caller should fall
   *   back to single-character handling.
   */
  private tokenizeTwoCharOperator(twoChar: string): boolean {
    const operatorType = TWO_CHAR_OPERATORS.get(twoChar);
    if (operatorType !== undefined) {
      this.position += 2;
      this.addToken(operatorType, twoChar);
      return true;
    }

    // Word operators such as 'AND' / 'OR' and plain identifiers.
    if (this.isLetter(twoChar.charAt(0))) {
      this.tokenizeKeywordOrIdentifier();
      return true;
    }

    return false;
  }

  /**
   * Consumes a one-character operator or punctuation token, or a
   * keyword/identifier if the character is a letter.
   *
   * A `-` always yields MINUS here; negative literals are recognized earlier
   * by `tokenize`.
   *
   * @throws Error if the character cannot start any token.
   */
  private tokenizeSingleChar(char: string): void {
    const type = SINGLE_CHAR_TOKENS.get(char);
    if (type !== undefined) {
      this.advance();
      this.addToken(type, char);
      return;
    }

    if (this.isLetter(char)) {
      this.tokenizeKeywordOrIdentifier();
      return;
    }

    throw new Error(`Unexpected character '${char}' at position ${this.position}`);
  }
}