UNPKG

@ordojs/core

Version:

Core compiler and runtime for OrdoJS framework

460 lines (413 loc) 12.1 kB
/**
 * @fileoverview OrdoJS Lexer - Full implementation for lexical analysis
 */

import {
  LexicalContext,
  LexicalError,
  TokenType,
  type SourcePosition,
  type Token,
  type TokenStream
} from '../types/index.js';

/**
 * Hand-written scanner that turns OrdoJS source text into a list of tokens.
 *
 * The lexer keeps a stack of lexical contexts that is pushed on `{` and popped
 * on `}`; the only place the context currently changes tokenization is `<`,
 * which starts an HTML tag inside a `markup { ... }` block and is a comparison
 * operator everywhere else.
 */
export class OrdoJSLexer {
  private source: string;
  /** Offset of the next unread character in `source`. */
  private current: number = 0;
  /** 1-based line of the next unread character. */
  private line: number = 1;
  /** 1-based column of the next unread character. */
  private column: number = 1;
  private tokens: Token[] = [];
  private filename: string;
  private contextStack: LexicalContext[] = [LexicalContext.COMPONENT];
  /** Position of the first character of the token currently being scanned. */
  private tokenStart: SourcePosition = { line: 1, column: 1, offset: 0 };

  /**
   * Reserved words and their token types. A `Map` is used instead of a plain
   * object so identifiers such as `constructor` or `toString` do not resolve
   * to members inherited from `Object.prototype`.
   */
  private readonly keywords: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ['component', TokenType.COMPONENT],
    ['client', TokenType.CLIENT],
    ['server', TokenType.SERVER],
    ['markup', TokenType.MARKUP],
    ['let', TokenType.LET],
    ['const', TokenType.CONST],
    ['if', TokenType.IF],
    ['else', TokenType.ELSE],
    ['each', TokenType.EACH],
    ['public', TokenType.PUBLIC],
    ['bind', TokenType.BIND],
    ['true', TokenType.BOOLEAN],
    ['false', TokenType.BOOLEAN]
  ]);

  /**
   * @param source - Source text to tokenize.
   * @param filename - Name passed along to `LexicalError` when scanning fails.
   */
  constructor(source: string = '', filename: string = 'unknown') {
    this.source = source;
    this.filename = filename;
  }

  /**
   * Scans the whole source from the beginning and returns a cursor over the
   * resulting tokens. The list always ends with an `EOF` token.
   *
   * @returns A token stream; `peek`/`advance`/`previous` fall back to the EOF
   * token instead of returning `undefined` when the cursor is out of range.
   * @throws LexicalError on an unexpected character or an unterminated
   * string, template string or block comment.
   */
  tokenize(): TokenStream {
    this.current = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];
    this.contextStack = [LexicalContext.COMPONENT];

    while (!this.isAtEnd()) {
      this.scanToken();
    }

    const eofToken = this.addToken(TokenType.EOF, '', this.getPosition());

    // Capture the array so that a later tokenize() call on the same lexer
    // cannot swap the tokens out from under a stream that is still in use.
    const tokens = this.tokens;
    let cursor = 0;

    return {
      tokens,
      // Expose the live cursor rather than a constant 0.
      get current(): number {
        return cursor;
      },
      set current(index: number) {
        cursor = index;
      },
      peek: () => tokens[cursor] ?? eofToken,
      advance: () => {
        const token = tokens[cursor];
        if (token === undefined) return eofToken;
        cursor++;
        return token;
      },
      isAtEnd: () => cursor >= tokens.length,
      previous: () => tokens[Math.max(0, cursor - 1)] ?? eofToken
    };
  }

  /** Scans one lexeme (or skips whitespace / a comment) starting at `current`. */
  private scanToken(): void {
    // Remember where the lexeme starts *before* consuming its first character.
    this.tokenStart = this.getPosition();
    const char = this.advance();

    switch (char) {
      case ' ':
      case '\r':
      case '\t':
      case '\n':
        // Whitespace produces no token; advance() already did the line bookkeeping.
        break;

      case '(':
        this.addToken(TokenType.LEFT_PAREN, char);
        break;
      case ')':
        this.addToken(TokenType.RIGHT_PAREN, char);
        break;
      case '{':
        this.addToken(TokenType.LEFT_BRACE, char);
        this.updateContext(char);
        break;
      case '}':
        this.addToken(TokenType.RIGHT_BRACE, char);
        this.updateContext(char);
        break;
      case '[':
        this.addToken(TokenType.LEFT_BRACKET, char);
        break;
      case ']':
        this.addToken(TokenType.RIGHT_BRACKET, char);
        break;
      case ',':
        this.addToken(TokenType.COMMA, char);
        break;
      case '.':
        this.addToken(TokenType.DOT, char);
        break;
      case ';':
        this.addToken(TokenType.SEMICOLON, char);
        break;
      case ':':
        this.addToken(TokenType.COLON, char);
        break;
      case '?':
        this.addToken(TokenType.QUESTION, char);
        break;

      case '+':
        if (this.match('+')) {
          this.addToken(TokenType.INCREMENT, '++');
        } else {
          this.addToken(TokenType.PLUS, char);
        }
        break;
      case '-':
        if (this.match('-')) {
          this.addToken(TokenType.DECREMENT, '--');
        } else {
          this.addToken(TokenType.MINUS, char);
        }
        break;
      case '*':
        this.addToken(TokenType.MULTIPLY, char);
        break;
      case '/':
        if (this.match('/')) {
          // Single-line comment: skip to (but not past) the line break.
          while (this.peek() !== '\n' && !this.isAtEnd()) {
            this.advance();
          }
        } else if (this.match('*')) {
          this.scanMultiLineComment();
        } else {
          this.addToken(TokenType.DIVIDE, char);
        }
        break;
      case '%':
        this.addToken(TokenType.MODULO, char);
        break;

      case '=':
        if (this.match('=')) {
          this.addToken(TokenType.EQUALS, '==');
        } else {
          this.addToken(TokenType.ASSIGN, char);
        }
        break;
      case '!':
        if (this.match('=')) {
          this.addToken(TokenType.NOT_EQUALS, '!=');
        } else {
          this.addToken(TokenType.LOGICAL_NOT, char);
        }
        break;
      case '<':
        // Inside a markup block '<' opens or closes a tag; elsewhere it compares.
        if (this.getCurrentContext() === LexicalContext.MARKUP_BLOCK) {
          this.scanHTMLTag();
        } else if (this.match('=')) {
          this.addToken(TokenType.LESS_EQUAL, '<=');
        } else {
          this.addToken(TokenType.LESS_THAN, char);
        }
        break;
      case '>':
        if (this.match('=')) {
          this.addToken(TokenType.GREATER_EQUAL, '>=');
        } else {
          this.addToken(TokenType.GREATER_THAN, char);
        }
        break;

      case '&':
        // Only the doubled form is a token; a lone '&' is rejected.
        if (this.match('&')) {
          this.addToken(TokenType.LOGICAL_AND, '&&');
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;
      case '|':
        // Only the doubled form is a token; a lone '|' is rejected.
        if (this.match('|')) {
          this.addToken(TokenType.LOGICAL_OR, '||');
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;

      case '"':
        this.scanString('"');
        break;
      case "'":
        this.scanString("'");
        break;
      case '`':
        this.scanTemplateString();
        break;

      default:
        if (this.isDigit(char)) {
          this.scanNumber();
        } else if (this.isAlpha(char)) {
          this.scanIdentifier();
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;
    }
  }

  /**
   * Scans a quoted string whose opening quote has already been consumed.
   * Raw line breaks are kept in the value. `\n`, `\t` and `\r` are decoded;
   * any other escaped character stands for itself.
   *
   * @param quote - The quote character that terminates the string.
   * @throws LexicalError if the input ends before the closing quote.
   */
  private scanString(quote: string): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== quote) {
      // advance() consumes every character, including '\n', so the loop
      // always makes progress and line/column stay correct.
      const char = this.advance();
      if (char !== '\\') {
        value += char;
        continue;
      }

      const escaped = this.advance();
      switch (escaped) {
        case 'n':
          value += '\n';
          break;
        case 't':
          value += '\t';
          break;
        case 'r':
          value += '\r';
          break;
        default:
          // Covers \\, \" and \' as well as unknown escapes.
          value += escaped;
      }
    }

    if (this.isAtEnd()) {
      this.throwError('Unterminated string');
    }

    this.advance(); // Consume the closing quote
    this.addToken(TokenType.STRING, value);
  }

  /**
   * Scans a backtick-delimited string whose opening backtick has already been
   * consumed. The content is taken verbatim (no escape or interpolation
   * handling) and emitted as a `STRING` token.
   *
   * @throws LexicalError if the input ends before the closing backtick.
   */
  private scanTemplateString(): void {
    let value = '';

    while (!this.isAtEnd() && this.peek() !== '`') {
      value += this.advance();
    }

    if (this.isAtEnd()) {
      this.throwError('Unterminated template string');
    }

    this.advance(); // Consume the closing backtick
    this.addToken(TokenType.STRING, value);
  }

  /**
   * Scans an integer or decimal literal whose first digit has already been
   * consumed. A '.' is only part of the number when a digit follows it.
   */
  private scanNumber(): void {
    while (this.isDigit(this.peek())) {
      this.advance();
    }

    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // Consume the '.'
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    // Slice from the token start so the first digit is part of the lexeme.
    const value = this.source.substring(this.tokenStart.offset, this.current);
    this.addToken(TokenType.NUMBER, value);
  }

  /**
   * Scans an identifier or keyword whose first character has already been
   * consumed, and emits it with the type returned by `getKeywordType`.
   */
  private scanIdentifier(): void {
    while (this.isAlphaNumeric(this.peek())) {
      this.advance();
    }

    const value = this.source.substring(this.tokenStart.offset, this.current);
    this.addToken(this.getKeywordType(value), value);
  }

  /**
   * Emits the opening delimiter of an HTML tag. The '<' has already been
   * consumed; a following '/' makes it a closing-tag delimiter.
   */
  private scanHTMLTag(): void {
    if (this.match('/')) {
      this.addToken(TokenType.HTML_TAG_CLOSE, '</');
    } else {
      this.addToken(TokenType.HTML_TAG_OPEN, '<');
    }
  }

  /**
   * Skips a block comment whose opening delimiter has already been consumed.
   *
   * @throws LexicalError if the input ends before the closing delimiter.
   */
  private scanMultiLineComment(): void {
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // Consume '*'
        this.advance(); // Consume '/'
        return;
      }
      this.advance();
    }

    this.throwError('Unterminated comment');
  }

  /**
   * Maintains the context stack. Must be called right after the brace token
   * has been added: for '{' it inspects the token *before* that brace to
   * decide which context the new block opens.
   *
   * @param char - The brace that was just tokenized ('{' or '}').
   */
  private updateContext(char: string): void {
    // This is a simplified context management - in a real implementation,
    // you'd need more sophisticated logic to track context transitions
    if (char === '{') {
      // tokens[length - 1] is the brace itself, so look one further back.
      const lastToken = this.tokens[this.tokens.length - 2];

      if (lastToken?.value === 'client') {
        this.contextStack.push(LexicalContext.CLIENT_BLOCK);
      } else if (lastToken?.value === 'server') {
        this.contextStack.push(LexicalContext.SERVER_BLOCK);
      } else if (lastToken?.value === 'markup') {
        this.contextStack.push(LexicalContext.MARKUP_BLOCK);
      } else {
        this.contextStack.push(LexicalContext.JAVASCRIPT);
      }
    } else if (char === '}') {
      // Never pop the root context, even on an unbalanced '}'.
      if (this.contextStack.length > 1) {
        this.contextStack.pop();
      }
    }
  }

  /** Returns the innermost lexical context, or `COMPONENT` if the stack is empty. */
  private getCurrentContext(): LexicalContext {
    // `??` rather than `||` so a falsy enum member is not replaced by the default.
    return this.contextStack[this.contextStack.length - 1] ?? LexicalContext.COMPONENT;
  }

  /**
   * Classifies a scanned word.
   *
   * @param value - The identifier text.
   * @returns The keyword's token type, or `IDENTIFIER` for any other word.
   */
  private getKeywordType(value: string): TokenType {
    return this.keywords.get(value) ?? TokenType.IDENTIFIER;
  }

  /**
   * Consumes the next character only if it equals `expected`.
   * Only ever called with operator characters, never with a line break, so
   * bumping the column is sufficient position bookkeeping.
   */
  private match(expected: string): boolean {
    if (this.isAtEnd()) return false;
    if (this.source.charAt(this.current) !== expected) return false;

    this.current++;
    this.column++;
    return true;
  }

  /**
   * Consumes and returns the next character, keeping `line` and `column` in
   * sync (a line break moves to column 1 of the next line). Returns '\0'
   * without consuming anything at end of input.
   */
  private advance(): string {
    if (this.isAtEnd()) return '\0';

    const char = this.source.charAt(this.current);
    this.current++;

    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }

    return char;
  }

  /** Returns the next character without consuming it, or '\0' at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.source.charAt(this.current);
  }

  /** Returns the character after the next one, or '\0' if there is none. */
  private peekNext(): string {
    if (this.current + 1 >= this.source.length) return '\0';
    return this.source.charAt(this.current + 1);
  }

  private isAtEnd(): boolean {
    return this.current >= this.source.length;
  }

  /** True for ASCII '0'-'9'. */
  private isDigit(char: string): boolean {
    return char >= '0' && char <= '9';
  }

  /** True for ASCII letters and '_'. */
  private isAlpha(char: string): boolean {
    return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || char === '_';
  }

  private isAlphaNumeric(char: string): boolean {
    return this.isAlpha(char) || this.isDigit(char);
  }

  /**
   * Appends a token that ends at the current position.
   *
   * @param type - Token type.
   * @param value - Token text (for strings: the decoded content).
   * @param startPosition - Where the token begins; defaults to the start of
   * the lexeme currently being scanned.
   * @returns The token that was appended.
   */
  private addToken(type: TokenType, value: string, startPosition?: SourcePosition): Token {
    const start = startPosition ?? this.tokenStart;
    const end = this.getPosition();

    const token: Token = {
      type,
      value,
      position: start,
      range: { start, end }
    };

    this.tokens.push(token);
    return token;
  }

  /** Snapshot of the position of the next unread character. */
  private getPosition(): SourcePosition {
    return {
      line: this.line,
      column: this.column,
      offset: this.current
    };
  }

  /** Throws a `LexicalError` carrying the current position, next character and filename. */
  private throwError(message: string): never {
    throw new LexicalError(
      message,
      this.getPosition(),
      this.peek(),
      this.filename
    );
  }
}