UNPKG

@ordojs/core

Version:

Core compiler and runtime for OrdoJS framework

691 lines 23.2 kB
/**
 * @fileoverview OrdoJS Lexer - Refactored modular implementation
 * @author OrdoJS Framework Team
 */
import { LexicalContext, LexicalError, TokenType } from '../types/index.js';

/**
 * Character classification utilities.
 *
 * Every predicate expects a one-character string. The lexer's `peek()` yields
 * '\0' at end of input, for which all predicates below return false.
 */
class CharacterUtils {
  /** @returns {boolean} true for ASCII '0'-'9'. */
  static isDigit(char) {
    return char >= '0' && char <= '9';
  }

  /** @returns {boolean} true for ASCII letters, '_' and '$'. */
  static isAlpha(char) {
    return (
      (char >= 'a' && char <= 'z') ||
      (char >= 'A' && char <= 'Z') ||
      char === '_' ||
      char === '$'
    );
  }

  /** @returns {boolean} true for anything accepted by isAlpha or isDigit. */
  static isAlphaNumeric(char) {
    // Referenced through the class name (not `this`) so the predicates stay
    // usable when passed around as plain callbacks.
    return CharacterUtils.isAlpha(char) || CharacterUtils.isDigit(char);
  }

  /** @returns {boolean} true for space, tab and carriage return (not '\n'). */
  static isWhitespace(char) {
    return char === ' ' || char === '\t' || char === '\r';
  }

  /** @returns {boolean} true for '\n' only. */
  static isNewline(char) {
    return char === '\n';
  }

  /** @returns {boolean} true for '"', "'" and '`'. */
  static isQuote(char) {
    return char === '"' || char === "'" || char === '`';
  }

  /** @returns {boolean} true for 0-9, a-f and A-F. */
  static isHexDigit(char) {
    return (
      CharacterUtils.isDigit(char) ||
      (char >= 'a' && char <= 'f') ||
      (char >= 'A' && char <= 'F')
    );
  }
}

/**
 * Keyword registry for efficient keyword lookup
 */
class KeywordRegistry {
  static keywords = new Map([
    ['component', TokenType.COMPONENT],
    ['client', TokenType.CLIENT],
    ['server', TokenType.SERVER],
    ['markup', TokenType.MARKUP],
    ['let', TokenType.LET],
    ['const', TokenType.CONST],
    ['if', TokenType.IF],
    ['else', TokenType.ELSE],
    ['each', TokenType.EACH],
    ['public', TokenType.PUBLIC],
    ['bind', TokenType.BIND],
    ['true', TokenType.BOOLEAN],
    ['false', TokenType.BOOLEAN],
    // Registered words that are treated as plain identifiers for now.
    ...[
      'null', 'undefined', 'function', 'return', 'for', 'while', 'break',
      'continue', 'try', 'catch', 'finally', 'throw', 'async', 'await',
      'import', 'export', 'default', 'class', 'extends', 'interface', 'type',
      'enum', 'namespace'
    ].map((word) => [word, TokenType.IDENTIFIER])
  ]);

  /**
   * @param {string} identifier - Scanned identifier text.
   * @returns The registered token type, or TokenType.IDENTIFIER when the word
   *   is not registered.
   */
  static getTokenType(identifier) {
    // `??` rather than `||` so a falsy enum value is never mistaken for a miss.
    return KeywordRegistry.keywords.get(identifier) ?? TokenType.IDENTIFIER;
  }

  /** @returns {boolean} true when the word is registered (including the identifier-typed ones). */
  static isKeyword(identifier) {
    return KeywordRegistry.keywords.has(identifier);
  }

  /** @returns {string[]} All registered words, in registration order. */
  static getAllKeywords() {
    return Array.from(KeywordRegistry.keywords.keys());
  }
}

/** Block keyword token type -> lexical context entered by the '{' that follows it. */
const BLOCK_CONTEXTS = new Map([
  [TokenType.CLIENT, LexicalContext.CLIENT_BLOCK],
  [TokenType.SERVER, LexicalContext.SERVER_BLOCK],
  [TokenType.MARKUP, LexicalContext.MARKUP_BLOCK]
]);

/** Characters that always form a one-character token. */
const PUNCTUATION = new Map([
  ['(', TokenType.LEFT_PAREN],
  [')', TokenType.RIGHT_PAREN],
  ['[', TokenType.LEFT_BRACKET],
  [']', TokenType.RIGHT_BRACKET],
  [',', TokenType.COMMA],
  ['.', TokenType.DOT],
  [';', TokenType.SEMICOLON],
  [':', TokenType.COLON],
  ['?', TokenType.QUESTION]
]);

/**
 * Operator table keyed by first character. `doubleChar` is the second
 * character that upgrades the operator to its `double` token type.
 * '&' and '|' have no valid single-character form.
 */
const OPERATORS = new Map([
  ['+', { single: TokenType.PLUS, double: TokenType.INCREMENT, doubleChar: '+' }],
  ['-', { single: TokenType.MINUS, double: TokenType.DECREMENT, doubleChar: '-' }],
  ['*', { single: TokenType.MULTIPLY }],
  ['%', { single: TokenType.MODULO }],
  ['=', { single: TokenType.ASSIGN, double: TokenType.EQUALS, doubleChar: '=' }],
  ['!', { single: TokenType.LOGICAL_NOT, double: TokenType.NOT_EQUALS, doubleChar: '=' }],
  ['<', { single: TokenType.LESS_THAN, double: TokenType.LESS_EQUAL, doubleChar: '=' }],
  ['>', { single: TokenType.GREATER_THAN, double: TokenType.GREATER_EQUAL, doubleChar: '=' }],
  ['&', { single: TokenType.LOGICAL_AND, double: TokenType.LOGICAL_AND, doubleChar: '&' }],
  ['|', { single: TokenType.LOGICAL_OR, double: TokenType.LOGICAL_OR, doubleChar: '|' }]
]);

/** Escape letters that map directly to one character. */
const SIMPLE_ESCAPES = new Map([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['b', '\b'],
  ['f', '\f'],
  ['v', '\v'],
  ['0', '\0'],
  ['\\', '\\'],
  ['"', '"'],
  ["'", "'"],
  ['`', '`']
]);

/** Largest valid Unicode code point accepted by String.fromCodePoint. */
const MAX_CODE_POINT = 0x10ffff;

/**
 * Context manager for tracking lexical contexts
 *
 * Holds a stack whose bottom entry is always LexicalContext.COMPONENT.
 */
class ContextManager {
  contextStack = [LexicalContext.COMPONENT];

  /** @returns The innermost context (COMPONENT when the stack is empty). */
  getCurrentContext() {
    return this.contextStack[this.contextStack.length - 1] ?? LexicalContext.COMPONENT;
  }

  /** Enter a nested context. */
  pushContext(context) {
    this.contextStack.push(context);
  }

  /**
   * Leave the innermost context.
   * @returns The removed context, or undefined when only the base context is
   *   left (the base is never popped).
   */
  popContext() {
    if (this.contextStack.length > 1) {
      return this.contextStack.pop();
    }
    return undefined;
  }

  /**
   * React to a brace: '{' pushes a context chosen from the token that precedes
   * it, '}' pops one. Any other character is ignored.
   *
   * @param {string} char - The brace character.
   * @param {Array} previousTokens - Tokens emitted BEFORE the brace. The brace
   *   token itself must not be the last entry, otherwise the opener keyword
   *   cannot be seen.
   */
  updateContext(char, previousTokens) {
    if (char === '}') {
      this.popContext();
      return;
    }
    if (char !== '{') {
      return;
    }
    const opener = previousTokens[previousTokens.length - 1];
    // Match on token type so a string literal whose text happens to be
    // "client" / "server" / "markup" does not open a block context.
    this.pushContext(BLOCK_CONTEXTS.get(opener?.type) ?? LexicalContext.JAVASCRIPT);
  }

  /** @returns {Array} A copy of the context stack, outermost first. */
  getContextStack() {
    return [...this.contextStack];
  }

  /** Drop every nested context and return to the base COMPONENT context. */
  reset() {
    this.contextStack = [LexicalContext.COMPONENT];
  }
}

/**
 * Enhanced OrdoJS Lexer with modular architecture
 *
 * Typical use: `new OrdoJSLexer(options).tokenize(source, filename)`.
 */
export class OrdoJSLexer {
  state;
  options;
  contextManager;
  tokens = [];

  /**
   * @param {object} [options]
   * @param {boolean} [options.generateSourceMaps=true]
   * @param {boolean} [options.enableRecovery=false] - Record lexical errors and
   *   keep scanning instead of throwing.
   * @param {number} [options.maxErrors=10] - Recovery stops once this many
   *   errors have been recorded.
   * @param {boolean} [options.contextAware=true] - Track block contexts on braces.
   * @param {Array} [options.tokenProcessors=[]] - Processors applied after scanning.
   */
  constructor(options = {}) {
    this.options = {
      generateSourceMaps: true,
      enableRecovery: false,
      maxErrors: 10,
      contextAware: true,
      ...options,
      // Own copy: addTokenProcessor/removeTokenProcessor must never mutate an
      // array owned by the caller (e.g. the exported defaultTokenProcessors).
      tokenProcessors: [...(options.tokenProcessors ?? [])]
    };
    this.contextManager = new ContextManager();
    this.state = this.createInitialState();
  }

  /**
   * Tokenize source code into token stream
   *
   * The returned stream always ends with an EOF token. With `enableRecovery`,
   * each LexicalError is recorded once (see getErrors) and scanning resumes
   * after the text already consumed, until `maxErrors` errors are recorded.
   *
   * @param {string} source - Source text to scan.
   * @param {string} [filename='unknown'] - Name attached to reported errors.
   * @returns {object} Token stream (see createTokenStream).
   * @throws {TypeError} When `source` is not a string.
   * @throws {LexicalError} On the first lexical error when recovery is disabled.
   */
  tokenize(source, filename = 'unknown') {
    if (typeof source !== 'string') {
      throw new TypeError('OrdoJSLexer.tokenize: source must be a string');
    }
    this.initializeState(source, filename);

    // scanToken() always consumes at least one character before it can throw,
    // so resuming after an error is guaranteed to make progress.
    while (!this.isAtEnd()) {
      try {
        this.scanToken();
      } catch (error) {
        this.recordRecoverableError(error);
        if (this.state.errors.length >= this.options.maxErrors) {
          break;
        }
      }
    }
    this.addToken(TokenType.EOF, '');

    // Process tokens through registered processors
    try {
      this.processTokens();
    } catch (error) {
      // processTokens only replaces this.tokens on success, so the raw tokens
      // are still intact here.
      this.recordRecoverableError(error);
    }
    return this.createTokenStream();
  }

  /**
   * Get lexer state for debugging
   * @returns {object} Shallow copy of the scanner state with the live context stack.
   */
  getState() {
    return {
      ...this.state,
      contextStack: this.contextManager.getContextStack()
    };
  }

  /**
   * Get all errors encountered during tokenization
   * @returns {Array} Copy of the recorded errors.
   */
  getErrors() {
    return [...this.state.errors];
  }

  /**
   * Add custom token processor
   * @param {{name: string, handles: Array, process: Function}} processor
   */
  addTokenProcessor(processor) {
    this.options.tokenProcessors.push(processor);
  }

  /**
   * Remove token processor by name
   * @param {string} name
   * @returns {boolean} true when a processor was found and removed.
   */
  removeTokenProcessor(name) {
    const index = this.options.tokenProcessors.findIndex((p) => p.name === name);
    if (index < 0) {
      return false;
    }
    this.options.tokenProcessors.splice(index, 1);
    return true;
  }

  /** @returns {object} A fresh scanner state positioned at line 1, column 1. */
  createInitialState() {
    return {
      source: '',
      current: 0,
      line: 1,
      column: 1,
      filename: 'unknown',
      contextStack: [LexicalContext.COMPONENT],
      errors: []
    };
  }

  /** Reset scanner state, context stack and token list for a new input. */
  initializeState(source, filename) {
    this.state = { ...this.createInitialState(), source, filename };
    this.contextManager.reset();
    this.tokens = [];
  }

  /**
   * In recovery mode, record a LexicalError; otherwise rethrow.
   * @throws The given error when recovery is disabled or it is not a LexicalError.
   */
  recordRecoverableError(error) {
    if (!this.options.enableRecovery || !(error instanceof LexicalError)) {
      throw error;
    }
    this.state.errors.push(error);
  }

  /**
   * Scan exactly one lexical element starting at the current offset.
   * Must only be called when not at end of input.
   * @throws {LexicalError} On malformed input.
   */
  scanToken() {
    const char = this.advance();

    // Whitespace and newlines produce no token; advance() already updated
    // the line/column bookkeeping.
    if (CharacterUtils.isWhitespace(char) || CharacterUtils.isNewline(char)) {
      return;
    }

    // Single character tokens
    if (PUNCTUATION.has(char)) {
      this.addToken(PUNCTUATION.get(char), char);
      return;
    }

    // Context-sensitive tokens
    if (char === '{' || char === '}') {
      this.handleBrace(char);
      return;
    }

    // HTML tags (context-sensitive). Checked before operators, because the
    // operator table would otherwise always claim '<' first.
    if (char === '<' && this.contextManager.getCurrentContext() === LexicalContext.MARKUP_BLOCK) {
      this.scanHTMLTag();
      return;
    }

    // Multi-character operators
    if (this.handleOperators(char)) {
      return;
    }

    // Comments (or the division operator)
    if (char === '/' && this.handleComments()) {
      return;
    }

    // Strings
    if (CharacterUtils.isQuote(char)) {
      this.scanString(char);
      return;
    }

    // Numbers
    if (CharacterUtils.isDigit(char)) {
      this.scanNumber();
      return;
    }

    // Identifiers and keywords
    if (CharacterUtils.isAlpha(char)) {
      this.scanIdentifier();
      return;
    }

    // Handle unexpected characters
    this.handleUnexpectedCharacter(char);
  }

  /** Emit a brace token and, when context tracking is on, push/pop a context. */
  handleBrace(char) {
    // Update the context BEFORE appending the brace, so the last token the
    // context manager sees is the keyword in front of '{', not the brace.
    if (this.options.contextAware) {
      this.contextManager.updateContext(char, this.tokens);
    }
    this.addToken(char === '{' ? TokenType.LEFT_BRACE : TokenType.RIGHT_BRACE, char);
  }

  /**
   * Try to emit an operator token for an already-consumed first character.
   * @returns {boolean} false when `char` does not start an operator.
   * @throws {LexicalError} For a lone '&' or '|'.
   */
  handleOperators(char) {
    const op = OPERATORS.get(char);
    if (!op) {
      return false;
    }
    if (op.doubleChar !== undefined && this.match(op.doubleChar)) {
      this.addToken(op.double, char + op.doubleChar);
      return true;
    }
    if (char === '&' || char === '|') {
      // These require double characters
      this.throwError(`Unexpected character: ${char}. Did you mean '${char}${char}'?`);
    }
    this.addToken(op.single, char);
    return true;
  }

  /**
   * Handle what follows an already-consumed '/': a line comment, a block
   * comment, or the division operator.
   * @returns {boolean} Always true - the slash is always accounted for.
   */
  handleComments() {
    if (this.match('/')) {
      this.scanSingleLineComment();
    } else if (this.match('*')) {
      this.scanMultiLineComment();
    } else {
      this.addToken(TokenType.DIVIDE, '/');
    }
    return true;
  }

  /** Skip to (not past) the next newline or end of input. */
  scanSingleLineComment() {
    while (!this.isAtEnd() && this.peek() !== '\n') {
      this.advance();
    }
  }

  /**
   * Skip a block comment whose opening marker was already consumed.
   * @throws {LexicalError} When the input ends before the closing marker.
   */
  scanMultiLineComment() {
    const startLine = this.state.line;
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // consume '*'
        this.advance(); // consume '/'
        return;
      }
      this.advance(); // advance() keeps line/column correct across newlines
    }
    this.throwError(`Unterminated comment starting at line ${startLine}`);
  }

  /**
   * Scan a string literal whose opening quote was already consumed. The token
   * value is the string content with escape sequences resolved.
   *
   * @param {string} quote - The opening quote; only '`' strings may span lines.
   * @throws {LexicalError} On an unterminated literal or invalid escape.
   */
  scanString(quote) {
    const startPosition = this.getTokenStart();
    const isTemplate = quote === '`';
    let value = '';

    while (!this.isAtEnd() && this.peek() !== quote) {
      const next = this.peek();
      if (next === '\n' && !isTemplate) {
        this.throwError('Unterminated string literal');
      }
      if (next === '\\') {
        this.advance(); // consume backslash
        value += this.scanEscapeSequence();
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      this.throwError(`Unterminated ${isTemplate ? 'template' : 'string'} literal`);
    }

    // Consume closing quote
    this.advance();
    this.addToken(TokenType.STRING, value, startPosition);
  }

  /**
   * Resolve the escape that follows an already-consumed backslash.
   * @returns {string} The character(s) the escape stands for.
   * @throws {LexicalError} For an unknown escape letter.
   */
  scanEscapeSequence() {
    const char = this.advance();
    if (SIMPLE_ESCAPES.has(char)) {
      return SIMPLE_ESCAPES.get(char);
    }
    if (char === 'x') {
      return this.scanHexEscape();
    }
    if (char === 'u') {
      return this.scanUnicodeEscape();
    }
    this.throwError(`Invalid escape sequence: \\${char}`);
    return char; // unreachable: throwError always throws
  }

  /**
   * Read exactly `count` hex digits.
   * @throws {LexicalError} With `message` when a non-hex character is met.
   */
  readHexDigits(count, message) {
    let hex = '';
    for (let i = 0; i < count; i++) {
      if (!CharacterUtils.isHexDigit(this.peek())) {
        this.throwError(message);
      }
      hex += this.advance();
    }
    return hex;
  }

  /** Scan the two hex digits of a `\xHH` escape. */
  scanHexEscape() {
    const hex = this.readHexDigits(2, 'Invalid hex escape sequence');
    return String.fromCharCode(Number.parseInt(hex, 16));
  }

  /**
   * Scan a `\uXXXX` or `\u{...}` escape (the 'u' was already consumed).
   * @throws {LexicalError} For non-hex digits, a missing '}', an empty
   *   `\u{}` or a code point above 0x10FFFF.
   */
  scanUnicodeEscape() {
    if (this.peek() !== '{') {
      // Fixed-length unicode escape: \uXXXX
      const hex = this.readHexDigits(4, 'Invalid unicode escape sequence');
      return String.fromCharCode(Number.parseInt(hex, 16));
    }

    // Unicode code point escape: \u{...}
    this.advance(); // consume '{'
    let hex = '';
    while (!this.isAtEnd() && this.peek() !== '}') {
      if (!CharacterUtils.isHexDigit(this.peek())) {
        this.throwError('Invalid unicode escape sequence');
      }
      hex += this.advance();
    }
    if (this.peek() !== '}') {
      this.throwError('Unterminated unicode escape sequence');
    }
    this.advance(); // consume '}'

    // String.fromCodePoint throws a RangeError for NaN (empty braces) and for
    // values above 0x10FFFF; report those as lexical errors instead.
    const codePoint = Number.parseInt(hex, 16);
    if (Number.isNaN(codePoint) || codePoint > MAX_CODE_POINT) {
      this.throwError('Invalid unicode escape sequence');
    }
    return String.fromCodePoint(codePoint);
  }

  /** Consume characters while `predicate(peek())` holds. */
  consumeWhile(predicate) {
    while (predicate(this.peek())) {
      this.advance();
    }
  }

  /** Emit a NUMBER token whose text runs from `startPosition` to the cursor. */
  emitNumber(startPosition) {
    const value = this.state.source.substring(startPosition.offset, this.state.current);
    this.addToken(TokenType.NUMBER, value, startPosition);
  }

  /**
   * Scan the digits of a prefixed (0x / 0b / 0o) literal and emit the token.
   * @throws {LexicalError} When no digit follows the prefix.
   */
  scanRadixNumber(startPosition, isRadixDigit, radixName) {
    if (!isRadixDigit(this.peek())) {
      this.throwError(`Invalid ${radixName} number: missing digits`);
    }
    this.consumeWhile(isRadixDigit);
    this.emitNumber(startPosition);
  }

  /**
   * Scan a numeric literal whose first digit was already consumed and pick
   * the hex / binary / octal / decimal scanner from its prefix.
   */
  scanNumber() {
    const startPosition = this.getTokenStart();

    // Handle different number formats
    if (this.state.source.charAt(startPosition.offset) === '0') {
      if (this.match('x') || this.match('X')) {
        this.scanHexNumber(startPosition);
        return;
      }
      if (this.match('b') || this.match('B')) {
        this.scanBinaryNumber(startPosition);
        return;
      }
      if (this.match('o') || this.match('O')) {
        this.scanOctalNumber(startPosition);
        return;
      }
    }
    this.scanDecimalNumber(startPosition);
  }

  /**
   * Scan the rest of a decimal literal: integer digits, an optional fraction
   * (a '.' must be followed by a digit) and an optional exponent.
   * @throws {LexicalError} When an exponent marker has no digits.
   */
  scanDecimalNumber(startPosition) {
    const isDigit = (c) => CharacterUtils.isDigit(c);

    // Consume integer part
    this.consumeWhile(isDigit);

    // Look for decimal point
    if (this.peek() === '.' && isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      this.consumeWhile(isDigit);
    }

    // Look for exponent
    if (this.peek() === 'e' || this.peek() === 'E') {
      this.advance();
      if (this.peek() === '+' || this.peek() === '-') {
        this.advance();
      }
      if (!isDigit(this.peek())) {
        this.throwError('Invalid number format: missing exponent digits');
      }
      this.consumeWhile(isDigit);
    }

    this.emitNumber(startPosition);
  }

  /** Scan the digits after a consumed `0x` / `0X` prefix. */
  scanHexNumber(startPosition) {
    this.scanRadixNumber(startPosition, (c) => CharacterUtils.isHexDigit(c), 'hex');
  }

  /** Scan the digits after a consumed `0b` / `0B` prefix. */
  scanBinaryNumber(startPosition) {
    this.scanRadixNumber(startPosition, (c) => c === '0' || c === '1', 'binary');
  }

  /** Scan the digits after a consumed `0o` / `0O` prefix. */
  scanOctalNumber(startPosition) {
    this.scanRadixNumber(startPosition, (c) => c >= '0' && c <= '7', 'octal');
  }

  /**
   * Scan an identifier whose first character was already consumed and emit
   * it with the type registered in KeywordRegistry.
   */
  scanIdentifier() {
    const startPosition = this.getTokenStart();
    this.consumeWhile((c) => CharacterUtils.isAlphaNumeric(c));
    const value = this.state.source.substring(startPosition.offset, this.state.current);
    this.addToken(KeywordRegistry.getTokenType(value), value, startPosition);
  }

  /** Emit HTML_TAG_CLOSE for '</' or HTML_TAG_OPEN for '<' (the '<' was already consumed). */
  scanHTMLTag() {
    const startPosition = this.getTokenStart();
    if (this.match('/')) {
      this.addToken(TokenType.HTML_TAG_CLOSE, '</', startPosition);
    } else {
      this.addToken(TokenType.HTML_TAG_OPEN, '<', startPosition);
    }
  }

  /** @throws {LexicalError} Always; the message includes the UTF-16 code of `char`. */
  handleUnexpectedCharacter(char) {
    this.throwError(`Unexpected character: '${char}' (${char.charCodeAt(0)})`);
  }

  /**
   * Run every token through the registered processors. A processor applies
   * when its `handles` list contains the token's ORIGINAL type; a falsy
   * result drops the token. Processors receive the context the lexer was in
   * when scanning stopped, not the context at the token's own position.
   */
  processTokens() {
    const processors = this.options.tokenProcessors;
    if (processors.length === 0) {
      return;
    }

    const currentContext = this.contextManager.getCurrentContext();
    const processed = [];
    for (const token of this.tokens) {
      let result = token;
      for (const processor of processors) {
        if (!processor.handles.includes(token.type)) {
          continue;
        }
        result = processor.process(result, currentContext);
        if (!result) {
          break;
        }
      }
      if (result) {
        processed.push(result);
      }
    }
    this.tokens = processed;
  }

  /**
   * Build a cursor over the scanned tokens.
   *
   * The cursor never moves past the last token. `current` is a plain
   * property initialised to 0; the cursor methods do not update it.
   *
   * @returns {{tokens: Array, current: number, peek: Function,
   *   advance: Function, isAtEnd: Function, previous: Function}}
   */
  createTokenStream() {
    // Bind the stream to this run's array so that a later tokenize() call on
    // the same lexer cannot change what an existing stream returns.
    const tokens = this.tokens;
    const eofToken = tokens[tokens.length - 1] || this.createEOFToken();
    let currentIndex = 0;

    return {
      tokens,
      current: 0,
      peek: () => (currentIndex < tokens.length ? tokens[currentIndex] : eofToken),
      advance: () => {
        if (currentIndex >= tokens.length) {
          return eofToken;
        }
        const token = tokens[currentIndex];
        if (currentIndex < tokens.length - 1) {
          currentIndex++;
        }
        return token;
      },
      isAtEnd: () => currentIndex >= tokens.length - 1,
      previous: () => tokens[Math.max(0, currentIndex - 1)] || eofToken
    };
  }

  /** @returns {object} A zero-width EOF token at the current position. */
  createEOFToken() {
    return {
      type: TokenType.EOF,
      value: '',
      position: this.getPosition(),
      range: { start: this.getPosition(), end: this.getPosition() }
    };
  }

  // Utility methods

  /**
   * Consume the next character only when it equals `expected`.
   * Not intended for '\n' (no line bookkeeping is done here).
   * @returns {boolean} true when the character was consumed.
   */
  match(expected) {
    if (this.isAtEnd()) {
      return false;
    }
    if (this.state.source.charAt(this.state.current) !== expected) {
      return false;
    }
    this.state.current++;
    this.state.column++;
    return true;
  }

  /**
   * Consume and return the next character ('\0' at end of input).
   * This is the single place where line/column are kept in sync: a consumed
   * newline moves to column 1 of the next line.
   */
  advance() {
    if (this.isAtEnd()) {
      return '\0';
    }
    const char = this.state.source.charAt(this.state.current);
    this.state.current++;
    if (char === '\n') {
      this.state.line++;
      this.state.column = 1;
    } else {
      this.state.column++;
    }
    return char;
  }

  /** @returns {string} The next character without consuming it ('\0' at end). */
  peek() {
    if (this.isAtEnd()) {
      return '\0';
    }
    return this.state.source.charAt(this.state.current);
  }

  /** @returns {string} The character after the next one ('\0' when out of range). */
  peekNext() {
    if (this.state.current + 1 >= this.state.source.length) {
      return '\0';
    }
    return this.state.source.charAt(this.state.current + 1);
  }

  /** @returns {boolean} true once every character has been consumed. */
  isAtEnd() {
    return this.state.current >= this.state.source.length;
  }

  /**
   * Append a token ending at the current position.
   *
   * @param type - Token type.
   * @param {string} value - Token text (for strings: the unescaped content).
   * @param {object} [startPosition] - Where the token starts. When omitted it
   *   is derived by stepping back `value.length` characters on the current
   *   line, which is exact for punctuation and operators.
   */
  addToken(type, value, startPosition) {
    const end = this.getPosition();
    const width = typeof value === 'string' ? value.length : 0;
    const start = startPosition ?? {
      line: end.line,
      column: end.column - width,
      offset: end.offset - width
    };
    this.tokens.push({ type, value, position: start, range: { start, end } });
  }

  /** @returns {{line: number, column: number, offset: number}} The cursor position. */
  getPosition() {
    return {
      line: this.state.line,
      column: this.state.column,
      offset: this.state.current
    };
  }

  /**
   * Position of the character that was consumed last. scanToken() consumes a
   * token's first character before dispatching, so the scan* methods use this
   * as the token start. Only valid when that character was not a newline.
   */
  getTokenStart() {
    return {
      line: this.state.line,
      column: this.state.column - 1,
      offset: this.state.current - 1
    };
  }

  /**
   * Raise a LexicalError at the current position. Recording errors for
   * recovery mode is done by tokenize(), so each error is stored only once.
   * @throws {LexicalError} Always.
   */
  throwError(message) {
    throw new LexicalError(message, this.getPosition(), this.peek(), this.state.filename);
  }
}

/**
 * Default token processors
 *
 * Both processors currently pass tokens through unchanged; they are
 * placeholders for future validation and enrichment.
 */
export const defaultTokenProcessors = [
  {
    name: 'context-validator',
    handles: [TokenType.CLIENT, TokenType.SERVER, TokenType.MARKUP],
    // Intended to validate that block keywords are used in an appropriate
    // context (a warning could be emitted when context is not COMPONENT).
    process: (token, context) => token
  },
  {
    name: 'identifier-enhancer',
    handles: [TokenType.IDENTIFIER],
    // Could enhance identifiers with additional metadata
    process: (token, context) => token
  }
];
//# sourceMappingURL=lexer-refactored.js.map