@ordojs/core
Version:
Core compiler and runtime for OrdoJS framework
834 lines (716 loc) • 22.6 kB
text/typescript
/**
* @fileoverview OrdoJS Lexer - Refactored modular implementation
* @author OrdoJS Framework Team
*/
import {
LexicalContext,
LexicalError,
TokenType,
type SourcePosition,
type Token,
type TokenStream
} from '../types/index.js';
/**
 * Lexer configuration options.
 *
 * Every field is optional; the OrdoJSLexer constructor in this file supplies
 * a default for each option that is omitted.
 */
export interface LexerOptions {
  /**
   * Enable source map generation (constructor default: `true`).
   * NOTE(review): none of the lexer code visible in this file reads this flag.
   */
  generateSourceMaps?: boolean;
  /**
   * Enable error recovery mode (constructor default: `false`).
   * When set, a LexicalError raised during `tokenize()` is recorded in the
   * lexer's error list and a token stream is returned instead of the error
   * being rethrown to the caller.
   */
  enableRecovery?: boolean;
  /**
   * Maximum number of errors before stopping (constructor default: `10`).
   * In recovery mode the lexer's error helper only records a new error while
   * fewer than this many have been recorded.
   */
  maxErrors?: number;
  /**
   * Enable context-aware tokenization (constructor default: `true`).
   * When set, every `{` / `}` the lexer scans also updates its lexical
   * context stack.
   */
  contextAware?: boolean;
  /**
   * Custom token processors (constructor default: empty list).
   * They are run over the token list once scanning has finished.
   */
  tokenProcessors?: TokenProcessor[];
}
/**
 * Token processor interface for plugin-based token processing.
 *
 * The lexer runs its registered processors over the finished token list; a
 * processor is only invoked for tokens whose type appears in `handles`.
 */
export interface TokenProcessor {
  /** Processor name; `removeTokenProcessor(name)` looks processors up by this value. */
  name: string;
  /** Token types this processor handles; tokens of other types bypass it. */
  handles: TokenType[];
  /**
   * Process a token and return the (possibly modified) token, or `null` to
   * skip it. A `null` result drops the token from the lexer's output and
   * stops any remaining processors from seeing it.
   *
   * @param token - The token to process.
   * @param context - The lexical context supplied by the lexer.
   * @returns The token to keep, or `null` to remove it.
   */
  process(token: Token, context: LexicalContext): Token | null;
}
/**
 * Lexer state for error recovery and debugging.
 *
 * A fresh state is built for every `tokenize()` call.
 */
export interface LexerState {
  /** Complete source text being tokenized. */
  source: string;
  /** Index into `source` of the next character to be consumed. */
  current: number;
  /** Current line number; starts at 1. */
  line: number;
  /** Current column number; starts at 1. */
  column: number;
  /** Name of the file being tokenized (`'unknown'` when none is given). */
  filename: string;
  /** Stack of lexical contexts; initialised to `[LexicalContext.COMPONENT]`. */
  contextStack: LexicalContext[];
  /** Lexical errors recorded while tokenizing. */
  errors: LexicalError[];
}
/**
 * Character classification utilities.
 *
 * All predicates compare the argument as a string, so they are meant to be
 * called with a single character (the lexer passes one character at a time).
 */
class CharacterUtils {
  /** True when `lo <= char <= hi` under string comparison. */
  private static between(char: string, lo: string, hi: string): boolean {
    return lo <= char && char <= hi;
  }

  /** ASCII decimal digit `0`-`9`. */
  static isDigit(char: string): boolean {
    return CharacterUtils.between(char, '0', '9');
  }

  /** ASCII letter, underscore or dollar sign (valid identifier start). */
  static isAlpha(char: string): boolean {
    if (char === '_' || char === '$') {
      return true;
    }
    return CharacterUtils.between(char, 'a', 'z') || CharacterUtils.between(char, 'A', 'Z');
  }

  /** Character that may appear inside an identifier. */
  static isAlphaNumeric(char: string): boolean {
    if (this.isAlpha(char)) {
      return true;
    }
    return this.isDigit(char);
  }

  /** Horizontal whitespace: space, tab or carriage return (newline is excluded). */
  static isWhitespace(char: string): boolean {
    switch (char) {
      case ' ':
      case '\t':
      case '\r':
        return true;
      default:
        return false;
    }
  }

  /** Line feed. */
  static isNewline(char: string): boolean {
    return '\n' === char;
  }

  /** Double quote, single quote or backtick. */
  static isQuote(char: string): boolean {
    switch (char) {
      case '"':
      case "'":
      case '`':
        return true;
      default:
        return false;
    }
  }

  /** Hexadecimal digit in either letter case. */
  static isHexDigit(char: string): boolean {
    if (this.isDigit(char)) {
      return true;
    }
    return CharacterUtils.between(char, 'a', 'f') || CharacterUtils.between(char, 'A', 'F');
  }
}
/**
 * Keyword registry for efficient keyword lookup.
 *
 * Maps reserved words to the token type the lexer should emit for them.
 * Words mapped to `TokenType.IDENTIFIER` are still reported by `isKeyword()`
 * but are tokenized as plain identifiers.
 */
class KeywordRegistry {
  private static readonly keywords: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ['component', TokenType.COMPONENT],
    ['client', TokenType.CLIENT],
    ['server', TokenType.SERVER],
    ['markup', TokenType.MARKUP],
    ['let', TokenType.LET],
    ['const', TokenType.CONST],
    ['if', TokenType.IF],
    ['else', TokenType.ELSE],
    ['each', TokenType.EACH],
    ['public', TokenType.PUBLIC],
    ['bind', TokenType.BIND],
    ['true', TokenType.BOOLEAN],
    ['false', TokenType.BOOLEAN],
    ['null', TokenType.IDENTIFIER], // null is treated as identifier for now
    ['undefined', TokenType.IDENTIFIER],
    ['function', TokenType.IDENTIFIER],
    ['return', TokenType.IDENTIFIER],
    ['for', TokenType.IDENTIFIER],
    ['while', TokenType.IDENTIFIER],
    ['break', TokenType.IDENTIFIER],
    ['continue', TokenType.IDENTIFIER],
    ['try', TokenType.IDENTIFIER],
    ['catch', TokenType.IDENTIFIER],
    ['finally', TokenType.IDENTIFIER],
    ['throw', TokenType.IDENTIFIER],
    ['async', TokenType.IDENTIFIER],
    ['await', TokenType.IDENTIFIER],
    ['import', TokenType.IDENTIFIER],
    ['export', TokenType.IDENTIFIER],
    ['default', TokenType.IDENTIFIER],
    ['class', TokenType.IDENTIFIER],
    ['extends', TokenType.IDENTIFIER],
    ['interface', TokenType.IDENTIFIER],
    ['type', TokenType.IDENTIFIER],
    ['enum', TokenType.IDENTIFIER],
    ['namespace', TokenType.IDENTIFIER]
  ]);

  /**
   * Resolve the token type for an identifier.
   *
   * @param identifier - Exact (case-sensitive) identifier text.
   * @returns The registered token type, or `TokenType.IDENTIFIER` when the
   *   text is not in the registry.
   */
  static getTokenType(identifier: string): TokenType {
    // `??` rather than `||`: a registered type whose enum value is falsy
    // (e.g. the first member of a numeric enum) must not be replaced.
    return KeywordRegistry.keywords.get(identifier) ?? TokenType.IDENTIFIER;
  }

  /**
   * @param identifier - Exact (case-sensitive) identifier text.
   * @returns Whether the text is registered, including words that are
   *   tokenized as identifiers.
   */
  static isKeyword(identifier: string): boolean {
    return KeywordRegistry.keywords.has(identifier);
  }

  /** @returns A new array with every registered word, in registration order. */
  static getAllKeywords(): string[] {
    return Array.from(KeywordRegistry.keywords.keys());
  }
}
/**
 * Context manager for tracking lexical contexts.
 *
 * Keeps a stack whose bottom entry is always `LexicalContext.COMPONENT`; the
 * top of the stack is the context the lexer is currently in.
 */
class ContextManager {
  private contextStack: LexicalContext[] = [LexicalContext.COMPONENT];

  /** @returns The context on top of the stack. */
  getCurrentContext(): LexicalContext {
    const top = this.contextStack[this.contextStack.length - 1];
    return top || LexicalContext.COMPONENT;
  }

  /** Enter `context` by pushing it onto the stack. */
  pushContext(context: LexicalContext): void {
    this.contextStack.push(context);
  }

  /**
   * Leave the current context.
   *
   * @returns The context that was removed, or `undefined` when only the root
   *   entry remains (the root is never popped).
   */
  popContext(): LexicalContext | undefined {
    return this.contextStack.length > 1 ? this.contextStack.pop() : undefined;
  }

  /**
   * Adjust the stack for a brace character.
   *
   * `}` pops one context. `{` pushes a context chosen from the value of the
   * LAST element of `previousTokens`: `client`, `server` and `markup` open
   * their block contexts, anything else opens a JavaScript context. Other
   * characters are ignored.
   *
   * @param char - The character just scanned.
   * @param previousTokens - Tokens emitted so far; only the final element is read.
   */
  updateContext(char: string, previousTokens: Token[]): void {
    if (char === '}') {
      this.popContext();
      return;
    }
    if (char !== '{') {
      return;
    }

    const preceding = previousTokens[previousTokens.length - 1];
    switch (preceding?.value) {
      case 'client':
        this.pushContext(LexicalContext.CLIENT_BLOCK);
        break;
      case 'server':
        this.pushContext(LexicalContext.SERVER_BLOCK);
        break;
      case 'markup':
        this.pushContext(LexicalContext.MARKUP_BLOCK);
        break;
      default:
        this.pushContext(LexicalContext.JAVASCRIPT);
        break;
    }
  }

  /** @returns A shallow copy of the stack, bottom entry first. */
  getContextStack(): LexicalContext[] {
    return this.contextStack.slice();
  }

  /** Discard all contexts and start again from the root component context. */
  reset(): void {
    this.contextStack = [LexicalContext.COMPONENT];
  }
}
/**
 * Enhanced OrdoJS Lexer with modular architecture.
 *
 * Turns OrdoJS source text into a list of tokens and exposes it as a
 * TokenStream. Scanning is character-driven: `scanToken()` consumes one
 * character, dispatches on it, and the specialised `scan*` helpers consume the
 * rest of the lexeme. Every token's position is the position of its FIRST
 * character, recorded in `tokenStart` before that character is consumed.
 *
 * Error handling: by default the first LexicalError is thrown to the caller.
 * With `enableRecovery`, errors are recorded (at most `maxErrors` of them, and
 * always at least the first one), scanning resumes after each error, and a
 * stream that ends in an EOF token is always returned.
 */
export class OrdoJSLexer {
  /** Punctuation that always produces exactly one token. */
  private static readonly SINGLE_CHAR_TOKENS: ReadonlyMap<string, TokenType> =
    new Map<string, TokenType>([
      ['(', TokenType.LEFT_PAREN],
      [')', TokenType.RIGHT_PAREN],
      ['[', TokenType.LEFT_BRACKET],
      [']', TokenType.RIGHT_BRACKET],
      [',', TokenType.COMMA],
      ['.', TokenType.DOT],
      [';', TokenType.SEMICOLON],
      [':', TokenType.COLON],
      ['?', TokenType.QUESTION]
    ]);

  /**
   * Operator table. `single` is the one-character form, `double` the
   * two-character form completed by `doubleChar`. `&` and `|` have no
   * one-character form, so a lone occurrence is an error.
   */
  private static readonly OPERATORS: ReadonlyMap<
    string,
    { single?: TokenType; double?: TokenType; doubleChar?: string }
  > = new Map<string, { single?: TokenType; double?: TokenType; doubleChar?: string }>([
    ['+', { single: TokenType.PLUS, double: TokenType.INCREMENT, doubleChar: '+' }],
    ['-', { single: TokenType.MINUS, double: TokenType.DECREMENT, doubleChar: '-' }],
    ['*', { single: TokenType.MULTIPLY }],
    ['%', { single: TokenType.MODULO }],
    ['=', { single: TokenType.ASSIGN, double: TokenType.EQUALS, doubleChar: '=' }],
    ['!', { single: TokenType.LOGICAL_NOT, double: TokenType.NOT_EQUALS, doubleChar: '=' }],
    ['<', { single: TokenType.LESS_THAN, double: TokenType.LESS_EQUAL, doubleChar: '=' }],
    ['>', { single: TokenType.GREATER_THAN, double: TokenType.GREATER_EQUAL, doubleChar: '=' }],
    ['&', { double: TokenType.LOGICAL_AND, doubleChar: '&' }],
    ['|', { double: TokenType.LOGICAL_OR, doubleChar: '|' }]
  ]);

  /** Escape letters that map directly to one character. */
  private static readonly SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map<string, string>([
    ['n', '\n'],
    ['t', '\t'],
    ['r', '\r'],
    ['b', '\b'],
    ['f', '\f'],
    ['v', '\v'],
    ['0', '\0'],
    ['\\', '\\'],
    ['"', '"'],
    ["'", "'"],
    ['`', '`']
  ]);

  private state: LexerState;
  private options: Required<LexerOptions>;
  private contextManager: ContextManager;
  private tokens: Token[] = [];
  /** Position of the first character of the token currently being scanned. */
  private tokenStart: SourcePosition;

  /**
   * @param options - Lexer configuration; omitted or `undefined` fields fall
   *   back to their defaults. The `tokenProcessors` array is copied, so later
   *   `addTokenProcessor` / `removeTokenProcessor` calls never modify the
   *   caller's array.
   */
  constructor(options: LexerOptions = {}) {
    this.options = {
      generateSourceMaps: options.generateSourceMaps ?? true,
      enableRecovery: options.enableRecovery ?? false,
      maxErrors: options.maxErrors ?? 10,
      contextAware: options.contextAware ?? true,
      tokenProcessors: [...(options.tokenProcessors ?? [])]
    };
    this.contextManager = new ContextManager();
    this.state = this.createInitialState();
    this.tokenStart = this.getPosition();
  }

  /**
   * Tokenize source code into token stream.
   *
   * @param source - Source text to tokenize.
   * @param filename - Name attached to reported errors.
   * @returns A stream over the produced tokens, terminated by an EOF token.
   * @throws LexicalError on the first lexical error when recovery is disabled.
   *   Errors that are not LexicalError are always rethrown.
   */
  tokenize(source: string, filename: string = 'unknown'): TokenStream {
    this.initializeState(source, filename);

    while (!this.isAtEnd()) {
      try {
        this.scanToken();
      } catch (error: unknown) {
        this.recordIfRecoverable(error);
        if (this.state.errors.length >= this.options.maxErrors) {
          break; // error budget exhausted: stop scanning, keep what we have
        }
        // scanToken() always consumes at least one character before it can
        // fail, so resuming the loop here cannot spin forever.
      }
    }

    this.addToken(TokenType.EOF, '', this.getPosition());

    // Process tokens through registered processors
    try {
      this.processTokens();
    } catch (error: unknown) {
      this.recordIfRecoverable(error);
    }
    return this.createTokenStream();
  }

  /**
   * Get lexer state for debugging.
   *
   * @returns A snapshot: scalar fields are copied, `errors` is a fresh array
   *   and `contextStack` reflects the context manager's current stack.
   */
  getState(): Readonly<LexerState> {
    return {
      ...this.state,
      contextStack: this.contextManager.getContextStack(),
      errors: [...this.state.errors]
    };
  }

  /**
   * Get all errors encountered during tokenization.
   *
   * @returns A copy of the error list of the most recent `tokenize()` call.
   */
  getErrors(): LexicalError[] {
    return [...this.state.errors];
  }

  /**
   * Add custom token processor. Processors run in registration order.
   */
  addTokenProcessor(processor: TokenProcessor): void {
    this.options.tokenProcessors.push(processor);
  }

  /**
   * Remove token processor by name.
   *
   * @returns `true` when a processor with that name was found and removed
   *   (only the first match is removed), `false` otherwise.
   */
  removeTokenProcessor(name: string): boolean {
    const index = this.options.tokenProcessors.findIndex(p => p.name === name);
    if (index < 0) {
      return false;
    }
    this.options.tokenProcessors.splice(index, 1);
    return true;
  }

  /** Build the state used before any source has been supplied. */
  private createInitialState(): LexerState {
    return {
      source: '',
      current: 0,
      line: 1,
      column: 1,
      filename: 'unknown',
      contextStack: [LexicalContext.COMPONENT],
      errors: []
    };
  }

  /** Reset all per-run state so one lexer instance can tokenize several inputs. */
  private initializeState(source: string, filename: string): void {
    this.state = {
      ...this.createInitialState(),
      source,
      filename
    };
    this.contextManager.reset();
    // A fresh array: streams returned by earlier runs keep their own tokens.
    this.tokens = [];
    this.tokenStart = this.getPosition();
  }

  /**
   * In recovery mode, make sure a LexicalError is recorded exactly once;
   * otherwise rethrow it. Non-lexical errors are always rethrown.
   */
  private recordIfRecoverable(error: unknown): void {
    if (!this.options.enableRecovery || !(error instanceof LexicalError)) {
      throw error;
    }
    // throwError() may already have recorded this error.
    if (!this.state.errors.includes(error)) {
      this.state.errors.push(error);
    }
  }

  /** Consume one character and scan the token (if any) that starts with it. */
  private scanToken(): void {
    // Remember where the lexeme begins BEFORE consuming its first character.
    this.tokenStart = this.getPosition();
    const char = this.advance();

    // Whitespace and newlines produce no tokens; advance() tracks position.
    if (CharacterUtils.isWhitespace(char) || CharacterUtils.isNewline(char)) {
      return;
    }

    // Single character tokens
    const singleType = OrdoJSLexer.SINGLE_CHAR_TOKENS.get(char);
    if (singleType !== undefined) {
      this.addToken(singleType, char);
      return;
    }

    // Context-sensitive tokens
    if (char === '{' || char === '}') {
      this.handleBrace(char);
      return;
    }

    // HTML tags (context-sensitive). Must be tested before the operator
    // table, which would otherwise always claim '<' as a comparison.
    if (char === '<' && this.contextManager.getCurrentContext() === LexicalContext.MARKUP_BLOCK) {
      this.scanHTMLTag();
      return;
    }

    // Multi-character operators
    if (this.handleOperators(char)) {
      return;
    }

    // Comments (or the division operator)
    if (char === '/' && this.handleComments()) {
      return;
    }

    // Strings
    if (CharacterUtils.isQuote(char)) {
      this.scanString(char);
      return;
    }

    // Numbers
    if (CharacterUtils.isDigit(char)) {
      this.scanNumber();
      return;
    }

    // Identifiers and keywords
    if (CharacterUtils.isAlpha(char)) {
      this.scanIdentifier();
      return;
    }

    // Handle unexpected characters
    this.handleUnexpectedCharacter(char);
  }

  /** Emit a brace token and, when context-aware, update the context stack. */
  private handleBrace(char: string): void {
    if (this.options.contextAware) {
      // Update the context BEFORE pushing the brace token: the context
      // manager inspects the last emitted token to spot `client {`,
      // `server {` and `markup {`, and must not see the brace itself.
      this.contextManager.updateContext(char, this.tokens);
    }
    const tokenType = char === '{' ? TokenType.LEFT_BRACE : TokenType.RIGHT_BRACE;
    this.addToken(tokenType, char);
  }

  /**
   * Scan an operator starting with `char`.
   *
   * @returns `false` when `char` does not start an operator, `true` after a
   *   token was emitted.
   * @throws LexicalError for a lone `&` or `|`.
   */
  private handleOperators(char: string): boolean {
    const op = OrdoJSLexer.OPERATORS.get(char);
    if (op === undefined) {
      return false;
    }

    if (op.double !== undefined && op.doubleChar !== undefined && this.match(op.doubleChar)) {
      this.addToken(op.double, char + op.doubleChar);
    } else if (op.single !== undefined) {
      this.addToken(op.single, char);
    } else {
      // These require double characters
      this.throwError(`Unexpected character: ${char}. Did you mean '${char}${char}'?`);
    }
    return true;
  }

  /**
   * Called after a `/` was consumed: skip a comment, or emit DIVIDE.
   *
   * @returns Always `true` (the `/` is handled one way or another).
   */
  private handleComments(): boolean {
    if (this.match('/')) {
      // Single-line comment
      this.scanSingleLineComment();
    } else if (this.match('*')) {
      // Multi-line comment
      this.scanMultiLineComment();
    } else {
      // Division operator
      this.addToken(TokenType.DIVIDE, '/');
    }
    return true;
  }

  /** Skip to, but not past, the end of the current line. */
  private scanSingleLineComment(): void {
    while (this.peek() !== '\n' && !this.isAtEnd()) {
      this.advance();
    }
  }

  /**
   * Skip a block comment through its closing delimiter.
   *
   * @throws LexicalError when the input ends before the comment is closed.
   */
  private scanMultiLineComment(): void {
    const startLine = this.tokenStart.line;
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // consume '*'
        this.advance(); // consume '/'
        return;
      }
      this.advance(); // advance() keeps line/column correct across newlines
    }
    this.throwError(`Unterminated comment starting at line ${startLine}`);
  }

  /**
   * Scan a string or template literal whose opening quote was just consumed.
   * The emitted STRING token's value has escape sequences decoded and does not
   * include the quotes. Only backtick literals may span lines.
   *
   * @throws LexicalError for an unterminated literal or an invalid escape.
   */
  private scanString(quote: string): void {
    const startPosition = this.tokenStart;
    const isTemplate = quote === '`';
    let value = '';

    while (!this.isAtEnd() && this.peek() !== quote) {
      const next = this.peek();
      if (next === '\n' && !isTemplate) {
        this.throwError('Unterminated string literal');
      }
      if (next === '\\') {
        this.advance(); // consume backslash
        value += this.scanEscapeSequence();
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      this.throwError(`Unterminated ${isTemplate ? 'template' : 'string'} literal`);
    }

    // Consume closing quote
    this.advance();
    this.addToken(TokenType.STRING, value, startPosition);
  }

  /**
   * Decode the escape sequence that follows an already-consumed backslash.
   *
   * @throws LexicalError for an unknown or malformed escape.
   */
  private scanEscapeSequence(): string {
    const char = this.advance();
    const simple = OrdoJSLexer.SIMPLE_ESCAPES.get(char);
    if (simple !== undefined) {
      return simple;
    }
    if (char === 'x') {
      return this.scanHexEscape();
    }
    if (char === 'u') {
      return this.scanUnicodeEscape();
    }
    return this.throwError(`Invalid escape sequence: \\${char}`);
  }

  /** Decode the two hex digits of a `\xHH` escape. */
  private scanHexEscape(): string {
    const hex = this.readHexDigits(2, 'Invalid hex escape sequence');
    return String.fromCharCode(parseInt(hex, 16));
  }

  /**
   * Decode `\uXXXX` or `\u{X...}` (the `\u` has already been consumed).
   *
   * @throws LexicalError for non-hex digits, a missing `}`, an empty `\u{}`
   *   or a code point above U+10FFFF.
   */
  private scanUnicodeEscape(): string {
    if (!this.match('{')) {
      // Fixed-length unicode escape: \uXXXX
      const fixed = this.readHexDigits(4, 'Invalid unicode escape sequence');
      return String.fromCharCode(parseInt(fixed, 16));
    }

    // Unicode code point escape: \u{...}
    let hex = '';
    while (!this.isAtEnd() && this.peek() !== '}') {
      if (!CharacterUtils.isHexDigit(this.peek())) {
        this.throwError('Invalid unicode escape sequence');
      }
      hex += this.advance();
    }
    if (!this.match('}')) {
      this.throwError('Unterminated unicode escape sequence');
    }

    // String.fromCodePoint throws a RangeError for NaN (empty braces) and for
    // values above U+10FFFF; report those as lexical errors instead.
    const codePoint = parseInt(hex, 16);
    if (hex.length === 0 || codePoint > 0x10ffff) {
      this.throwError('Invalid unicode escape sequence');
    }
    return String.fromCodePoint(codePoint);
  }

  /** Consume exactly `count` hex digits, failing with `errorMessage` otherwise. */
  private readHexDigits(count: number, errorMessage: string): string {
    let hex = '';
    for (let i = 0; i < count; i++) {
      if (!CharacterUtils.isHexDigit(this.peek())) {
        this.throwError(errorMessage);
      }
      hex += this.advance();
    }
    return hex;
  }

  /**
   * Scan a numeric literal whose first digit was just consumed. Handles the
   * `0x` / `0b` / `0o` prefixes (either case) and falls back to decimal.
   */
  private scanNumber(): void {
    const startPosition = this.tokenStart;

    // Handle different number formats
    if (this.state.source.charAt(startPosition.offset) === '0') {
      if (this.match('x') || this.match('X')) {
        this.scanRadixNumber(startPosition, 'hex', c => CharacterUtils.isHexDigit(c));
        return;
      }
      if (this.match('b') || this.match('B')) {
        this.scanRadixNumber(startPosition, 'binary', c => c === '0' || c === '1');
        return;
      }
      if (this.match('o') || this.match('O')) {
        this.scanRadixNumber(startPosition, 'octal', c => c >= '0' && c <= '7');
        return;
      }
    }
    this.scanDecimalNumber(startPosition);
  }

  /**
   * Scan the remainder of a decimal literal: integer digits, an optional
   * fraction (only when a digit follows the dot) and an optional exponent.
   *
   * @throws LexicalError when an exponent marker has no digits.
   */
  private scanDecimalNumber(startPosition: SourcePosition): void {
    // Consume integer part
    this.skipDigits();

    // Look for decimal point
    if (this.peek() === '.' && CharacterUtils.isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      this.skipDigits();
    }

    // Look for exponent
    if (this.peek() === 'e' || this.peek() === 'E') {
      this.advance();
      if (this.peek() === '+' || this.peek() === '-') {
        this.advance();
      }
      if (!CharacterUtils.isDigit(this.peek())) {
        this.throwError('Invalid number format: missing exponent digits');
      }
      this.skipDigits();
    }

    this.addToken(TokenType.NUMBER, this.lexemeFrom(startPosition), startPosition);
  }

  /**
   * Scan the digits of a prefixed literal (the prefix is already consumed).
   * The token value is the raw source text including the prefix.
   *
   * @throws LexicalError when no valid digit follows the prefix.
   */
  private scanRadixNumber(
    startPosition: SourcePosition,
    radixName: string,
    isRadixDigit: (char: string) => boolean
  ): void {
    if (!isRadixDigit(this.peek())) {
      this.throwError(`Invalid ${radixName} number: missing digits`);
    }
    while (isRadixDigit(this.peek())) {
      this.advance();
    }
    this.addToken(TokenType.NUMBER, this.lexemeFrom(startPosition), startPosition);
  }

  /** Consume a (possibly empty) run of decimal digits. */
  private skipDigits(): void {
    while (CharacterUtils.isDigit(this.peek())) {
      this.advance();
    }
  }

  /** Source text from `startPosition` up to the current read position. */
  private lexemeFrom(startPosition: SourcePosition): string {
    return this.state.source.substring(startPosition.offset, this.state.current);
  }

  /**
   * Scan an identifier whose first character was just consumed and emit it
   * with the token type the keyword registry assigns to it.
   */
  private scanIdentifier(): void {
    const startPosition = this.tokenStart;
    while (CharacterUtils.isAlphaNumeric(this.peek())) {
      this.advance();
    }
    const value = this.lexemeFrom(startPosition);
    this.addToken(KeywordRegistry.getTokenType(value), value, startPosition);
  }

  /** Emit the opener of an HTML tag: `</` for closing tags, `<` otherwise. */
  private scanHTMLTag(): void {
    const startPosition = this.tokenStart;
    if (this.match('/')) {
      this.addToken(TokenType.HTML_TAG_CLOSE, '</', startPosition);
    } else {
      this.addToken(TokenType.HTML_TAG_OPEN, '<', startPosition);
    }
  }

  /** Report a character that cannot start any token. */
  private handleUnexpectedCharacter(char: string): void {
    this.throwError(`Unexpected character: '${char}' (${char.charCodeAt(0)})`);
  }

  /**
   * Run every registered processor over the token list. A processor sees a
   * token only if the token's ORIGINAL type is in its `handles`; a `null`
   * result drops the token.
   *
   * NOTE(review): every processor call receives the context that is current
   * once scanning has finished, not the context in which the token was
   * scanned.
   */
  private processTokens(): void {
    const processors = this.options.tokenProcessors;
    if (processors.length === 0) return;

    const currentContext = this.contextManager.getCurrentContext();
    const processedTokens: Token[] = [];

    for (const token of this.tokens) {
      let processedToken: Token | null = token;
      for (const processor of processors) {
        if (!processor.handles.includes(token.type)) continue;
        processedToken = processor.process(processedToken, currentContext);
        if (!processedToken) break;
      }
      if (processedToken) {
        processedTokens.push(processedToken);
      }
    }
    this.tokens = processedTokens;
  }

  /**
   * Wrap the current token list in a TokenStream. The cursor never moves past
   * the last token, so `peek()` / `advance()` keep returning it at the end.
   *
   * NOTE(review): the `current` field is a constant 0 and does not follow the
   * internal cursor; confirm what TokenStream consumers expect.
   */
  private createTokenStream(): TokenStream {
    // Bind to this run's array: a later tokenize() call replaces this.tokens
    // and must not change what an already-returned stream yields.
    const tokens = this.tokens;
    const eofToken = tokens[tokens.length - 1] ?? this.createEOFToken();
    let cursor = 0;

    return {
      tokens,
      current: 0,
      peek: () => tokens[cursor] ?? eofToken,
      advance: () => {
        const token = tokens[cursor] ?? eofToken;
        if (cursor < tokens.length - 1) {
          cursor++;
        }
        return token;
      },
      isAtEnd: () => cursor >= tokens.length - 1,
      previous: () => tokens[Math.max(0, cursor - 1)] ?? eofToken
    };
  }

  /** Build a zero-width EOF token at the current position. */
  private createEOFToken(): Token {
    const here = this.getPosition();
    return {
      type: TokenType.EOF,
      value: '',
      position: here,
      range: {
        start: here,
        end: this.getPosition()
      }
    };
  }

  // Utility methods

  /** Consume the next character only if it equals `expected`. */
  private match(expected: string): boolean {
    if (this.isAtEnd()) return false;
    if (this.state.source.charAt(this.state.current) !== expected) return false;
    this.advance();
    return true;
  }

  /**
   * Consume and return the next character (`'\0'` at end of input). This is
   * the single place where line and column are updated.
   */
  private advance(): string {
    if (this.isAtEnd()) return '\0';
    const char = this.state.source.charAt(this.state.current);
    this.state.current++;
    if (char === '\n') {
      this.state.line++;
      this.state.column = 1;
    } else {
      this.state.column++;
    }
    return char;
  }

  /** Next character without consuming it (`'\0'` at end of input). */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.state.source.charAt(this.state.current);
  }

  /** Character after the next one (`'\0'` when there is none). */
  private peekNext(): string {
    if (this.state.current + 1 >= this.state.source.length) return '\0';
    return this.state.source.charAt(this.state.current + 1);
  }

  /** Whether every character of the source has been consumed. */
  private isAtEnd(): boolean {
    return this.state.current >= this.state.source.length;
  }

  /**
   * Append a token ending at the current position.
   *
   * @param startPosition - Where the token begins; defaults to the start of
   *   the token currently being scanned.
   */
  private addToken(type: TokenType, value: string, startPosition?: SourcePosition): void {
    const start = startPosition ?? this.tokenStart;
    const end = this.getPosition();
    const token: Token = {
      type,
      value,
      position: start,
      range: { start, end }
    };
    this.tokens.push(token);
  }

  /** Snapshot of the current read position. */
  private getPosition(): SourcePosition {
    return {
      line: this.state.line,
      column: this.state.column,
      offset: this.state.current
    };
  }

  /**
   * Raise a LexicalError at the current position. In recovery mode the error
   * is also recorded while the `maxErrors` budget allows.
   *
   * No input is skipped here: the tokenize loop resumes with the next
   * unconsumed character, so a valid character following the error is not
   * lost.
   */
  private throwError(message: string): never {
    const error = new LexicalError(
      message,
      this.getPosition(),
      this.peek(),
      this.state.filename
    );
    if (this.options.enableRecovery && this.state.errors.length < this.options.maxErrors) {
      this.state.errors.push(error);
    }
    throw error;
  }
}
/**
 * Default token processors.
 *
 * Both entries are currently pass-through placeholders: each returns the
 * token it is given, unchanged.
 */
export const defaultTokenProcessors: TokenProcessor[] = [
  {
    name: 'context-validator',
    handles: [TokenType.CLIENT, TokenType.SERVER, TokenType.MARKUP],
    process: (token: Token, context: LexicalContext): Token | null => {
      // Validate that block keywords are used in appropriate contexts
      const isBlockKeyword =
        token.type === TokenType.CLIENT ||
        token.type === TokenType.SERVER ||
        token.type === TokenType.MARKUP;
      const outsideComponent = context !== LexicalContext.COMPONENT;
      if (outsideComponent && isBlockKeyword) {
        // Could emit warning here
      }
      return token;
    }
  },
  {
    name: 'identifier-enhancer',
    handles: [TokenType.IDENTIFIER],
    // Could enhance identifiers with additional metadata
    process: (token: Token, _context: LexicalContext): Token | null => token
  }
];