@ordojs/core
Version:
Core compiler and runtime for OrdoJS framework
460 lines (413 loc) • 12.1 kB
text/typescript
/**
* @fileoverview OrdoJS Lexer - Full implementation for lexical analysis
*/
import { LexicalContext, LexicalError, TokenType, type SourcePosition, type Token, type TokenStream } from '../types/index.js';
/**
 * Hand-written scanner that converts OrdoJS source text into a flat list of
 * tokens terminated by a single EOF token.
 *
 * The lexer keeps a stack of lexical contexts that is pushed on every `{` and
 * popped on every `}`. The only decision currently driven by that stack is how
 * `<` is tokenized: inside a `markup { ... }` block it starts an HTML tag,
 * everywhere else it is a comparison operator.
 *
 * Every token records the position of its first character (`position` /
 * `range.start`) and the position just past its last character (`range.end`).
 */
export class OrdoJSLexer {
  private source: string;
  private current = 0;
  private line = 1;
  private column = 1;
  private tokens: Token[] = [];
  private filename: string;
  private contextStack: LexicalContext[] = [LexicalContext.COMPONENT];

  /** Position of the first character of the token currently being scanned. */
  private tokenStart: SourcePosition = { line: 1, column: 1, offset: 0 };

  /**
   * Reserved words and the token type each one produces. A Map is used instead
   * of a plain object so identifiers such as `constructor` or `toString` do not
   * resolve to members inherited from `Object.prototype`.
   */
  private readonly keywords: ReadonlyMap<string, TokenType> = new Map<string, TokenType>([
    ['component', TokenType.COMPONENT],
    ['client', TokenType.CLIENT],
    ['server', TokenType.SERVER],
    ['markup', TokenType.MARKUP],
    ['let', TokenType.LET],
    ['const', TokenType.CONST],
    ['if', TokenType.IF],
    ['else', TokenType.ELSE],
    ['each', TokenType.EACH],
    ['public', TokenType.PUBLIC],
    ['bind', TokenType.BIND],
    ['true', TokenType.BOOLEAN],
    ['false', TokenType.BOOLEAN],
  ]);

  /**
   * @param source - Text to tokenize.
   * @param filename - Name passed to `LexicalError` when scanning fails.
   */
  constructor(source: string = '', filename: string = 'unknown') {
    this.source = source;
    this.filename = filename;
  }

  /**
   * Scans the whole source from the beginning and returns a cursor over the
   * resulting tokens. All scanner state is reset first, so the method can be
   * called more than once on the same instance.
   *
   * The returned stream keeps its own cursor and its own reference to the
   * token array; a later `tokenize()` call does not affect it. The stream's
   * `current` field is the initial index (0) and is not updated by `advance`.
   *
   * @returns A stream whose last token is always EOF.
   * @throws LexicalError on an unexpected character or an unterminated
   *   string, template string or block comment.
   */
  tokenize(): TokenStream {
    this.current = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];
    this.contextStack = [LexicalContext.COMPONENT];
    this.tokenStart = this.getPosition();

    while (!this.isAtEnd()) {
      this.scanToken();
    }
    this.addToken(TokenType.EOF, '', this.getPosition());

    // Bind the stream to this run's array rather than to `this.tokens`, which
    // is replaced on the next tokenize() call.
    const tokens = this.tokens;
    // The EOF token was pushed just above, so the array is never empty here.
    const eofToken = tokens[tokens.length - 1]!;
    let currentIndex = 0;

    return {
      tokens,
      current: 0,
      peek: () => tokens[currentIndex] ?? eofToken,
      advance: () => {
        const token = tokens[currentIndex];
        if (token === undefined) return eofToken;
        currentIndex++;
        return token;
      },
      isAtEnd: () => currentIndex >= tokens.length,
      previous: () => tokens[Math.max(0, currentIndex - 1)] ?? eofToken,
    };
  }

  /** Consumes one lexeme (or a run of ignorable input) starting at `current`. */
  private scanToken(): void {
    // Remember where the lexeme begins before consuming its first character.
    this.tokenStart = this.getPosition();
    const char = this.advance();

    switch (char) {
      case ' ':
      case '\r':
      case '\t':
      case '\n':
        // Whitespace produces no token; advance() already tracked the newline.
        break;
      case '(':
        this.addToken(TokenType.LEFT_PAREN, char);
        break;
      case ')':
        this.addToken(TokenType.RIGHT_PAREN, char);
        break;
      case '{':
        this.addToken(TokenType.LEFT_BRACE, char);
        this.updateContext(char);
        break;
      case '}':
        this.addToken(TokenType.RIGHT_BRACE, char);
        this.updateContext(char);
        break;
      case '[':
        this.addToken(TokenType.LEFT_BRACKET, char);
        break;
      case ']':
        this.addToken(TokenType.RIGHT_BRACKET, char);
        break;
      case ',':
        this.addToken(TokenType.COMMA, char);
        break;
      case '.':
        this.addToken(TokenType.DOT, char);
        break;
      case ';':
        this.addToken(TokenType.SEMICOLON, char);
        break;
      case ':':
        this.addToken(TokenType.COLON, char);
        break;
      case '?':
        this.addToken(TokenType.QUESTION, char);
        break;
      case '+':
        if (this.match('+')) {
          this.addToken(TokenType.INCREMENT, '++');
        } else {
          this.addToken(TokenType.PLUS, char);
        }
        break;
      case '-':
        if (this.match('-')) {
          this.addToken(TokenType.DECREMENT, '--');
        } else {
          this.addToken(TokenType.MINUS, char);
        }
        break;
      case '*':
        this.addToken(TokenType.MULTIPLY, char);
        break;
      case '/':
        if (this.match('/')) {
          // Single-line comment: skip up to, but not including, the newline.
          while (this.peek() !== '\n' && !this.isAtEnd()) {
            this.advance();
          }
        } else if (this.match('*')) {
          this.scanMultiLineComment();
        } else {
          this.addToken(TokenType.DIVIDE, char);
        }
        break;
      case '%':
        this.addToken(TokenType.MODULO, char);
        break;
      case '=':
        if (this.match('=')) {
          this.addToken(TokenType.EQUALS, '==');
        } else {
          this.addToken(TokenType.ASSIGN, char);
        }
        break;
      case '!':
        if (this.match('=')) {
          this.addToken(TokenType.NOT_EQUALS, '!=');
        } else {
          this.addToken(TokenType.LOGICAL_NOT, char);
        }
        break;
      case '<':
        if (this.getCurrentContext() === LexicalContext.MARKUP_BLOCK) {
          this.scanHTMLTag();
        } else if (this.match('=')) {
          this.addToken(TokenType.LESS_EQUAL, '<=');
        } else {
          this.addToken(TokenType.LESS_THAN, char);
        }
        break;
      case '>':
        if (this.match('=')) {
          this.addToken(TokenType.GREATER_EQUAL, '>=');
        } else {
          this.addToken(TokenType.GREATER_THAN, char);
        }
        break;
      case '&':
        // A lone '&' is not a token; only '&&' is recognised.
        if (this.match('&')) {
          this.addToken(TokenType.LOGICAL_AND, '&&');
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;
      case '|':
        // A lone '|' is not a token; only '||' is recognised.
        if (this.match('|')) {
          this.addToken(TokenType.LOGICAL_OR, '||');
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;
      case '"':
        this.scanString('"');
        break;
      case "'":
        this.scanString("'");
        break;
      case '`':
        this.scanTemplateString();
        break;
      default:
        if (this.isDigit(char)) {
          this.scanNumber();
        } else if (this.isAlpha(char)) {
          this.scanIdentifier();
        } else {
          this.throwError(`Unexpected character: ${char}`);
        }
        break;
    }
  }

  /**
   * Scans a quoted string whose opening quote has already been consumed.
   * The token value is the decoded content without the quotes. The escapes
   * `\n`, `\t`, `\r`, `\\`, `\"` and `\'` are translated; any other escaped
   * character is kept as-is with the backslash dropped. Strings may span
   * several lines.
   *
   * @param quote - The quote character that terminates the string.
   * @throws LexicalError if the input ends before the closing quote.
   */
  private scanString(quote: string): void {
    let value = '';

    while (this.peek() !== quote && !this.isAtEnd()) {
      const char = this.advance();
      if (char !== '\\') {
        value += char;
        continue;
      }

      const escaped = this.advance();
      switch (escaped) {
        case 'n':
          value += '\n';
          break;
        case 't':
          value += '\t';
          break;
        case 'r':
          value += '\r';
          break;
        default:
          // Covers \\, \", \' and every unrecognised escape.
          value += escaped;
      }
    }

    if (this.isAtEnd()) {
      this.throwError('Unterminated string');
    }

    this.advance(); // Consume the closing quote.
    this.addToken(TokenType.STRING, value);
  }

  /**
   * Scans a backtick-delimited string whose opening backtick has already been
   * consumed. The content is kept raw (escapes are not decoded) and emitted as
   * a STRING token. A backslash always carries the following character with
   * it, so an escaped backtick does not end the string.
   *
   * @throws LexicalError if the input ends before the closing backtick.
   */
  private scanTemplateString(): void {
    let value = '';

    while (this.peek() !== '`' && !this.isAtEnd()) {
      const char = this.advance();
      value += char;
      if (char === '\\' && !this.isAtEnd()) {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      this.throwError('Unterminated template string');
    }

    this.advance(); // Consume the closing backtick.
    this.addToken(TokenType.STRING, value);
  }

  /**
   * Scans the rest of a decimal number whose first digit has already been
   * consumed. A fractional part is included only when the `.` is directly
   * followed by a digit. The token value is the exact source text.
   */
  private scanNumber(): void {
    while (this.isDigit(this.peek())) {
      this.advance();
    }

    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // Consume the '.'.
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    // Slice from the lexeme start so the already-consumed first digit is kept.
    const value = this.source.substring(this.tokenStart.offset, this.current);
    this.addToken(TokenType.NUMBER, value);
  }

  /**
   * Scans the rest of an identifier whose first character has already been
   * consumed and emits either the matching keyword token or IDENTIFIER.
   */
  private scanIdentifier(): void {
    while (this.isAlphaNumeric(this.peek())) {
      this.advance();
    }

    const value = this.source.substring(this.tokenStart.offset, this.current);
    this.addToken(this.getKeywordType(value), value);
  }

  /**
   * Emits the token for a `<` seen inside a markup block: HTML_TAG_CLOSE for
   * `</`, HTML_TAG_OPEN for a bare `<`. The tag name and attributes are left
   * for subsequent scanToken() calls.
   */
  private scanHTMLTag(): void {
    if (this.match('/')) {
      this.addToken(TokenType.HTML_TAG_CLOSE, '</');
    } else {
      this.addToken(TokenType.HTML_TAG_OPEN, '<');
    }
  }

  /**
   * Skips a block comment whose opening `/*` has already been consumed.
   * Comments do not nest: the first `*` followed by `/` ends the comment.
   *
   * @throws LexicalError if the input ends before the comment is closed.
   */
  private scanMultiLineComment(): void {
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // Consume '*'.
        this.advance(); // Consume '/'.
        return;
      }
      this.advance();
    }
    this.throwError('Unterminated comment');
  }

  /**
   * Maintains the context stack for a brace that has just been added to the
   * token list. `{` pushes a context chosen from the token preceding the
   * brace; `}` pops one, but never removes the root context.
   *
   * @param char - The brace that was just tokenized (`{` or `}`).
   */
  private updateContext(char: string): void {
    if (char === '{') {
      // The brace itself is the last token, so the one before it decides.
      const opener = this.tokens[this.tokens.length - 2];
      this.contextStack.push(this.contextOpenedBy(opener));
    } else if (char === '}' && this.contextStack.length > 1) {
      this.contextStack.pop();
    }
  }

  /**
   * Picks the context opened by a `{` from the token that precedes it. The
   * token type is compared (not its text) so a string literal such as
   * "markup" is not mistaken for the keyword.
   */
  private contextOpenedBy(opener: Token | undefined): LexicalContext {
    switch (opener?.type) {
      case TokenType.CLIENT:
        return LexicalContext.CLIENT_BLOCK;
      case TokenType.SERVER:
        return LexicalContext.SERVER_BLOCK;
      case TokenType.MARKUP:
        return LexicalContext.MARKUP_BLOCK;
      default:
        return LexicalContext.JAVASCRIPT;
    }
  }

  /** Returns the innermost context, or COMPONENT if the stack is empty. */
  private getCurrentContext(): LexicalContext {
    return this.contextStack[this.contextStack.length - 1] ?? LexicalContext.COMPONENT;
  }

  /** Returns the keyword token type for `value`, or IDENTIFIER if it is not reserved. */
  private getKeywordType(value: string): TokenType {
    return this.keywords.get(value) ?? TokenType.IDENTIFIER;
  }

  /** Consumes the next character only if it equals `expected`. */
  private match(expected: string): boolean {
    if (this.isAtEnd() || this.source.charAt(this.current) !== expected) {
      return false;
    }
    this.advance();
    return true;
  }

  /**
   * Consumes and returns the next character, or returns `'\0'` without moving
   * when the input is exhausted. This is the single place where line and
   * column are updated, so every scanner gets consistent newline tracking.
   */
  private advance(): string {
    if (this.isAtEnd()) return '\0';

    const char = this.source.charAt(this.current);
    this.current++;
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  /** Returns the next character without consuming it, or `'\0'` at end of input. */
  private peek(): string {
    return this.isAtEnd() ? '\0' : this.source.charAt(this.current);
  }

  /** Returns the character after the next one, or `'\0'` if there is none. */
  private peekNext(): string {
    const index = this.current + 1;
    return index >= this.source.length ? '\0' : this.source.charAt(index);
  }

  /** True once every character of the source has been consumed. */
  private isAtEnd(): boolean {
    return this.current >= this.source.length;
  }

  /** True for the ASCII digits 0-9. */
  private isDigit(char: string): boolean {
    return char >= '0' && char <= '9';
  }

  /** True for ASCII letters and underscore. */
  private isAlpha(char: string): boolean {
    return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || char === '_';
  }

  /** True for any character allowed after the first character of an identifier. */
  private isAlphaNumeric(char: string): boolean {
    return this.isAlpha(char) || this.isDigit(char);
  }

  /**
   * Appends a token that ends at the current scan position.
   *
   * @param type - Token type to record.
   * @param value - Token value to record.
   * @param startPosition - Where the token begins; defaults to the start of
   *   the lexeme currently being scanned.
   */
  private addToken(type: TokenType, value: string, startPosition?: SourcePosition): void {
    const start = startPosition ?? this.tokenStart;
    const end = this.getPosition();
    this.tokens.push({
      type,
      value,
      position: start,
      range: { start, end },
    });
  }

  /** Snapshot of the current scan position (line, column, character offset). */
  private getPosition(): SourcePosition {
    return {
      line: this.line,
      column: this.column,
      offset: this.current,
    };
  }

  /**
   * Throws a LexicalError built from the message, the current scan position,
   * the next unread character and the filename.
   */
  private throwError(message: string): never {
    throw new LexicalError(message, this.getPosition(), this.peek(), this.filename);
  }
}