// petcarescript
// Version:
// PetCareScript - A modern, expressive programming language designed for humans
// 500 lines (450 loc) • 16.1 kB
// JavaScript
/**
* PetCareScript Tokenizer
* Analyzes source code and converts it to tokens
*/
const { TokenType, Token } = require('./tokens');
/**
 * Lexical scanner for PetCareScript.
 *
 * Walks the source text one character at a time and produces a flat list of
 * `Token` objects terminated by an EOF token. Every token is stamped with the
 * 1-based line and column at which scanning of that token began.
 */
class Tokenizer {
  /**
   * @param {string} source - Complete program text to scan.
   */
  constructor(source) {
    this.source = source;
    this.tokens = [];
    // Index of the first character of the token being scanned.
    this.start = 0;
    // Index of the next character to be consumed.
    this.current = 0;
    // Position (1-based) of the next character to be consumed.
    this.line = 1;
    this.column = 1;
    // Position (1-based) of the first character of the token being scanned.
    this.startLine = 1;
    this.startColumn = 1;
  }

  /**
   * Scans the whole source and returns the accumulated token list.
   *
   * @returns {Token[]} All tokens, always ending with an EOF token.
   * @throws {Error} On an unexpected character or an unterminated
   *   string/template literal.
   */
  tokenize() {
    while (!this.isAtEnd()) {
      this.start = this.current;
      this.startLine = this.line;
      this.startColumn = this.column;
      this.scanToken();
    }
    this.tokens.push(new Token(TokenType.EOF, '', null, this.line, this.column));
    return this.tokens;
  }

  /**
   * Consumes exactly one lexical element (token, whitespace run member, or
   * comment) starting at `this.current`.
   *
   * @throws {Error} If the character does not start any known token.
   */
  scanToken() {
    const c = this.advance();
    switch (c) {
      case ' ':
      case '\r':
      case '\t':
      case '\n':
        // Whitespace produces no token; advance() already updated line/column.
        break;
      case '(':
        this.addToken(TokenType.LEFT_PAREN);
        break;
      case ')':
        this.addToken(TokenType.RIGHT_PAREN);
        break;
      case '{':
        this.addToken(TokenType.LEFT_BRACE);
        break;
      case '}':
        this.addToken(TokenType.RIGHT_BRACE);
        break;
      case '[':
        this.addToken(TokenType.LEFT_BRACKET);
        break;
      case ']':
        this.addToken(TokenType.RIGHT_BRACKET);
        break;
      case ',':
        this.addToken(TokenType.COMMA);
        break;
      case '.':
        // Spread operator (...). Look ahead at BOTH following characters
        // before consuming anything, so that ".." is scanned as two DOT
        // tokens instead of swallowing the second dot.
        if (this.peek() === '.' && this.peekNext() === '.') {
          this.advance();
          this.advance();
          this.addToken(TokenType.DOT_DOT_DOT);
        } else {
          this.addToken(TokenType.DOT);
        }
        break;
      case '-':
        if (this.match('-')) {
          this.addToken(TokenType.MINUS_MINUS);
        } else if (this.match('=')) {
          this.addToken(TokenType.MINUS_EQUAL);
        } else if (this.match('>')) {
          this.addToken(TokenType.ARROW);
        } else {
          this.addToken(TokenType.MINUS);
        }
        break;
      case '+':
        if (this.match('+')) {
          this.addToken(TokenType.PLUS_PLUS);
        } else if (this.match('=')) {
          this.addToken(TokenType.PLUS_EQUAL);
        } else {
          this.addToken(TokenType.PLUS);
        }
        break;
      case ';':
        this.addToken(TokenType.SEMICOLON);
        break;
      case '*':
        this.addToken(TokenType.STAR);
        break;
      case '%':
        this.addToken(TokenType.PERCENT);
        break;
      case ':':
        this.addToken(TokenType.COLON);
        break;
      case '?':
        this.addToken(TokenType.QUESTION);
        break;
      case '@':
        this.addToken(TokenType.AT);
        break;
      case '#':
        this.addToken(TokenType.HASH);
        break;
      case '!':
        this.addToken(this.match('=') ? TokenType.BANG_EQUAL : TokenType.BANG);
        break;
      case '=':
        if (this.match('=')) {
          this.addToken(this.match('=') ? TokenType.EQUAL_EQUAL_EQUAL : TokenType.EQUAL_EQUAL);
        } else if (this.match('>')) {
          // "=>" and "->" both map to the same ARROW token type.
          this.addToken(TokenType.ARROW);
        } else {
          this.addToken(TokenType.EQUAL);
        }
        break;
      case '<':
        this.addToken(this.match('=') ? TokenType.LESS_EQUAL : TokenType.LESS);
        break;
      case '>':
        this.addToken(this.match('=') ? TokenType.GREATER_EQUAL : TokenType.GREATER);
        break;
      case '&':
        this.addToken(this.match('&') ? TokenType.AND_AND : TokenType.AMPERSAND);
        break;
      case '|':
        this.addToken(this.match('|') ? TokenType.OR_OR : TokenType.PIPE);
        break;
      case '/':
        if (this.match('/')) {
          // Line comment: discard everything up to (not including) the newline.
          while (!this.isAtEnd() && this.peek() !== '\n') this.advance();
        } else if (this.match('*')) {
          this.blockComment();
        } else {
          this.addToken(TokenType.SLASH);
        }
        break;
      case '"':
        this.string();
        break;
      case "'":
        this.singleQuoteString();
        break;
      case '`':
        this.templateString();
        break;
      default:
        if (this.isDigit(c)) {
          this.number();
        } else if (this.isAlpha(c)) {
          this.identifier();
        } else {
          throw new Error(`Unexpected character: ${c} at line ${this.line}`);
        }
        break;
    }
  }

  /**
   * Scans a back-tick template literal whose opening back-tick has already
   * been consumed.
   *
   * Emits, in source order, a TEMPLATE_STRING token for each non-empty run of
   * literal text (escape sequences translated), a TEMPLATE_EXPRESSION token
   * for each `${...}` placeholder (its literal is the token list produced by
   * a nested Tokenizer run over the placeholder text, including that run's
   * EOF token), and finally a TEMPLATE_END token. All of these tokens share
   * the position and the cumulative lexeme of the literal scanned so far.
   *
   * @throws {Error} If the literal or one of its placeholders is not closed.
   */
  templateString() {
    let text = '';
    while (!this.isAtEnd() && this.peek() !== '`') {
      // peek() yields a single character, so "${" needs a two-step lookahead.
      if (this.peek() === '$' && this.peekNext() === '{') {
        // Flush the literal text collected before the placeholder.
        if (text.length > 0) {
          this.addToken(TokenType.TEMPLATE_STRING, text);
          text = '';
        }
        this.advance(); // $
        this.advance(); // {

        // Collect the placeholder source up to its matching closing brace.
        let depth = 1;
        let expr = '';
        while (depth > 0 && !this.isAtEnd()) {
          const ch = this.advance();
          if (ch === '{') depth++;
          else if (ch === '}') depth--;
          if (depth > 0) expr += ch;
        }
        if (depth > 0) {
          // Input ended inside the placeholder; do not tokenize a fragment.
          throw new Error(`Unterminated template string at line ${this.line}`);
        }
        this.addToken(TokenType.TEMPLATE_EXPRESSION, new Tokenizer(expr).tokenize());
      } else if (this.peek() === '\\') {
        this.advance(); // consume backslash
        // Translate the escape the same way ordinary string literals do.
        text += this.translateEscape(this.advance());
      } else {
        text += this.advance();
      }
    }
    if (this.isAtEnd()) {
      throw new Error(`Unterminated template string at line ${this.line}`);
    }
    this.advance(); // closing `
    if (text.length > 0) {
      this.addToken(TokenType.TEMPLATE_STRING, text);
    }
    this.addToken(TokenType.TEMPLATE_END);
  }

  /**
   * Skips a block comment whose opening delimiter has already been consumed.
   * If the closing delimiter is missing, the rest of the input is consumed
   * without raising an error.
   */
  blockComment() {
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // consume '*'
        this.advance(); // consume '/'
        return;
      }
      this.advance();
    }
  }

  /**
   * Scans the remainder of an identifier or keyword (its first character has
   * already been consumed) and emits the matching token.
   */
  identifier() {
    while (this.isAlphaNumeric(this.peek())) this.advance();
    const text = this.source.substring(this.start, this.current);
    const keywordType = this.getKeywordType(text);
    this.addToken(keywordType == null ? TokenType.IDENTIFIER : keywordType);
  }

  /**
   * Builds (once) and returns the keyword lookup table shared by all
   * Tokenizer instances. A Map is used so that words which happen to be
   * Object.prototype members ("constructor", "toString", ...) are not
   * mistaken for keywords.
   *
   * @returns {Map<string, *>} Keyword text mapped to its TokenType member.
   */
  static keywordTable() {
    if (!Tokenizer.keywordCache) {
      Tokenizer.keywordCache = new Map([
        // PetCareScript original keywords
        ['store', TokenType.STORE],
        ['when', TokenType.WHEN],
        ['otherwise', TokenType.OTHERWISE],
        ['repeat', TokenType.REPEAT],
        ['loop', TokenType.LOOP],
        ['again', TokenType.AGAIN],
        ['until', TokenType.UNTIL],
        ['unless', TokenType.UNLESS],
        ['foreach', TokenType.FOREACH],
        ['build', TokenType.BUILD],
        ['give', TokenType.GIVE],
        ['yes', TokenType.YES],
        ['no', TokenType.NO],
        ['empty', TokenType.EMPTY],
        ['show', TokenType.SHOW],
        ['blueprint', TokenType.BLUEPRINT],
        ['self', TokenType.SELF],
        ['parent', TokenType.PARENT],
        ['also', TokenType.ALSO],
        ['either', TokenType.EITHER],
        ['and', TokenType.AND],
        ['or', TokenType.OR],
        ['not', TokenType.NOT],
        ['is', TokenType.IS],
        ['isnt', TokenType.ISNT],
        ['between', TokenType.BETWEEN],
        // Kept as a keyword so it can be used as an operator
        ['contains', TokenType.CONTAINS],
        ['in', TokenType.IN],
        ['like', TokenType.LIKE],
        ['break', TokenType.BREAK],
        ['continue', TokenType.CONTINUE],
        ['import', TokenType.IMPORT],
        ['export', TokenType.EXPORT],
        ['attempt', TokenType.ATTEMPT],
        ['catch', TokenType.CATCH],
        ['finally', TokenType.FINALLY],
        ['throw', TokenType.THROW],
        ['switch', TokenType.SWITCH],
        ['case', TokenType.CASE],
        ['default', TokenType.DEFAULT],
        // Type system
        ['interface', TokenType.INTERFACE],
        ['enum', TokenType.ENUM],
        ['type', TokenType.TYPE],
        ['namespace', TokenType.NAMESPACE],
        ['module', TokenType.MODULE],
        ['void', TokenType.VOID],
        // JavaScript-style alternatives
        ['async', TokenType.ASYNC],
        ['await', TokenType.AWAIT],
        ['try', TokenType.TRY],
        ['class', TokenType.CLASS],
        ['function', TokenType.FUNCTION],
        ['let', TokenType.LET],
        ['const', TokenType.CONST],
        ['var', TokenType.VAR],
        ['if', TokenType.IF],
        ['else', TokenType.ELSE],
        ['while', TokenType.WHILE],
        ['for', TokenType.FOR],
        ['return', TokenType.RETURN],
        ['true', TokenType.TRUE],
        ['false', TokenType.FALSE],
        ['null', TokenType.NULL],
        ['undefined', TokenType.UNDEFINED],
        ['this', TokenType.THIS],
        ['super', TokenType.SUPER],
        ['new', TokenType.NEW],
        ['delete', TokenType.DELETE],
        ['instanceof', TokenType.INSTANCEOF],
        ['of', TokenType.OF],
        ['static', TokenType.STATIC],
        ['public', TokenType.PUBLIC],
        ['private', TokenType.PRIVATE],
        ['protected', TokenType.PROTECTED],
        ['extends', TokenType.EXTENDS],
        ['implements', TokenType.IMPLEMENTS],
        ['from', TokenType.FROM],
        ['as', TokenType.AS],
        ['with', TokenType.WITH],
        ['do', TokenType.DO],
        ['goto', TokenType.GOTO],
        ['label', TokenType.LABEL],
        ['yield', TokenType.YIELD],
        ['generator', TokenType.GENERATOR],
        ['console', TokenType.CONSOLE],
        // Testing keywords
        ['describe', TokenType.DESCRIBE],
        ['it', TokenType.IT],
        ['expect', TokenType.EXPECT],
        ['mock', TokenType.MOCK],
        ['spy', TokenType.SPY],
        ['assert', TokenType.ASSERT],
        ['before', TokenType.BEFORE],
        ['after', TokenType.AFTER],
        ['beforeEach', TokenType.BEFORE_EACH],
        ['afterEach', TokenType.AFTER_EACH],
        // Decorator keywords
        ['decorator', TokenType.DECORATOR],
        // HTTP keywords (only the essential ones are treated as keywords)
        ['server', TokenType.SERVER],
        ['route', TokenType.ROUTE],
        ['middleware', TokenType.MIDDLEWARE],
        ['createServer', TokenType.CREATE_SERVER],
        // Promise keywords (only the essential ones)
        ['Promise', TokenType.PROMISE],
        ['log', TokenType.LOG],
      ]);
    }
    return Tokenizer.keywordCache;
  }

  /**
   * Looks up the token type reserved for a word.
   *
   * @param {string} text - Candidate identifier text (case-sensitive).
   * @returns {*} The TokenType member for the keyword, or `undefined` when
   *   `text` is not a keyword.
   */
  getKeywordType(text) {
    return Tokenizer.keywordTable().get(text);
  }

  /**
   * Scans the remainder of a numeric literal (integer digits with an
   * optional fractional part) and emits a NUMBER token whose literal is the
   * parsed value. A trailing "." not followed by a digit is left unconsumed.
   */
  number() {
    while (this.isDigit(this.peek())) this.advance();
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      while (this.isDigit(this.peek())) this.advance();
    }
    const lexeme = this.source.substring(this.start, this.current);
    this.addToken(TokenType.NUMBER, parseFloat(lexeme));
  }

  /**
   * Scans a double-quoted string literal (opening quote already consumed).
   *
   * @throws {Error} If the closing quote is missing.
   */
  string() {
    this.quotedString('"');
  }

  /**
   * Scans a single-quoted string literal (opening quote already consumed).
   *
   * @throws {Error} If the closing quote is missing.
   */
  singleQuoteString() {
    this.quotedString("'");
  }

  /**
   * Shared implementation for both quote styles: consumes up to and
   * including the closing quote and emits a STRING token whose literal is
   * the text between the quotes with escape sequences translated.
   *
   * @param {string} quote - The delimiter character that closes the literal.
   * @throws {Error} If the input ends before the closing quote.
   */
  quotedString(quote) {
    while (!this.isAtEnd() && this.peek() !== quote) {
      // Skip the backslash so that an escaped quote cannot end the literal.
      if (this.peek() === '\\') this.advance();
      this.advance();
    }
    if (this.isAtEnd()) {
      throw new Error(`Unterminated string at line ${this.line}`);
    }
    this.advance(); // closing quote
    const raw = this.source.substring(this.start + 1, this.current - 1);
    this.addToken(TokenType.STRING, this.processEscapes(raw));
  }

  /**
   * Replaces backslash escape sequences in raw literal text.
   *
   * @param {string} str - Text between the quotes, exactly as written.
   * @returns {string} The text with each escape sequence translated.
   */
  processEscapes(str) {
    return str.replace(/\\(.)/g, (match, char) => this.translateEscape(char));
  }

  /**
   * Maps the character following a backslash to the character it denotes.
   *
   * @param {string} char - Character that followed the backslash.
   * @returns {string} Newline, tab or carriage return for `n`, `t`, `r`;
   *   any other character is returned unchanged.
   */
  translateEscape(char) {
    switch (char) {
      case 'n': return '\n';
      case 't': return '\t';
      case 'r': return '\r';
      default: return char;
    }
  }

  /**
   * Consumes the next character only if it equals `expected`.
   *
   * @param {string} expected - Single character to test for.
   * @returns {boolean} `true` if the character was consumed.
   */
  match(expected) {
    if (this.isAtEnd() || this.source.charAt(this.current) !== expected) {
      return false;
    }
    this.advance();
    return true;
  }

  /**
   * @returns {string} The next unconsumed character, or '\0' at end of input.
   */
  peek() {
    return this.isAtEnd() ? '\0' : this.source.charAt(this.current);
  }

  /**
   * @returns {string} The character after the next one, or '\0' if there is
   *   none.
   */
  peekNext() {
    const index = this.current + 1;
    return index >= this.source.length ? '\0' : this.source.charAt(index);
  }

  /**
   * @param {string} c - Single character.
   * @returns {boolean} `true` for ASCII letters and underscore.
   */
  isAlpha(c) {
    return (c >= 'a' && c <= 'z') ||
      (c >= 'A' && c <= 'Z') ||
      c === '_';
  }

  /**
   * @param {string} c - Single character.
   * @returns {boolean} `true` for ASCII letters, digits and underscore.
   */
  isAlphaNumeric(c) {
    return this.isAlpha(c) || this.isDigit(c);
  }

  /**
   * @param {string} c - Single character.
   * @returns {boolean} `true` for ASCII digits 0-9.
   */
  isDigit(c) {
    return c >= '0' && c <= '9';
  }

  /**
   * @returns {boolean} `true` once every character has been consumed.
   */
  isAtEnd() {
    return this.current >= this.source.length;
  }

  /**
   * Consumes and returns the next character. This is the single place where
   * line and column are updated, so every caller (including match()) keeps
   * positions consistent: a newline moves to column 1 of the next line, any
   * other character moves one column to the right.
   *
   * @returns {string} The consumed character ('' when already at the end).
   */
  advance() {
    const c = this.source.charAt(this.current++);
    if (c === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return c;
  }

  /**
   * Appends a token covering `source[start, current)` to the token list,
   * stamped with the position recorded when scanning of the token began.
   *
   * @param {*} type - TokenType member for the token.
   * @param {*} [literal=null] - Parsed value carried by the token, if any.
   */
  addToken(type, literal = null) {
    const lexeme = this.source.substring(this.start, this.current);
    this.tokens.push(new Token(type, lexeme, literal, this.startLine, this.startColumn));
  }
}
// Expose the Tokenizer class as this module's sole CommonJS export.
module.exports = Tokenizer;