UNPKG

petcarescript

Version:

PetCareScript - A modern, expressive programming language designed for humans

500 lines (450 loc) 16.1 kB
/**
 * PetCareScript Tokenizer
 * Analyzes source code and converts it to tokens
 */

const { TokenType, Token } = require('./tokens');

/**
 * Reserved words mapped to their token types.
 *
 * Built once at module load instead of on every identifier. The table has a
 * null prototype so that identifiers which happen to share a name with an
 * Object.prototype member (`constructor`, `toString`, `valueOf`, ...) are not
 * mistaken for keywords.
 */
const KEYWORDS = Object.freeze(Object.assign(Object.create(null), {
  // PetCareScript original keywords
  'store': TokenType.STORE,
  'when': TokenType.WHEN,
  'otherwise': TokenType.OTHERWISE,
  'repeat': TokenType.REPEAT,
  'loop': TokenType.LOOP,
  'again': TokenType.AGAIN,
  'until': TokenType.UNTIL,
  'unless': TokenType.UNLESS,
  'foreach': TokenType.FOREACH,
  'build': TokenType.BUILD,
  'give': TokenType.GIVE,
  'yes': TokenType.YES,
  'no': TokenType.NO,
  'empty': TokenType.EMPTY,
  'show': TokenType.SHOW,
  'blueprint': TokenType.BLUEPRINT,
  'self': TokenType.SELF,
  'parent': TokenType.PARENT,
  'also': TokenType.ALSO,
  'either': TokenType.EITHER,
  'and': TokenType.AND,
  'or': TokenType.OR,
  'not': TokenType.NOT,
  'is': TokenType.IS,
  'isnt': TokenType.ISNT,
  'between': TokenType.BETWEEN,
  'contains': TokenType.CONTAINS, // KEPT as a keyword so it can be used as an operator
  'in': TokenType.IN,
  'like': TokenType.LIKE,
  'break': TokenType.BREAK,
  'continue': TokenType.CONTINUE,
  'import': TokenType.IMPORT,
  'export': TokenType.EXPORT,
  'attempt': TokenType.ATTEMPT,
  'catch': TokenType.CATCH,
  'finally': TokenType.FINALLY,
  'throw': TokenType.THROW,
  'switch': TokenType.SWITCH,
  'case': TokenType.CASE,
  'default': TokenType.DEFAULT,

  // Type system
  'interface': TokenType.INTERFACE,
  'enum': TokenType.ENUM,
  'type': TokenType.TYPE,
  'namespace': TokenType.NAMESPACE,
  'module': TokenType.MODULE,
  'void': TokenType.VOID,

  // JavaScript-style alternatives
  'async': TokenType.ASYNC,
  'await': TokenType.AWAIT,
  'try': TokenType.TRY,
  'class': TokenType.CLASS,
  'function': TokenType.FUNCTION,
  'let': TokenType.LET,
  'const': TokenType.CONST,
  'var': TokenType.VAR,
  'if': TokenType.IF,
  'else': TokenType.ELSE,
  'while': TokenType.WHILE,
  'for': TokenType.FOR,
  'return': TokenType.RETURN,
  'true': TokenType.TRUE,
  'false': TokenType.FALSE,
  'null': TokenType.NULL,
  'undefined': TokenType.UNDEFINED,
  'this': TokenType.THIS,
  'super': TokenType.SUPER,
  'new': TokenType.NEW,
  'delete': TokenType.DELETE,
  'instanceof': TokenType.INSTANCEOF,
  'of': TokenType.OF,
  'static': TokenType.STATIC,
  'public': TokenType.PUBLIC,
  'private': TokenType.PRIVATE,
  'protected': TokenType.PROTECTED,
  'extends': TokenType.EXTENDS,
  'implements': TokenType.IMPLEMENTS,
  'from': TokenType.FROM,
  'as': TokenType.AS,
  'with': TokenType.WITH,
  'do': TokenType.DO,
  'goto': TokenType.GOTO,
  'label': TokenType.LABEL,
  'yield': TokenType.YIELD,
  'generator': TokenType.GENERATOR,
  'console': TokenType.CONSOLE,

  // Testing keywords
  'describe': TokenType.DESCRIBE,
  'it': TokenType.IT,
  'expect': TokenType.EXPECT,
  'mock': TokenType.MOCK,
  'spy': TokenType.SPY,
  'assert': TokenType.ASSERT,
  'before': TokenType.BEFORE,
  'after': TokenType.AFTER,
  'beforeEach': TokenType.BEFORE_EACH,
  'afterEach': TokenType.AFTER_EACH,

  // Decorator keywords
  'decorator': TokenType.DECORATOR,

  // HTTP keywords (only the essential ones are keywords)
  'server': TokenType.SERVER,
  'route': TokenType.ROUTE,
  'middleware': TokenType.MIDDLEWARE,
  'createServer': TokenType.CREATE_SERVER,

  // Promise keywords (only the essential ones)
  'Promise': TokenType.PROMISE,
  'log': TokenType.LOG,
}));

/**
 * Translates the character that follows a backslash into the character it
 * stands for. `n`, `t` and `r` map to newline, tab and carriage return; any
 * other character (including `\`, `"`, `'`) stands for itself.
 *
 * @param {string} char - The character immediately after the backslash.
 * @returns {string} The unescaped character.
 */
function unescapeChar(char) {
  switch (char) {
    case 'n':
      return '\n';
    case 't':
      return '\t';
    case 'r':
      return '\r';
    default:
      return char;
  }
}

/**
 * Converts PetCareScript source text into a flat list of tokens.
 *
 * Position bookkeeping: `line` and `column` are both 1-based and are advanced
 * in exactly one place per consumed character (`advance()` / `match()`). The
 * line/column stored on each token are the tokenizer's position at the moment
 * the token is emitted, i.e. just after the lexeme has been consumed.
 */
class Tokenizer {
  /**
   * @param {string} source - The source text to tokenize.
   */
  constructor(source) {
    this.source = source;
    this.tokens = [];
    this.start = 0; // index of the first character of the lexeme being scanned
    this.current = 0; // index of the next character to consume
    this.line = 1;
    this.column = 1;
  }

  /**
   * Scans the whole source and appends a trailing EOF token.
   *
   * @returns {Token[]} The accumulated token list (the same array as `this.tokens`).
   * @throws {Error} On an unexpected character or an unterminated string/template.
   */
  tokenize() {
    while (!this.isAtEnd()) {
      this.start = this.current;
      this.scanToken();
    }

    this.tokens.push(new Token(TokenType.EOF, '', null, this.line, this.column));
    return this.tokens;
  }

  /**
   * Scans a single lexeme starting at `this.start` and emits at most one token
   * for it (template strings may emit several).
   *
   * @throws {Error} If the character does not start any known lexeme.
   */
  scanToken() {
    const c = this.advance();

    switch (c) {
      // Whitespace produces no token; advance() already updated line/column.
      case ' ':
      case '\r':
      case '\t':
      case '\n':
        break;

      case '(': this.addToken(TokenType.LEFT_PAREN); break;
      case ')': this.addToken(TokenType.RIGHT_PAREN); break;
      case '{': this.addToken(TokenType.LEFT_BRACE); break;
      case '}': this.addToken(TokenType.RIGHT_BRACE); break;
      case '[': this.addToken(TokenType.LEFT_BRACKET); break;
      case ']': this.addToken(TokenType.RIGHT_BRACKET); break;
      case ',': this.addToken(TokenType.COMMA); break;

      case '.':
        // Spread operator (...). Look ahead at BOTH following characters
        // before consuming anything, so that ".." is scanned as two DOT
        // tokens rather than swallowing the second dot.
        if (this.peek() === '.' && this.peekNext() === '.') {
          this.advance();
          this.advance();
          this.addToken(TokenType.DOT_DOT_DOT);
        } else {
          this.addToken(TokenType.DOT);
        }
        break;

      case '-':
        if (this.match('-')) {
          this.addToken(TokenType.MINUS_MINUS);
        } else if (this.match('=')) {
          this.addToken(TokenType.MINUS_EQUAL);
        } else if (this.match('>')) {
          this.addToken(TokenType.ARROW); // "->" and "=>" share one token type
        } else {
          this.addToken(TokenType.MINUS);
        }
        break;

      case '+':
        if (this.match('+')) {
          this.addToken(TokenType.PLUS_PLUS);
        } else if (this.match('=')) {
          this.addToken(TokenType.PLUS_EQUAL);
        } else {
          this.addToken(TokenType.PLUS);
        }
        break;

      case ';': this.addToken(TokenType.SEMICOLON); break;
      case '*': this.addToken(TokenType.STAR); break;
      case '%': this.addToken(TokenType.PERCENT); break;
      case ':': this.addToken(TokenType.COLON); break;
      case '?': this.addToken(TokenType.QUESTION); break;
      case '@': this.addToken(TokenType.AT); break;
      case '#': this.addToken(TokenType.HASH); break;

      case '!':
        this.addToken(this.match('=') ? TokenType.BANG_EQUAL : TokenType.BANG);
        break;

      case '=':
        if (this.match('=')) {
          this.addToken(this.match('=') ? TokenType.EQUAL_EQUAL_EQUAL : TokenType.EQUAL_EQUAL);
        } else if (this.match('>')) {
          this.addToken(TokenType.ARROW);
        } else {
          this.addToken(TokenType.EQUAL);
        }
        break;

      case '<':
        this.addToken(this.match('=') ? TokenType.LESS_EQUAL : TokenType.LESS);
        break;

      case '>':
        this.addToken(this.match('=') ? TokenType.GREATER_EQUAL : TokenType.GREATER);
        break;

      case '&':
        this.addToken(this.match('&') ? TokenType.AND_AND : TokenType.AMPERSAND);
        break;

      case '|':
        this.addToken(this.match('|') ? TokenType.OR_OR : TokenType.PIPE);
        break;

      case '/':
        if (this.match('/')) {
          // Line comment: discard everything up to (not including) the newline.
          while (this.peek() !== '\n' && !this.isAtEnd()) {
            this.advance();
          }
        } else if (this.match('*')) {
          this.blockComment();
        } else {
          this.addToken(TokenType.SLASH);
        }
        break;

      case '"': this.string(); break;
      case "'": this.singleQuoteString(); break;
      case '`': this.templateString(); break;

      default:
        if (this.isDigit(c)) {
          this.number();
        } else if (this.isAlpha(c)) {
          this.identifier();
        } else {
          throw new Error(`Unexpected character: ${c} at line ${this.line}`);
        }
        break;
    }
  }

  /**
   * Scans a backtick-delimited template string (the opening backtick has
   * already been consumed).
   *
   * Emits, in source order, a TEMPLATE_STRING token for each non-empty literal
   * chunk and a TEMPLATE_EXPRESSION token for each `${...}` interpolation,
   * followed by a TEMPLATE_END token. The literal of a TEMPLATE_EXPRESSION is
   * the token array produced by tokenizing the expression text with a fresh
   * Tokenizer, so line/column values inside it are relative to that text.
   *
   * Limitation: the end of an interpolation is found by counting `{` and `}`
   * only, so a brace inside a string literal within the expression is counted
   * too.
   *
   * @throws {Error} If the template string or an interpolation is not closed.
   */
  templateString() {
    let value = '';

    while (this.peek() !== '`' && !this.isAtEnd()) {
      // peek() yields a single character, so "${" needs a two-character lookahead.
      if (this.peek() === '$' && this.peekNext() === '{') {
        // Flush the literal text collected so far.
        if (value.length > 0) {
          this.addToken(TokenType.TEMPLATE_STRING, value);
          value = '';
        }

        this.advance(); // $
        this.advance(); // {

        // Collect the expression text up to the matching closing brace.
        let braceCount = 1;
        let expr = '';
        while (braceCount > 0 && !this.isAtEnd()) {
          const ch = this.advance();
          if (ch === '{') {
            braceCount++;
          } else if (ch === '}') {
            braceCount--;
          }
          if (braceCount > 0) {
            expr += ch;
          }
        }

        if (braceCount > 0) {
          // Ran out of input inside ${...}: do not tokenize a partial expression.
          throw new Error(`Unterminated template string at line ${this.line}`);
        }

        const exprTokens = new Tokenizer(expr).tokenize();
        this.addToken(TokenType.TEMPLATE_EXPRESSION, exprTokens);
      } else if (this.peek() === '\\') {
        this.advance(); // consume backslash
        // Same escape rules as quoted strings; "\$" and "\`" stay literal.
        value += unescapeChar(this.advance());
      } else {
        value += this.advance();
      }
    }

    if (this.isAtEnd()) {
      throw new Error(`Unterminated template string at line ${this.line}`);
    }

    this.advance(); // closing `

    if (value.length > 0) {
      this.addToken(TokenType.TEMPLATE_STRING, value);
    }

    this.addToken(TokenType.TEMPLATE_END);
  }

  /**
   * Skips a block comment (the opening slash-star has already been consumed).
   * Stops after the closing star-slash, or at end of input if the comment is
   * never closed; no error is raised in that case.
   */
  blockComment() {
    while (!this.isAtEnd()) {
      if (this.peek() === '*' && this.peekNext() === '/') {
        this.advance(); // consume '*'
        this.advance(); // consume '/'
        break;
      }
      this.advance();
    }
  }

  /**
   * Scans the rest of an identifier or keyword (its first character has
   * already been consumed) and emits the matching token.
   */
  identifier() {
    while (this.isAlphaNumeric(this.peek())) {
      this.advance();
    }

    const text = this.source.substring(this.start, this.current);
    const type = this.getKeywordType(text) || TokenType.IDENTIFIER;
    this.addToken(type);
  }

  /**
   * Looks up the token type of a reserved word.
   *
   * @param {string} text - Candidate identifier text.
   * @returns {*} The keyword's token type, or `undefined` if `text` is not a keyword.
   */
  getKeywordType(text) {
    return KEYWORDS[text];
  }

  /**
   * Scans the rest of a numeric literal (digits with an optional fractional
   * part) and emits a NUMBER token whose literal is the parsed value.
   */
  number() {
    while (this.isDigit(this.peek())) {
      this.advance();
    }

    // A '.' only belongs to the number when a digit follows it.
    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance();
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    this.addToken(
      TokenType.NUMBER,
      parseFloat(this.source.substring(this.start, this.current))
    );
  }

  /**
   * Scans a double-quoted string (the opening quote has already been consumed).
   *
   * @throws {Error} If the string is not closed before end of input.
   */
  string() {
    this.quotedString('"');
  }

  /**
   * Scans a single-quoted string (the opening quote has already been consumed).
   *
   * @throws {Error} If the string is not closed before end of input.
   */
  singleQuoteString() {
    this.quotedString("'");
  }

  /**
   * Shared implementation for quoted strings: consumes up to and including the
   * closing delimiter and emits a STRING token whose literal has its escape
   * sequences resolved.
   *
   * @param {string} quote - The delimiter character that ends the string.
   * @throws {Error} If the string is not closed before end of input.
   */
  quotedString(quote) {
    while (this.peek() !== quote && !this.isAtEnd()) {
      if (this.peek() === '\\') {
        // Skip the backslash so an escaped delimiter does not end the string.
        this.advance();
      }
      this.advance();
    }

    if (this.isAtEnd()) {
      throw new Error(`Unterminated string at line ${this.line}`);
    }

    this.advance(); // closing quote

    // Strip the surrounding quotes before resolving escapes.
    const value = this.source.substring(this.start + 1, this.current - 1);
    this.addToken(TokenType.STRING, this.processEscapes(value));
  }

  /**
   * Replaces backslash escape sequences in `str` with the characters they
   * represent. A backslash directly followed by a newline is left untouched,
   * because `.` in the pattern does not match line terminators.
   *
   * @param {string} str - Raw string contents without the surrounding quotes.
   * @returns {string} The string with escapes resolved.
   */
  processEscapes(str) {
    return str.replace(/\\(.)/g, (match, char) => unescapeChar(char));
  }

  /**
   * Consumes the next character only if it equals `expected`.
   *
   * @param {string} expected - The single character to look for.
   * @returns {boolean} `true` if the character was consumed.
   */
  match(expected) {
    if (this.isAtEnd()) return false;
    if (this.source.charAt(this.current) !== expected) return false;

    this.current++;
    this.column++;
    return true;
  }

  /**
   * @returns {string} The next unconsumed character, or '\0' at end of input.
   */
  peek() {
    if (this.isAtEnd()) return '\0';
    return this.source.charAt(this.current);
  }

  /**
   * @returns {string} The character after the next one, or '\0' if there is none.
   */
  peekNext() {
    if (this.current + 1 >= this.source.length) return '\0';
    return this.source.charAt(this.current + 1);
  }

  /**
   * @param {string} c - A single character.
   * @returns {boolean} `true` for ASCII letters and underscore.
   */
  isAlpha(c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_';
  }

  /**
   * @param {string} c - A single character.
   * @returns {boolean} `true` for ASCII letters, digits and underscore.
   */
  isAlphaNumeric(c) {
    return this.isAlpha(c) || this.isDigit(c);
  }

  /**
   * @param {string} c - A single character.
   * @returns {boolean} `true` for the ASCII digits 0-9.
   */
  isDigit(c) {
    return c >= '0' && c <= '9';
  }

  /**
   * @returns {boolean} `true` once every character of the source has been consumed.
   */
  isAtEnd() {
    return this.current >= this.source.length;
  }

  /**
   * Consumes and returns the next character, keeping `line` and `column` in
   * sync: a newline moves to column 1 of the next line, anything else moves
   * one column to the right. Doing this here (rather than at each call site)
   * keeps positions consistent inside strings, templates and comments.
   *
   * @returns {string} The consumed character ('' when already at end of input).
   */
  advance() {
    const c = this.source.charAt(this.current++);
    if (c === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return c;
  }

  /**
   * Emits a token for the lexeme spanning `this.start` to `this.current`,
   * stamped with the tokenizer's current line and column.
   *
   * @param {*} type - One of the TokenType values.
   * @param {*} [literal=null] - Parsed value attached to the token, if any.
   */
  addToken(type, literal = null) {
    const text = this.source.substring(this.start, this.current);
    this.tokens.push(new Token(type, text, literal, this.line, this.column));
  }
}

module.exports = Tokenizer;