UNPKG

@prism-lang/core

Version:

A programming language for uncertainty

667 lines 26.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Tokenizer = exports.TokenType = void 0; exports.tokenize = tokenize; var TokenType; (function (TokenType) { // Literals TokenType["NUMBER"] = "NUMBER"; TokenType["STRING"] = "STRING"; TokenType["INTERPOLATED_STRING"] = "INTERPOLATED_STRING"; TokenType["IDENTIFIER"] = "IDENTIFIER"; // Keywords TokenType["IF"] = "IF"; TokenType["ELSE"] = "ELSE"; TokenType["UNCERTAIN"] = "UNCERTAIN"; TokenType["HIGH"] = "HIGH"; TokenType["MEDIUM"] = "MEDIUM"; TokenType["LOW"] = "LOW"; TokenType["DEFAULT"] = "DEFAULT"; TokenType["IN"] = "IN"; TokenType["CONTEXT"] = "CONTEXT"; TokenType["SHIFTING"] = "SHIFTING"; TokenType["TO"] = "TO"; TokenType["AGENTS"] = "AGENTS"; TokenType["AGENT"] = "AGENT"; TokenType["CONFIDENCE"] = "CONFIDENCE"; TokenType["FUNCTION"] = "FUNCTION"; TokenType["RETURN"] = "RETURN"; TokenType["LET"] = "LET"; TokenType["CONST"] = "CONST"; TokenType["TRUE"] = "TRUE"; TokenType["FALSE"] = "FALSE"; TokenType["NULL"] = "NULL"; TokenType["UNDEFINED"] = "UNDEFINED"; TokenType["FOR"] = "FOR"; TokenType["WHILE"] = "WHILE"; TokenType["DO"] = "DO"; TokenType["BREAK"] = "BREAK"; TokenType["CONTINUE"] = "CONTINUE"; TokenType["TYPEOF"] = "TYPEOF"; TokenType["INSTANCEOF"] = "INSTANCEOF"; TokenType["IMPORT"] = "IMPORT"; TokenType["EXPORT"] = "EXPORT"; TokenType["FROM"] = "FROM"; TokenType["AS"] = "AS"; TokenType["ASYNC"] = "ASYNC"; TokenType["AWAIT"] = "AWAIT"; // Operators TokenType["PLUS"] = "PLUS"; TokenType["MINUS"] = "MINUS"; TokenType["STAR"] = "STAR"; TokenType["STAR_STAR"] = "STAR_STAR"; TokenType["SLASH"] = "SLASH"; TokenType["PERCENT"] = "PERCENT"; TokenType["EQUAL"] = "EQUAL"; TokenType["EQUAL_EQUAL"] = "EQUAL_EQUAL"; TokenType["EQUAL_EQUAL_EQUAL"] = "EQUAL_EQUAL_EQUAL"; TokenType["NOT_EQUAL"] = "NOT_EQUAL"; TokenType["NOT_EQUAL_EQUAL"] = "NOT_EQUAL_EQUAL"; TokenType["PLUS_EQUAL"] = "PLUS_EQUAL"; TokenType["MINUS_EQUAL"] = "MINUS_EQUAL"; TokenType["STAR_EQUAL"] = "STAR_EQUAL"; TokenType["SLASH_EQUAL"] = "SLASH_EQUAL"; TokenType["PERCENT_EQUAL"] = "PERCENT_EQUAL"; TokenType["CONFIDENCE_PLUS_EQUAL"] = "CONFIDENCE_PLUS_EQUAL"; TokenType["CONFIDENCE_MINUS_EQUAL"] = "CONFIDENCE_MINUS_EQUAL"; TokenType["CONFIDENCE_STAR_EQUAL"] = "CONFIDENCE_STAR_EQUAL"; TokenType["CONFIDENCE_SLASH_EQUAL"] = "CONFIDENCE_SLASH_EQUAL"; TokenType["LESS"] = "LESS"; TokenType["GREATER"] = "GREATER"; TokenType["LESS_EQUAL"] = "LESS_EQUAL"; TokenType["GREATER_EQUAL"] = "GREATER_EQUAL"; TokenType["CONFIDENCE_ARROW"] = "CONFIDENCE_ARROW"; TokenType["CONFIDENCE_EXTRACT"] = "CONFIDENCE_EXTRACT"; TokenType["CONFIDENCE_CHAIN"] = "CONFIDENCE_CHAIN"; TokenType["CONFIDENCE_COALESCE"] = "CONFIDENCE_COALESCE"; TokenType["CONFIDENCE_AND"] = "CONFIDENCE_AND"; TokenType["CONFIDENCE_OR"] = "CONFIDENCE_OR"; TokenType["CONFIDENCE_PLUS"] = "CONFIDENCE_PLUS"; TokenType["CONFIDENCE_MINUS"] = "CONFIDENCE_MINUS"; TokenType["CONFIDENCE_STAR"] = "CONFIDENCE_STAR"; TokenType["CONFIDENCE_SLASH"] = "CONFIDENCE_SLASH"; TokenType["CONFIDENCE_EQUAL"] = "CONFIDENCE_EQUAL"; TokenType["CONFIDENCE_NOT_EQUAL"] = "CONFIDENCE_NOT_EQUAL"; TokenType["CONFIDENCE_GREATER"] = "CONFIDENCE_GREATER"; TokenType["CONFIDENCE_LESS"] = "CONFIDENCE_LESS"; TokenType["CONFIDENCE_GREATER_EQUAL"] = "CONFIDENCE_GREATER_EQUAL"; TokenType["CONFIDENCE_LESS_EQUAL"] = "CONFIDENCE_LESS_EQUAL"; TokenType["CONFIDENCE_DOT"] = "CONFIDENCE_DOT"; TokenType["CONFIDENCE_QUESTION"] = "CONFIDENCE_QUESTION"; TokenType["CONFIDENCE_IN"] = "CONFIDENCE_IN"; TokenType["CONFIDENCE_INSTANCEOF"] = "CONFIDENCE_INSTANCEOF"; TokenType["PARALLEL_CONFIDENCE"] = "PARALLEL_CONFIDENCE"; TokenType["THRESHOLD_GATE"] = "THRESHOLD_GATE"; TokenType["AND"] = "AND"; TokenType["OR"] = "OR"; TokenType["NOT"] = "NOT"; TokenType["TILDE"] = "TILDE"; TokenType["OPTIONAL_CHAIN"] = "OPTIONAL_CHAIN"; // Delimiters TokenType["LEFT_PAREN"] = "LEFT_PAREN"; TokenType["RIGHT_PAREN"] = "RIGHT_PAREN"; TokenType["LEFT_BRACE"] = "LEFT_BRACE"; TokenType["RIGHT_BRACE"] = "RIGHT_BRACE"; TokenType["LEFT_BRACKET"] = "LEFT_BRACKET"; TokenType["RIGHT_BRACKET"] = "RIGHT_BRACKET"; TokenType["COMMA"] = "COMMA"; TokenType["DOT"] = "DOT"; TokenType["COLON"] = "COLON"; TokenType["SEMICOLON"] = "SEMICOLON"; TokenType["QUESTION"] = "QUESTION"; TokenType["QUESTION_QUESTION"] = "QUESTION_QUESTION"; // Special TokenType["ARROW"] = "ARROW"; TokenType["SPREAD"] = "SPREAD"; TokenType["PIPELINE"] = "PIPELINE"; TokenType["CONFIDENCE_PIPELINE"] = "CONFIDENCE_PIPELINE"; TokenType["CONFIDENCE_THRESHOLD_GATE"] = "CONFIDENCE_THRESHOLD_GATE"; TokenType["PLACEHOLDER"] = "PLACEHOLDER"; TokenType["EOF"] = "EOF"; })(TokenType || (exports.TokenType = TokenType = {})); const keywords = { 'if': TokenType.IF, 'else': TokenType.ELSE, 'uncertain': TokenType.UNCERTAIN, 'high': TokenType.HIGH, 'medium': TokenType.MEDIUM, 'low': TokenType.LOW, 'default': TokenType.DEFAULT, 'in': TokenType.IN, 'context': TokenType.CONTEXT, 'shifting': TokenType.SHIFTING, 'to': TokenType.TO, 'agents': TokenType.AGENTS, 'agent': TokenType.AGENT, 'Agent': TokenType.AGENT, 'function': TokenType.FUNCTION, 'return': TokenType.RETURN, 'let': TokenType.LET, 'const': TokenType.CONST, 'true': TokenType.TRUE, 'false': TokenType.FALSE, 'null': TokenType.NULL, 'undefined': TokenType.UNDEFINED, 'for': TokenType.FOR, 'while': TokenType.WHILE, 'do': TokenType.DO, 'break': TokenType.BREAK, 'continue': TokenType.CONTINUE, 'typeof': TokenType.TYPEOF, 'instanceof': TokenType.INSTANCEOF, 'import': TokenType.IMPORT, 'export': TokenType.EXPORT, 'from': TokenType.FROM, 'as': TokenType.AS, 'async': TokenType.ASYNC, 'await': TokenType.AWAIT, }; class Tokenizer { input; position = 0; line = 1; column = 0; tokens = []; constructor(input) { this.input = input; } tokenize() { while (!this.isAtEnd()) { this.skipWhitespaceAndComments(); if (this.isAtEnd()) break; const token = this.nextToken(); if (token) { this.tokens.push(token); } } this.tokens.push({ type: TokenType.EOF, value: '', line: this.line, column: this.column, }); return this.tokens; } nextToken() { const startColumn = this.column; const char = this.advance(); // Single character tokens (with compound assignment checks) switch (char) { case '+': if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.PLUS_EQUAL, '+=', startColumn); } return this.makeToken(TokenType.PLUS, '+', startColumn); case '-': if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.MINUS_EQUAL, '-=', startColumn); } return this.makeToken(TokenType.MINUS, '-', startColumn); case '*': if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.STAR_EQUAL, '*=', startColumn); } if (this.peek() === '*') { this.advance(); return this.makeToken(TokenType.STAR_STAR, '**', startColumn); } return this.makeToken(TokenType.STAR, '*', startColumn); case '/': if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.SLASH_EQUAL, '/=', startColumn); } return this.makeToken(TokenType.SLASH, '/', startColumn); case '%': if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.PERCENT_EQUAL, '%=', startColumn); } return this.makeToken(TokenType.PERCENT, '%', startColumn); case '(': return this.makeToken(TokenType.LEFT_PAREN, '(', startColumn); case ')': return this.makeToken(TokenType.RIGHT_PAREN, ')', startColumn); case '{': return this.makeToken(TokenType.LEFT_BRACE, '{', startColumn); case '}': return this.makeToken(TokenType.RIGHT_BRACE, '}', startColumn); case '[': return this.makeToken(TokenType.LEFT_BRACKET, '[', startColumn); case ']': return this.makeToken(TokenType.RIGHT_BRACKET, ']', startColumn); case ',': return this.makeToken(TokenType.COMMA, ',', startColumn); case '.': // Check for spread operator ... if (this.peek() === '.' && this.peekNext() === '.') { this.advance(); // consume second dot this.advance(); // consume third dot return this.makeToken(TokenType.SPREAD, '...', startColumn); } return this.makeToken(TokenType.DOT, '.', startColumn); case ':': return this.makeToken(TokenType.COLON, ':', startColumn); case ';': return this.makeToken(TokenType.SEMICOLON, ';', startColumn); case '?': if (this.peek() === '.') { this.advance(); return this.makeToken(TokenType.OPTIONAL_CHAIN, '?.', startColumn); } if (this.peek() === '?') { this.advance(); return this.makeToken(TokenType.QUESTION_QUESTION, '??', startColumn); } return this.makeToken(TokenType.QUESTION, '?', startColumn); } // Two character tokens if (char === '=') { if (this.peek() === '=') { this.advance(); if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.EQUAL_EQUAL_EQUAL, '===', startColumn); } return this.makeToken(TokenType.EQUAL_EQUAL, '==', startColumn); } if (this.peek() === '>') { this.advance(); return this.makeToken(TokenType.ARROW, '=>', startColumn); } return this.makeToken(TokenType.EQUAL, '=', startColumn); } if (char === '!') { if (this.peek() === '=') { this.advance(); if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.NOT_EQUAL_EQUAL, '!==', startColumn); } return this.makeToken(TokenType.NOT_EQUAL, '!=', startColumn); } return this.makeToken(TokenType.NOT, '!', startColumn); } if (char === '<') { if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.LESS_EQUAL, '<=', startColumn); } if (this.peek() === '~') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_EXTRACT, '<~', startColumn); } return this.makeToken(TokenType.LESS, '<', startColumn); } if (char === '>') { if (this.peek() === '=') { this.advance(); return this.makeToken(TokenType.GREATER_EQUAL, '>=', startColumn); } return this.makeToken(TokenType.GREATER, '>', startColumn); } if (char === '~') { if (this.peek() === '>' && this.peekNext() === '=') { this.advance(); // consume > this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_GREATER_EQUAL, '~>=', startColumn); } if (this.peek() === '>') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_ARROW, '~>', startColumn); } if (this.peek() === '?' && this.peekNext() === '?') { this.advance(); // consume first ? this.advance(); // consume second ? return this.makeToken(TokenType.CONFIDENCE_COALESCE, '~??', startColumn); } if (this.peek() === '?' && this.peekNext() === '>') { this.advance(); // consume ? this.advance(); // consume > return this.makeToken(TokenType.CONFIDENCE_THRESHOLD_GATE, '~?>', startColumn); } if (this.peek() === '?') { this.advance(); // consume ? return this.makeToken(TokenType.CONFIDENCE_QUESTION, '~?', startColumn); } if (this.peek() === '&' && this.peekNext() === '&') { this.advance(); // consume first & this.advance(); // consume second & return this.makeToken(TokenType.CONFIDENCE_AND, '~&&', startColumn); } if (this.peek() === '@' && this.peekNext() === '>') { this.advance(); // consume @ this.advance(); // consume > return this.makeToken(TokenType.THRESHOLD_GATE, '~@>', startColumn); } if (this.peek() === '|' && this.peekNext() === '>') { this.advance(); // consume | this.advance(); // consume > return this.makeToken(TokenType.CONFIDENCE_PIPELINE, '~|>', startColumn); } if (this.peek() === '|' && this.peekNext() === '|' && this.peekThird() === '>') { this.advance(); // consume first | this.advance(); // consume second | this.advance(); // consume > return this.makeToken(TokenType.PARALLEL_CONFIDENCE, '~||>', startColumn); } if (this.peek() === '|' && this.peekNext() === '|') { this.advance(); // consume first | this.advance(); // consume second | return this.makeToken(TokenType.CONFIDENCE_OR, '~||', startColumn); } if (this.peek() === '+' && this.peekNext() === '=') { this.advance(); // consume + this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_PLUS_EQUAL, '~+=', startColumn); } if (this.peek() === '+') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_PLUS, '~+', startColumn); } if (this.peek() === '-' && this.peekNext() === '=') { this.advance(); // consume - this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_MINUS_EQUAL, '~-=', startColumn); } if (this.peek() === '-') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_MINUS, '~-', startColumn); } if (this.peek() === '*' && this.peekNext() === '=') { this.advance(); // consume * this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_STAR_EQUAL, '~*=', startColumn); } if (this.peek() === '*') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_STAR, '~*', startColumn); } if (this.peek() === '/' && this.peekNext() === '=') { this.advance(); // consume / this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_SLASH_EQUAL, '~/=', startColumn); } if (this.peek() === '/') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_SLASH, '~/', startColumn); } if (this.peek() === '=' && this.peekNext() === '=') { this.advance(); // consume first = this.advance(); // consume second = return this.makeToken(TokenType.CONFIDENCE_EQUAL, '~==', startColumn); } if (this.peek() === '!' && this.peekNext() === '=') { this.advance(); // consume ! this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_NOT_EQUAL, '~!=', startColumn); } if (this.peek() === '<' && this.peekNext() === '=') { this.advance(); // consume < this.advance(); // consume = return this.makeToken(TokenType.CONFIDENCE_LESS_EQUAL, '~<=', startColumn); } if (this.peek() === '<') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_LESS, '~<', startColumn); } if (this.peek() === '.') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_DOT, '~.', startColumn); } if (this.peek() === '~') { this.advance(); return this.makeToken(TokenType.CONFIDENCE_CHAIN, '~~', startColumn); } return this.makeToken(TokenType.TILDE, '~', startColumn); } if (char === '&' && this.peek() === '&') { this.advance(); return this.makeToken(TokenType.AND, '&&', startColumn); } if (char === '|') { if (this.peek() === '>') { this.advance(); // consume > return this.makeToken(TokenType.PIPELINE, '|>', startColumn); } if (this.peek() === '|') { this.advance(); // consume second | return this.makeToken(TokenType.OR, '||', startColumn); } } // String literals if (char === '"') { return this.string(startColumn); } // Multiline string literals if (char === '`' && this.peek() === '`' && this.peekNext() === '`') { this.advance(); // consume second ` this.advance(); // consume third ` return this.multilineString(startColumn); } // Number literals if (this.isDigit(char)) { return this.number(startColumn); } // Identifiers and keywords if (this.isAlpha(char)) { return this.identifier(startColumn); } throw new Error(`Unexpected character '${char}' at line ${this.line}, column ${startColumn}`); } string(startColumn) { const value = []; let hasInterpolation = false; let braceDepth = 0; while (!this.isAtEnd()) { const char = this.peek(); // Check if we're at the closing quote (only when not inside interpolation) if (char === '"' && braceDepth === 0) { break; } // Handle escape sequences if (char === '\\') { this.advance(); // consume backslash if (this.isAtEnd()) { throw new Error(`Unterminated escape sequence at line ${this.line}`); } const escaped = this.advance(); switch (escaped) { case 'n': value.push('\n'); break; case 't': value.push('\t'); break; case 'r': value.push('\r'); break; case '\\': value.push('\\'); break; case '"': value.push('"'); break; case '\'': value.push('\''); break; default: // For unknown escape sequences, just include the character value.push(escaped); } } else if (char === '$' && this.peekNext() === '{') { // String interpolation detected hasInterpolation = true; value.push(this.advance()); // $ value.push(this.advance()); // { braceDepth++; } else if (char === '{' && braceDepth > 0) { // Track nested braces inside interpolation value.push(this.advance()); braceDepth++; } else if (char === '}' && braceDepth > 0) { // Track closing braces value.push(this.advance()); braceDepth--; } else if (char === '\n') { throw new Error(`Unexpected newline in string at line ${this.line}`); } else { value.push(this.advance()); } } if (this.isAtEnd()) { throw new Error(`Unterminated string at line ${this.line}, column ${startColumn}`); } // Consume closing quote this.advance(); const tokenType = hasInterpolation ? TokenType.INTERPOLATED_STRING : TokenType.STRING; return this.makeToken(tokenType, value.join(''), startColumn); } multilineString(startColumn) { const value = []; const startLine = this.line; let hasInterpolation = false; let braceDepth = 0; while (!this.isAtEnd()) { // Check for closing ``` (only when not inside interpolation) if (this.peek() === '`' && this.peekNext() === '`' && this.peekThird() === '`' && braceDepth === 0) { this.advance(); // consume first ` this.advance(); // consume second ` this.advance(); // consume third ` break; } // Check for interpolation if (this.peek() === '$' && this.peekNext() === '{') { hasInterpolation = true; value.push(this.advance()); // $ value.push(this.advance()); // { braceDepth++; } else if (this.peek() === '{' && braceDepth > 0) { // Track nested braces inside interpolation value.push(this.advance()); braceDepth++; } else if (this.peek() === '}' && braceDepth > 0) { // Track closing braces value.push(this.advance()); braceDepth--; } else { const char = this.advance(); if (char === '\n') { this.line++; this.column = 0; } value.push(char); } } if (this.isAtEnd() && !(this.input[this.position - 3] === '`' && this.input[this.position - 2] === '`' && this.input[this.position - 1] === '`')) { throw new Error(`Unterminated multiline string starting at line ${startLine}, column ${startColumn}`); } const tokenType = hasInterpolation ? TokenType.INTERPOLATED_STRING : TokenType.STRING; return this.makeToken(tokenType, value.join(''), startColumn); } number(startColumn) { const start = this.position - 1; while (this.isDigit(this.peek())) { this.advance(); } // Look for decimal part if (this.peek() === '.' && this.isDigit(this.peekNext())) { this.advance(); // consume '.' while (this.isDigit(this.peek())) { this.advance(); } } const value = this.input.substring(start, this.position); return this.makeToken(TokenType.NUMBER, value, startColumn); } identifier(startColumn) { const start = this.position - 1; while (this.isAlphaNumeric(this.peek())) { this.advance(); } const value = this.input.substring(start, this.position); // Special case: standalone underscore is a placeholder if (value === '_') { return this.makeToken(TokenType.PLACEHOLDER, value, startColumn); } const type = keywords[value] || TokenType.IDENTIFIER; return this.makeToken(type, value, startColumn); } skipWhitespaceAndComments() { let continueLoop = true; while (continueLoop) { const char = this.peek(); if (char === ' ' || char === '\r' || char === '\t') { this.advance(); } else if (char === '\n') { this.line++; this.advance(); this.column = 0; } else if (char === '/' && this.peekNext() === '/') { // Skip single-line comment while (this.peek() !== '\n' && !this.isAtEnd()) { this.advance(); } } else { continueLoop = false; } } } makeToken(type, value, column) { return { type, value, line: this.line, column, }; } isAtEnd() { return this.position >= this.input.length; } advance() { const char = this.input[this.position]; this.position++; this.column++; return char; } peek() { if (this.isAtEnd()) return '\0'; return this.input[this.position]; } peekNext() { if (this.position + 1 >= this.input.length) return '\0'; return this.input[this.position + 1]; } peekThird() { if (this.position + 2 >= this.input.length) return '\0'; return this.input[this.position + 2]; } isDigit(char) { return char >= '0' && char <= '9'; } isAlpha(char) { return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z') || char === '_'; } isAlphaNumeric(char) { return this.isAlpha(char) || this.isDigit(char); } } exports.Tokenizer = Tokenizer; function tokenize(input) { const tokenizer = new Tokenizer(input); return tokenizer.tokenize(); } //# sourceMappingURL=tokenizer.js.map