@prism-lang/core
Version:
A programming language for uncertainty
701 lines (639 loc) • 22.3 kB
text/typescript
/**
 * Every category a token can be tagged with.
 *
 * This is a string enum in which each member's value is its own name, so a
 * token's `type` is readable when logged or serialized.
 */
export enum TokenType {
// Literals
NUMBER = 'NUMBER',
STRING = 'STRING',
// A string literal in which the tokenizer saw at least one `${` sequence.
INTERPOLATED_STRING = 'INTERPOLATED_STRING',
IDENTIFIER = 'IDENTIFIER',
// Keywords: reserved words, mapped from source text by the `keywords` table.
IF = 'IF',
ELSE = 'ELSE',
UNCERTAIN = 'UNCERTAIN',
HIGH = 'HIGH',
MEDIUM = 'MEDIUM',
LOW = 'LOW',
DEFAULT = 'DEFAULT',
IN = 'IN',
CONTEXT = 'CONTEXT',
SHIFTING = 'SHIFTING',
TO = 'TO',
AGENTS = 'AGENTS',
AGENT = 'AGENT',
// NOTE(review): the `keywords` table in this file has no entry that maps to
// CONFIDENCE, so the tokenizer shown here never emits it - confirm intent.
CONFIDENCE = 'CONFIDENCE',
FUNCTION = 'FUNCTION',
RETURN = 'RETURN',
LET = 'LET',
CONST = 'CONST',
TRUE = 'TRUE',
FALSE = 'FALSE',
NULL = 'NULL',
UNDEFINED = 'UNDEFINED',
FOR = 'FOR',
WHILE = 'WHILE',
DO = 'DO',
BREAK = 'BREAK',
CONTINUE = 'CONTINUE',
TYPEOF = 'TYPEOF',
INSTANCEOF = 'INSTANCEOF',
IMPORT = 'IMPORT',
EXPORT = 'EXPORT',
FROM = 'FROM',
AS = 'AS',
ASYNC = 'ASYNC',
AWAIT = 'AWAIT',
// Operators
// Arithmetic: +  -  *  **  /  %
PLUS = 'PLUS',
MINUS = 'MINUS',
STAR = 'STAR',
STAR_STAR = 'STAR_STAR',
SLASH = 'SLASH',
PERCENT = 'PERCENT',
// Assignment and equality: =  ==  ===  !=  !==
EQUAL = 'EQUAL',
EQUAL_EQUAL = 'EQUAL_EQUAL',
EQUAL_EQUAL_EQUAL = 'EQUAL_EQUAL_EQUAL',
NOT_EQUAL = 'NOT_EQUAL',
NOT_EQUAL_EQUAL = 'NOT_EQUAL_EQUAL',
// Compound assignment: +=  -=  *=  /=  %=
PLUS_EQUAL = 'PLUS_EQUAL',
MINUS_EQUAL = 'MINUS_EQUAL',
STAR_EQUAL = 'STAR_EQUAL',
SLASH_EQUAL = 'SLASH_EQUAL',
PERCENT_EQUAL = 'PERCENT_EQUAL',
// Confidence compound assignment: ~+=  ~-=  ~*=  ~/=
CONFIDENCE_PLUS_EQUAL = 'CONFIDENCE_PLUS_EQUAL',
CONFIDENCE_MINUS_EQUAL = 'CONFIDENCE_MINUS_EQUAL',
CONFIDENCE_STAR_EQUAL = 'CONFIDENCE_STAR_EQUAL',
CONFIDENCE_SLASH_EQUAL = 'CONFIDENCE_SLASH_EQUAL',
// Relational: <  >  <=  >=
LESS = 'LESS',
GREATER = 'GREATER',
LESS_EQUAL = 'LESS_EQUAL',
GREATER_EQUAL = 'GREATER_EQUAL',
// Emitted for `~>`
CONFIDENCE_ARROW = 'CONFIDENCE_ARROW',
// Emitted for `<~`
CONFIDENCE_EXTRACT = 'CONFIDENCE_EXTRACT',
// Emitted for `~~`
CONFIDENCE_CHAIN = 'CONFIDENCE_CHAIN',
// Emitted for `~??`
CONFIDENCE_COALESCE = 'CONFIDENCE_COALESCE',
// Emitted for `~&&` and `~||`
CONFIDENCE_AND = 'CONFIDENCE_AND',
CONFIDENCE_OR = 'CONFIDENCE_OR',
// Emitted for `~+`, `~-`, `~*`, `~/`
CONFIDENCE_PLUS = 'CONFIDENCE_PLUS',
CONFIDENCE_MINUS = 'CONFIDENCE_MINUS',
CONFIDENCE_STAR = 'CONFIDENCE_STAR',
CONFIDENCE_SLASH = 'CONFIDENCE_SLASH',
// Emitted for `~==` and `~!=`
CONFIDENCE_EQUAL = 'CONFIDENCE_EQUAL',
CONFIDENCE_NOT_EQUAL = 'CONFIDENCE_NOT_EQUAL',
// NOTE(review): the tokenizer in this file emits CONFIDENCE_ARROW for `~>`;
// no rule here produces CONFIDENCE_GREATER - presumably assigned elsewhere.
CONFIDENCE_GREATER = 'CONFIDENCE_GREATER',
// Emitted for `~<`, `~>=`, `~<=`
CONFIDENCE_LESS = 'CONFIDENCE_LESS',
CONFIDENCE_GREATER_EQUAL = 'CONFIDENCE_GREATER_EQUAL',
CONFIDENCE_LESS_EQUAL = 'CONFIDENCE_LESS_EQUAL',
// Emitted for `~.` and `~?`
CONFIDENCE_DOT = 'CONFIDENCE_DOT',
CONFIDENCE_QUESTION = 'CONFIDENCE_QUESTION',
// NOTE(review): no tokenizer rule in this file produces CONFIDENCE_IN or
// CONFIDENCE_INSTANCEOF - confirm where (or whether) they are used.
CONFIDENCE_IN = 'CONFIDENCE_IN',
CONFIDENCE_INSTANCEOF = 'CONFIDENCE_INSTANCEOF',
// Emitted for `~||>`
PARALLEL_CONFIDENCE = 'PARALLEL_CONFIDENCE',
// Emitted for `~@>` (compare CONFIDENCE_THRESHOLD_GATE, which is `~?>`)
THRESHOLD_GATE = 'THRESHOLD_GATE',
// Logical: &&  ||  !
AND = 'AND',
OR = 'OR',
NOT = 'NOT',
// A `~` that does not start any longer confidence operator
TILDE = 'TILDE',
// Emitted for `?.`
OPTIONAL_CHAIN = 'OPTIONAL_CHAIN',
// Delimiters: ( ) { } [ ] , . : ; ? ??
LEFT_PAREN = 'LEFT_PAREN',
RIGHT_PAREN = 'RIGHT_PAREN',
LEFT_BRACE = 'LEFT_BRACE',
RIGHT_BRACE = 'RIGHT_BRACE',
LEFT_BRACKET = 'LEFT_BRACKET',
RIGHT_BRACKET = 'RIGHT_BRACKET',
COMMA = 'COMMA',
DOT = 'DOT',
COLON = 'COLON',
SEMICOLON = 'SEMICOLON',
QUESTION = 'QUESTION',
QUESTION_QUESTION = 'QUESTION_QUESTION',
// Special
// Emitted for `=>`
ARROW = 'ARROW',
// Emitted for `...`
SPREAD = 'SPREAD',
// Emitted for `|>` and `~|>`
PIPELINE = 'PIPELINE',
CONFIDENCE_PIPELINE = 'CONFIDENCE_PIPELINE',
// Emitted for `~?>`
CONFIDENCE_THRESHOLD_GATE = 'CONFIDENCE_THRESHOLD_GATE',
// Emitted for an identifier that is exactly `_`
PLACEHOLDER = 'PLACEHOLDER',
// Always the final token of a tokenized input; its value is the empty string
EOF = 'EOF',
}
/**
 * A single lexical unit produced by the tokenizer.
 */
export interface Token {
// Category of the token.
type: TokenType;
// Text of the token. Operators, numbers and identifiers carry their source
// spelling; string literals carry their contents without the surrounding
// delimiters; the EOF token carries the empty string.
value: string;
// Line number recorded by the tokenizer when the token was created (the
// tokenizer starts counting at 1).
line: number;
// Column at which the token starts; the first character of a line is column 0.
column: number;
}
/**
 * Reserved words, keyed by their exact (case-sensitive) source spelling.
 *
 * `agent` and `Agent` are both accepted and map to the same token type.
 */
const keywords: Record<string, TokenType> = {
  if: TokenType.IF,
  else: TokenType.ELSE,
  uncertain: TokenType.UNCERTAIN,
  high: TokenType.HIGH,
  medium: TokenType.MEDIUM,
  low: TokenType.LOW,
  default: TokenType.DEFAULT,
  in: TokenType.IN,
  context: TokenType.CONTEXT,
  shifting: TokenType.SHIFTING,
  to: TokenType.TO,
  agents: TokenType.AGENTS,
  agent: TokenType.AGENT,
  Agent: TokenType.AGENT,
  function: TokenType.FUNCTION,
  return: TokenType.RETURN,
  let: TokenType.LET,
  const: TokenType.CONST,
  true: TokenType.TRUE,
  false: TokenType.FALSE,
  null: TokenType.NULL,
  undefined: TokenType.UNDEFINED,
  for: TokenType.FOR,
  while: TokenType.WHILE,
  do: TokenType.DO,
  break: TokenType.BREAK,
  continue: TokenType.CONTINUE,
  typeof: TokenType.TYPEOF,
  instanceof: TokenType.INSTANCEOF,
  import: TokenType.IMPORT,
  export: TokenType.EXPORT,
  from: TokenType.FROM,
  as: TokenType.AS,
  async: TokenType.ASYNC,
  await: TokenType.AWAIT,
};
/**
 * Hand-written scanner that turns source text into a flat list of tokens.
 *
 * Positions are tracked as a 1-based line and a 0-based column. Every token
 * reports the line and column of its first character, including string
 * literals that span several lines.
 */
export class Tokenizer {
  private readonly input: string;
  /** Index into `input` of the next unread character. */
  private position = 0;
  /** 1-based line of the next unread character. */
  private line = 1;
  /** 0-based column of the next unread character. */
  private column = 0;
  private tokens: Token[] = [];

  /**
   * @param input Complete source text to scan.
   */
  constructor(input: string) {
    this.input = input;
  }

  /**
   * Scans the whole input and returns its tokens, always terminated by a
   * single EOF token.
   *
   * The scanner state is reset on entry, so calling this method again
   * re-scans the input from the start and returns a fresh array instead of
   * appending a second EOF to the previous result.
   *
   * @returns The tokens in source order, ending with EOF.
   * @throws Error on an unexpected character, an unterminated string or
   *   escape sequence, or a raw newline inside a `"` string.
   */
  tokenize(): Token[] {
    this.position = 0;
    this.line = 1;
    this.column = 0;
    this.tokens = [];

    for (;;) {
      this.skipWhitespaceAndComments();
      if (this.isAtEnd()) break;
      this.tokens.push(this.nextToken());
    }

    this.tokens.push(this.makeToken(TokenType.EOF, '', this.column));
    return this.tokens;
  }

  /**
   * Scans exactly one token starting at the current position. The caller
   * must already have skipped whitespace and must not be at end of input.
   *
   * Within each case the longer lexeme is tried before its prefix so that
   * the longest operator wins (`===` before `==` before `=`).
   *
   * @throws Error when the character cannot start any token.
   */
  private nextToken(): Token {
    const startColumn = this.column;
    const char = this.advance();

    switch (char) {
      case '+':
        if (this.match('=')) return this.makeToken(TokenType.PLUS_EQUAL, '+=', startColumn);
        return this.makeToken(TokenType.PLUS, '+', startColumn);
      case '-':
        if (this.match('=')) return this.makeToken(TokenType.MINUS_EQUAL, '-=', startColumn);
        return this.makeToken(TokenType.MINUS, '-', startColumn);
      case '*':
        if (this.match('=')) return this.makeToken(TokenType.STAR_EQUAL, '*=', startColumn);
        if (this.match('*')) return this.makeToken(TokenType.STAR_STAR, '**', startColumn);
        return this.makeToken(TokenType.STAR, '*', startColumn);
      case '/':
        // `//` comments never reach here; skipWhitespaceAndComments removes them.
        if (this.match('=')) return this.makeToken(TokenType.SLASH_EQUAL, '/=', startColumn);
        return this.makeToken(TokenType.SLASH, '/', startColumn);
      case '%':
        if (this.match('=')) return this.makeToken(TokenType.PERCENT_EQUAL, '%=', startColumn);
        return this.makeToken(TokenType.PERCENT, '%', startColumn);
      case '(':
        return this.makeToken(TokenType.LEFT_PAREN, '(', startColumn);
      case ')':
        return this.makeToken(TokenType.RIGHT_PAREN, ')', startColumn);
      case '{':
        return this.makeToken(TokenType.LEFT_BRACE, '{', startColumn);
      case '}':
        return this.makeToken(TokenType.RIGHT_BRACE, '}', startColumn);
      case '[':
        return this.makeToken(TokenType.LEFT_BRACKET, '[', startColumn);
      case ']':
        return this.makeToken(TokenType.RIGHT_BRACKET, ']', startColumn);
      case ',':
        return this.makeToken(TokenType.COMMA, ',', startColumn);
      case '.':
        // Two more dots after the one already consumed form the spread operator.
        if (this.match('..')) return this.makeToken(TokenType.SPREAD, '...', startColumn);
        return this.makeToken(TokenType.DOT, '.', startColumn);
      case ':':
        return this.makeToken(TokenType.COLON, ':', startColumn);
      case ';':
        return this.makeToken(TokenType.SEMICOLON, ';', startColumn);
      case '?':
        if (this.match('.')) return this.makeToken(TokenType.OPTIONAL_CHAIN, '?.', startColumn);
        if (this.match('?')) return this.makeToken(TokenType.QUESTION_QUESTION, '??', startColumn);
        return this.makeToken(TokenType.QUESTION, '?', startColumn);
      case '=':
        if (this.match('==')) return this.makeToken(TokenType.EQUAL_EQUAL_EQUAL, '===', startColumn);
        if (this.match('=')) return this.makeToken(TokenType.EQUAL_EQUAL, '==', startColumn);
        if (this.match('>')) return this.makeToken(TokenType.ARROW, '=>', startColumn);
        return this.makeToken(TokenType.EQUAL, '=', startColumn);
      case '!':
        if (this.match('==')) return this.makeToken(TokenType.NOT_EQUAL_EQUAL, '!==', startColumn);
        if (this.match('=')) return this.makeToken(TokenType.NOT_EQUAL, '!=', startColumn);
        return this.makeToken(TokenType.NOT, '!', startColumn);
      case '<':
        if (this.match('=')) return this.makeToken(TokenType.LESS_EQUAL, '<=', startColumn);
        if (this.match('~')) return this.makeToken(TokenType.CONFIDENCE_EXTRACT, '<~', startColumn);
        return this.makeToken(TokenType.LESS, '<', startColumn);
      case '>':
        if (this.match('=')) return this.makeToken(TokenType.GREATER_EQUAL, '>=', startColumn);
        return this.makeToken(TokenType.GREATER, '>', startColumn);
      case '~':
        return this.confidenceOperator(startColumn);
      case '&':
        if (this.match('&')) return this.makeToken(TokenType.AND, '&&', startColumn);
        break; // a lone `&` is not a token; reported below
      case '|':
        if (this.match('>')) return this.makeToken(TokenType.PIPELINE, '|>', startColumn);
        if (this.match('|')) return this.makeToken(TokenType.OR, '||', startColumn);
        break; // a lone `|` is not a token; reported below
      case '"':
        return this.string(startColumn);
      case '`':
        // Only a triple backtick opens a (multiline) string.
        if (this.match('``')) return this.multilineString(startColumn);
        break;
    }

    if (this.isDigit(char)) {
      return this.number(startColumn);
    }
    if (this.isAlpha(char)) {
      return this.identifier(startColumn);
    }

    throw new Error(`Unexpected character '${char}' at line ${this.line}, column ${startColumn}`);
  }

  /**
   * Scans an operator whose leading `~` has already been consumed.
   *
   * The checks are ordered so that a longer lexeme is always tried before
   * any lexeme that is a prefix of it (`~>=` before `~>`, `~||>` before `~||`).
   */
  private confidenceOperator(startColumn: number): Token {
    if (this.match('>=')) return this.makeToken(TokenType.CONFIDENCE_GREATER_EQUAL, '~>=', startColumn);
    if (this.match('>')) return this.makeToken(TokenType.CONFIDENCE_ARROW, '~>', startColumn);
    if (this.match('??')) return this.makeToken(TokenType.CONFIDENCE_COALESCE, '~??', startColumn);
    if (this.match('?>')) return this.makeToken(TokenType.CONFIDENCE_THRESHOLD_GATE, '~?>', startColumn);
    if (this.match('?')) return this.makeToken(TokenType.CONFIDENCE_QUESTION, '~?', startColumn);
    if (this.match('&&')) return this.makeToken(TokenType.CONFIDENCE_AND, '~&&', startColumn);
    if (this.match('@>')) return this.makeToken(TokenType.THRESHOLD_GATE, '~@>', startColumn);
    if (this.match('|>')) return this.makeToken(TokenType.CONFIDENCE_PIPELINE, '~|>', startColumn);
    if (this.match('||>')) return this.makeToken(TokenType.PARALLEL_CONFIDENCE, '~||>', startColumn);
    if (this.match('||')) return this.makeToken(TokenType.CONFIDENCE_OR, '~||', startColumn);
    if (this.match('+=')) return this.makeToken(TokenType.CONFIDENCE_PLUS_EQUAL, '~+=', startColumn);
    if (this.match('+')) return this.makeToken(TokenType.CONFIDENCE_PLUS, '~+', startColumn);
    if (this.match('-=')) return this.makeToken(TokenType.CONFIDENCE_MINUS_EQUAL, '~-=', startColumn);
    if (this.match('-')) return this.makeToken(TokenType.CONFIDENCE_MINUS, '~-', startColumn);
    if (this.match('*=')) return this.makeToken(TokenType.CONFIDENCE_STAR_EQUAL, '~*=', startColumn);
    if (this.match('*')) return this.makeToken(TokenType.CONFIDENCE_STAR, '~*', startColumn);
    if (this.match('/=')) return this.makeToken(TokenType.CONFIDENCE_SLASH_EQUAL, '~/=', startColumn);
    if (this.match('/')) return this.makeToken(TokenType.CONFIDENCE_SLASH, '~/', startColumn);
    if (this.match('==')) return this.makeToken(TokenType.CONFIDENCE_EQUAL, '~==', startColumn);
    if (this.match('!=')) return this.makeToken(TokenType.CONFIDENCE_NOT_EQUAL, '~!=', startColumn);
    if (this.match('<=')) return this.makeToken(TokenType.CONFIDENCE_LESS_EQUAL, '~<=', startColumn);
    if (this.match('<')) return this.makeToken(TokenType.CONFIDENCE_LESS, '~<', startColumn);
    if (this.match('.')) return this.makeToken(TokenType.CONFIDENCE_DOT, '~.', startColumn);
    if (this.match('~')) return this.makeToken(TokenType.CONFIDENCE_CHAIN, '~~', startColumn);
    return this.makeToken(TokenType.TILDE, '~', startColumn);
  }

  /**
   * Scans a `"`-delimited string whose opening quote has already been
   * consumed. Escape sequences are decoded into the token value; text inside
   * `${ ... }` is kept verbatim (braces included) and marks the token as
   * INTERPOLATED_STRING. A quote inside an interpolation does not end the
   * string.
   *
   * @throws Error on a raw newline, a trailing backslash, or a missing
   *   closing quote.
   */
  private string(startColumn: number): Token {
    const startLine = this.line;
    const value: string[] = [];
    let hasInterpolation = false;
    let braceDepth = 0;

    while (!this.isAtEnd()) {
      const char = this.peek();

      // The closing quote only counts outside of an interpolation.
      if (char === '"' && braceDepth === 0) {
        break;
      }

      if (char === '\\') {
        this.advance(); // consume backslash
        if (this.isAtEnd()) {
          throw new Error(`Unterminated escape sequence at line ${this.line}`);
        }
        const escaped = this.advance();
        switch (escaped) {
          case 'n': value.push('\n'); break;
          case 't': value.push('\t'); break;
          case 'r': value.push('\r'); break;
          case '\\': value.push('\\'); break;
          case '"': value.push('"'); break;
          case '\'': value.push('\''); break;
          default:
            // Unknown escapes keep the escaped character. A backslash-newline
            // therefore embeds the newline, and must still be counted so the
            // positions of later tokens stay correct.
            if (escaped === '\n') {
              this.line++;
              this.column = 0;
            }
            value.push(escaped);
        }
      } else if (this.match('${')) {
        hasInterpolation = true;
        value.push('${');
        braceDepth++;
      } else if (char === '{' && braceDepth > 0) {
        // Nested brace inside an interpolation
        value.push(this.advance());
        braceDepth++;
      } else if (char === '}' && braceDepth > 0) {
        value.push(this.advance());
        braceDepth--;
      } else if (char === '\n') {
        throw new Error(`Unexpected newline in string at line ${this.line}`);
      } else {
        value.push(this.advance());
      }
    }

    if (this.isAtEnd()) {
      throw new Error(`Unterminated string at line ${this.line}, column ${startColumn}`);
    }

    this.advance(); // consume closing quote

    const tokenType = hasInterpolation ? TokenType.INTERPOLATED_STRING : TokenType.STRING;
    return this.makeToken(tokenType, value.join(''), startColumn, startLine);
  }

  /**
   * Scans a triple-backtick string whose opening fence has already been
   * consumed. The contents are taken verbatim (no escape processing) and may
   * contain newlines. A closing fence is only recognised outside of
   * `${ ... }` interpolation.
   *
   * @throws Error when the input ends before a closing fence is found.
   */
  private multilineString(startColumn: number): Token {
    const startLine = this.line;
    const value: string[] = [];
    let hasInterpolation = false;
    let braceDepth = 0;
    // Tracked explicitly: looking at the last three characters of the input
    // would mistake the opening fence for the closing one.
    let closed = false;

    while (!this.isAtEnd()) {
      if (braceDepth === 0 && this.match('```')) {
        closed = true;
        break;
      }

      if (this.match('${')) {
        hasInterpolation = true;
        value.push('${');
        braceDepth++;
      } else if (this.peek() === '{' && braceDepth > 0) {
        // Nested brace inside an interpolation
        value.push(this.advance());
        braceDepth++;
      } else if (this.peek() === '}' && braceDepth > 0) {
        value.push(this.advance());
        braceDepth--;
      } else {
        const char = this.advance();
        if (char === '\n') {
          this.line++;
          this.column = 0;
        }
        value.push(char);
      }
    }

    if (!closed) {
      throw new Error(`Unterminated multiline string starting at line ${startLine}, column ${startColumn}`);
    }

    const tokenType = hasInterpolation ? TokenType.INTERPOLATED_STRING : TokenType.STRING;
    return this.makeToken(tokenType, value.join(''), startColumn, startLine);
  }

  /**
   * Scans an integer or decimal literal whose first digit has already been
   * consumed. A `.` is only part of the number when a digit follows it.
   */
  private number(startColumn: number): Token {
    const start = this.position - 1;

    while (this.isDigit(this.peek())) {
      this.advance();
    }

    if (this.peek() === '.' && this.isDigit(this.peekNext())) {
      this.advance(); // consume '.'
      while (this.isDigit(this.peek())) {
        this.advance();
      }
    }

    return this.makeToken(TokenType.NUMBER, this.input.substring(start, this.position), startColumn);
  }

  /**
   * Scans an identifier, keyword, or the `_` placeholder; the first
   * character has already been consumed.
   */
  private identifier(startColumn: number): Token {
    const start = this.position - 1;

    while (this.isAlphaNumeric(this.peek())) {
      this.advance();
    }

    const value = this.input.substring(start, this.position);

    // A standalone underscore is a placeholder, not an identifier.
    if (value === '_') {
      return this.makeToken(TokenType.PLACEHOLDER, value, startColumn);
    }

    // Own-property check: a plain `keywords[value]` lookup would also find
    // inherited members such as `constructor` or `toString` and return a
    // value that is not a TokenType.
    const keyword = Object.prototype.hasOwnProperty.call(keywords, value)
      ? keywords[value]
      : undefined;
    const type = keyword !== undefined ? keyword : TokenType.IDENTIFIER;
    return this.makeToken(type, value, startColumn);
  }

  /**
   * Advances past spaces, tabs, carriage returns, newlines and `//` line
   * comments, keeping the line and column counters up to date.
   */
  private skipWhitespaceAndComments(): void {
    for (;;) {
      const char = this.peek();
      if (char === ' ' || char === '\r' || char === '\t') {
        this.advance();
      } else if (char === '\n') {
        this.line++;
        this.advance();
        this.column = 0;
      } else if (char === '/' && this.peekNext() === '/') {
        // Leave the terminating newline for the branch above to count.
        while (this.peek() !== '\n' && !this.isAtEnd()) {
          this.advance();
        }
      } else {
        return;
      }
    }
  }

  /**
   * Builds a token.
   *
   * @param column Column of the token's first character.
   * @param line Line of the token's first character; defaults to the current
   *   line, which is correct for every token that cannot contain a newline.
   */
  private makeToken(type: TokenType, value: string, column: number, line: number = this.line): Token {
    return { type, value, line, column };
  }

  /** True once every character of the input has been consumed. */
  private isAtEnd(): boolean {
    return this.position >= this.input.length;
  }

  /** Consumes and returns the current character. Must not be called at end of input. */
  private advance(): string {
    const char = this.input[this.position];
    this.position++;
    this.column++;
    return char;
  }

  /**
   * Consumes `expected` if the unread input starts with it.
   *
   * @param expected Text to look for; must not contain a newline, because
   *   only the column counter is advanced.
   * @returns Whether the text was found and consumed.
   */
  private match(expected: string): boolean {
    if (!this.input.startsWith(expected, this.position)) {
      return false;
    }
    this.position += expected.length;
    this.column += expected.length;
    return true;
  }

  /** Returns the current character without consuming it, or `'\0'` at end of input. */
  private peek(): string {
    if (this.isAtEnd()) return '\0';
    return this.input[this.position];
  }

  /** Returns the character after the current one, or `'\0'` if there is none. */
  private peekNext(): string {
    if (this.position + 1 >= this.input.length) return '\0';
    return this.input[this.position + 1];
  }

  /** ASCII decimal digit test. */
  private isDigit(char: string): boolean {
    return char >= '0' && char <= '9';
  }

  /** ASCII letter or underscore test. */
  private isAlpha(char: string): boolean {
    return (char >= 'a' && char <= 'z') ||
      (char >= 'A' && char <= 'Z') ||
      char === '_';
  }

  /** ASCII letter, digit, or underscore test. */
  private isAlphaNumeric(char: string): boolean {
    return this.isAlpha(char) || this.isDigit(char);
  }
}
/**
 * Convenience wrapper: scans `input` with a fresh {@link Tokenizer}.
 *
 * @param input Source text to scan.
 * @returns The tokens produced by `Tokenizer.tokenize()` for that text.
 */
export function tokenize(input: string): Token[] {
  return new Tokenizer(input).tokenize();
}