UNPKG

java2ib

Version:

TypeScript library that converts Java code into IB Computer Science pseudocode format

323 lines 10.2 kB
"use strict"; /** * Lexical analyzer for Java source code */ Object.defineProperty(exports, "__esModule", { value: true }); exports.Lexer = void 0; const types_1 = require("./types"); class Lexer { constructor(input) { this.position = 0; this.line = 1; this.column = 1; this.errors = []; this.input = input; } /** * Tokenize the input Java code * @returns Array of tokens and any lexical errors */ tokenize() { // Performance optimization: Use regular array with push for simplicity and reliability const tokens = []; this.errors = []; while (!this.isAtEnd()) { const token = this.nextToken(); if (token) { tokens.push(token); } } return { tokens, errors: this.errors }; } nextToken() { this.skipWhitespace(); if (this.isAtEnd()) { return null; } const start = this.getCurrentLocation(); const char = this.peek(); // Comments if (char === '/' && this.peekNext() === '/') { return this.readLineComment(start); } if (char === '/' && this.peekNext() === '*') { return this.readBlockComment(start); } // String literals if (char === '"') { return this.readStringLiteral(start); } // Character literals if (char === "'") { return this.readCharLiteral(start); } // Numeric literals if (this.isDigit(char)) { return this.readNumericLiteral(start); } // Identifiers and keywords if (this.isAlpha(char) || char === '_' || char === '$') { return this.readIdentifierOrKeyword(start); } // Operators (check multi-character first) const operator = this.readOperator(); if (operator) { return { type: types_1.TokenType.OPERATOR, value: operator, location: start }; } // Punctuation if (Lexer.PUNCTUATION.has(char)) { this.advance(); return { type: types_1.TokenType.PUNCTUATION, value: char, location: start }; } // Invalid character this.addError(types_1.ErrorType.LEXICAL_ERROR, `Unexpected character '${char}' (Unicode: ${char.charCodeAt(0)}). Only valid Java characters are allowed.`, start); this.advance(); // Skip invalid character return null; } readLineComment(start) { let value = ''; // Skip the // this.advance(); this.advance(); while (!this.isAtEnd() && this.peek() !== '\n') { value += this.advance(); } return { type: types_1.TokenType.COMMENT, value: '//' + value, location: start }; } readBlockComment(start) { let value = ''; // Skip the /* this.advance(); this.advance(); while (!this.isAtEnd()) { if (this.peek() === '*' && this.peekNext() === '/') { this.advance(); // * this.advance(); // / break; } value += this.advance(); } return { type: types_1.TokenType.COMMENT, value: '/*' + value + '*/', location: start }; } readStringLiteral(start) { let value = ''; // Skip opening quote this.advance(); while (!this.isAtEnd() && this.peek() !== '"') { if (this.peek() === '\\') { // Handle escape sequences this.advance(); // Skip backslash if (!this.isAtEnd()) { const escaped = this.advance(); value += '\\' + escaped; } } else { value += this.advance(); } } if (this.isAtEnd()) { this.addError(types_1.ErrorType.LEXICAL_ERROR, 'Unterminated string literal. Missing closing quote (") before end of file.', start); } else { this.advance(); // Skip closing quote } return { type: types_1.TokenType.LITERAL, value: '"' + value + '"', location: start }; } readCharLiteral(start) { let value = ''; // Skip opening quote this.advance(); if (!this.isAtEnd() && this.peek() !== "'") { if (this.peek() === '\\') { // Handle escape sequences this.advance(); // Skip backslash if (!this.isAtEnd()) { const escaped = this.advance(); value += '\\' + escaped; } } else { value += this.advance(); } } if (this.isAtEnd() || this.peek() !== "'") { this.addError(types_1.ErrorType.LEXICAL_ERROR, 'Unterminated character literal. Missing closing single quote (\') or invalid character sequence.', start); } else { this.advance(); // Skip closing quote } return { type: types_1.TokenType.LITERAL, value: "'" + value + "'", location: start }; } readNumericLiteral(start) { let value = ''; // Read integer part while (!this.isAtEnd() && this.isDigit(this.peek())) { value += this.advance(); } // Check for decimal point if (!this.isAtEnd() && this.peek() === '.' && this.isDigit(this.peekNext())) { value += this.advance(); // Add decimal point // Read fractional part while (!this.isAtEnd() && this.isDigit(this.peek())) { value += this.advance(); } } // Check for scientific notation if (!this.isAtEnd() && (this.peek() === 'e' || this.peek() === 'E')) { value += this.advance(); if (!this.isAtEnd() && (this.peek() === '+' || this.peek() === '-')) { value += this.advance(); } while (!this.isAtEnd() && this.isDigit(this.peek())) { value += this.advance(); } } // Check for type suffixes (f, F, d, D, l, L) if (!this.isAtEnd() && /[fFdDlL]/.test(this.peek())) { value += this.advance(); } return { type: types_1.TokenType.LITERAL, value, location: start }; } readIdentifierOrKeyword(start) { let value = ''; while (!this.isAtEnd() && (this.isAlphaNumeric(this.peek()) || this.peek() === '_' || this.peek() === '$')) { value += this.advance(); } const type = Lexer.KEYWORDS.has(value) ? types_1.TokenType.KEYWORD : types_1.TokenType.IDENTIFIER; return { type, value, location: start }; } readOperator() { // Check multi-character operators first for (const op of Lexer.OPERATORS) { if (this.matchString(op)) { for (let i = 0; i < op.length; i++) { this.advance(); } return op; } } return null; } matchString(str) { for (let i = 0; i < str.length; i++) { if (this.position + i >= this.input.length || this.input[this.position + i] !== str[i]) { return false; } } return true; } skipWhitespace() { while (!this.isAtEnd() && this.isWhitespace(this.peek())) { this.advance(); } } isWhitespace(char) { return /\s/.test(char); } isDigit(char) { return /\d/.test(char); } isAlpha(char) { return /[a-zA-Z]/.test(char); } isAlphaNumeric(char) { return this.isAlpha(char) || this.isDigit(char); } peek() { if (this.isAtEnd()) return '\0'; return this.input[this.position]; } peekNext() { if (this.position + 1 >= this.input.length) return '\0'; return this.input[this.position + 1]; } advance() { if (this.isAtEnd()) return '\0'; const char = this.input[this.position]; this.position++; if (char === '\n') { this.line++; this.column = 1; } else { this.column++; } return char; } isAtEnd() { return this.position >= this.input.length; } getCurrentLocation() { return { line: this.line, column: this.column }; } addError(type, message, location) { this.errors.push({ type, message, location, severity: types_1.ErrorSeverity.ERROR }); } } exports.Lexer = Lexer; // Java keywords Lexer.KEYWORDS = new Set([ 'abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch', 'char', 'class', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum', 'extends', 'final', 'finally', 'float', 'for', 'goto', 'if', 'implements', 'import', 'instanceof', 'int', 'interface', 'long', 'native', 'new', 'null', 'package', 'private', 'protected', 'public', 'return', 'short', 'static', 'strictfp', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws', 'transient', 'try', 'void', 'volatile', 'while', 'true', 'false' ]); // Java operators (multi-character first for proper matching) Lexer.OPERATORS = [ '==', '!=', '<=', '>=', '&&', '||', '++', '--', '+=', '-=', '*=', '/=', '%=', '<<', '>>', '>>>', '&=', '|=', '^=', '<<=', '>>=', '>>>=', '=', '+', '-', '*', '/', '%', '<', '>', '!', '&', '|', '^', '~', '?', ':' ]; // Java punctuation Lexer.PUNCTUATION = new Set([ '(', ')', '{', '}', '[', ']', ';', ',', '.' ]); //# sourceMappingURL=lexer.js.map