/*
 * java2ib
 * Version: (not specified)
 * TypeScript library that converts Java code into IB Computer Science pseudocode format.
 * 323 lines • 10.2 kB
 * JavaScript
 */
"use strict";
/**
* Lexical analyzer for Java source code
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.Lexer = void 0;
const types_1 = require("./types");
/**
 * Hand-written scanner that turns Java source text into a flat token list.
 *
 * Tokens are plain objects `{ type, value, location }` where `type` comes from
 * `types_1.TokenType` and `location` is the `{ line, column }` (both 1-based)
 * of the token's first character. Problems found while scanning are collected
 * in `errors` rather than thrown.
 *
 * The lookup tables `Lexer.KEYWORDS`, `Lexer.OPERATORS` and
 * `Lexer.PUNCTUATION` are read as static properties of the class.
 */
class Lexer {
    /**
     * @param {string} input - Java source text to tokenize.
     */
    constructor(input) {
        this.position = 0;
        this.line = 1;
        this.column = 1;
        this.errors = [];
        this.input = input;
    }
    /**
     * Tokenize the input Java code.
     *
     * The scan always starts from the beginning of the input, so calling this
     * method more than once yields the same result each time.
     *
     * @returns {{tokens: Array, errors: Array}} Tokens in source order and any
     *   lexical errors recorded during the scan.
     */
    tokenize() {
        // Rewind the cursor; without this a second call would start at the
        // end of the input and return no tokens.
        this.position = 0;
        this.line = 1;
        this.column = 1;
        this.errors = [];
        const tokens = [];
        while (!this.isAtEnd()) {
            const token = this.nextToken();
            if (token) {
                tokens.push(token);
            }
        }
        return { tokens, errors: this.errors };
    }
    /**
     * Scan one token starting at the current position.
     *
     * @returns {object|null} The token, or `null` when only whitespace was
     *   left or an invalid character was skipped (an error is recorded for
     *   the latter).
     */
    nextToken() {
        this.skipWhitespace();
        if (this.isAtEnd()) {
            return null;
        }
        const start = this.getCurrentLocation();
        const char = this.peek();
        // Comments must be tested before operators, otherwise '/' wins.
        if (char === '/' && this.peekNext() === '/') {
            return this.readLineComment(start);
        }
        if (char === '/' && this.peekNext() === '*') {
            return this.readBlockComment(start);
        }
        // String literals
        if (char === '"') {
            return this.readStringLiteral(start);
        }
        // Character literals
        if (char === "'") {
            return this.readCharLiteral(start);
        }
        // Numeric literals
        if (this.isDigit(char)) {
            return this.readNumericLiteral(start);
        }
        // Identifiers and keywords
        if (this.isAlpha(char) || char === '_' || char === '$') {
            return this.readIdentifierOrKeyword(start);
        }
        // Operators
        const operator = this.readOperator();
        if (operator) {
            return {
                type: types_1.TokenType.OPERATOR,
                value: operator,
                location: start
            };
        }
        // Punctuation
        if (Lexer.PUNCTUATION.has(char)) {
            this.advance();
            return {
                type: types_1.TokenType.PUNCTUATION,
                value: char,
                location: start
            };
        }
        // Invalid character: report it and skip it so scanning can continue.
        this.addError(types_1.ErrorType.LEXICAL_ERROR, `Unexpected character '${char}' (Unicode: ${char.charCodeAt(0)}). Only valid Java characters are allowed.`, start);
        this.advance();
        return null;
    }
    /**
     * Read a `//` comment up to (not including) the next newline.
     *
     * @param {{line: number, column: number}} start - Location of the first '/'.
     * @returns {object} COMMENT token whose value includes the leading `//`.
     */
    readLineComment(start) {
        let value = '';
        // Skip the //
        this.advance();
        this.advance();
        while (!this.isAtEnd() && this.peek() !== '\n') {
            value += this.advance();
        }
        return {
            type: types_1.TokenType.COMMENT,
            value: '//' + value,
            location: start
        };
    }
    /**
     * Read a block comment through its closing delimiter.
     *
     * If the input ends before the comment is closed, a lexical error is
     * recorded. The token value is still delimited on both sides, as it is
     * for a properly closed comment.
     *
     * @param {{line: number, column: number}} start - Location of the opening '/'.
     * @returns {object} COMMENT token.
     */
    readBlockComment(start) {
        let value = '';
        let terminated = false;
        // Skip the /*
        this.advance();
        this.advance();
        while (!this.isAtEnd()) {
            if (this.peek() === '*' && this.peekNext() === '/') {
                this.advance(); // *
                this.advance(); // /
                terminated = true;
                break;
            }
            value += this.advance();
        }
        if (!terminated) {
            this.addError(types_1.ErrorType.LEXICAL_ERROR, 'Unterminated block comment. Missing closing */ before end of file.', start);
        }
        return {
            type: types_1.TokenType.COMMENT,
            value: '/*' + value + '*/',
            location: start
        };
    }
    /**
     * Read a double-quoted string literal.
     *
     * Escape sequences are kept verbatim (backslash plus the following
     * character). Reaching end of input before the closing quote records a
     * lexical error; the token is still returned with both quotes.
     *
     * @param {{line: number, column: number}} start - Location of the opening quote.
     * @returns {object} LITERAL token whose value includes the quotes.
     */
    readStringLiteral(start) {
        let value = '';
        // Skip opening quote
        this.advance();
        while (!this.isAtEnd() && this.peek() !== '"') {
            if (this.peek() === '\\') {
                this.advance(); // Skip backslash
                if (!this.isAtEnd()) {
                    const escaped = this.advance();
                    value += '\\' + escaped;
                }
            }
            else {
                value += this.advance();
            }
        }
        if (this.isAtEnd()) {
            this.addError(types_1.ErrorType.LEXICAL_ERROR, 'Unterminated string literal. Missing closing quote (") before end of file.', start);
        }
        else {
            this.advance(); // Skip closing quote
        }
        return {
            type: types_1.TokenType.LITERAL,
            value: '"' + value + '"',
            location: start
        };
    }
    /**
     * Read a single-quoted character literal.
     *
     * Accepts one plain character or one escape sequence, including Unicode
     * escapes (backslash, 'u', four hex digits) and octal escapes of up to
     * three digits. If the closing quote does not follow, a lexical error is
     * recorded and the token is returned with what was read.
     *
     * @param {{line: number, column: number}} start - Location of the opening quote.
     * @returns {object} LITERAL token whose value includes the quotes.
     */
    readCharLiteral(start) {
        let value = '';
        // Skip opening quote
        this.advance();
        if (!this.isAtEnd() && this.peek() !== "'") {
            if (this.peek() === '\\') {
                this.advance(); // Skip backslash
                if (!this.isAtEnd()) {
                    const escaped = this.advance();
                    value += '\\' + escaped + this.readEscapeTail(escaped);
                }
            }
            else {
                value += this.advance();
            }
        }
        if (this.isAtEnd() || this.peek() !== "'") {
            this.addError(types_1.ErrorType.LEXICAL_ERROR, 'Unterminated character literal. Missing closing single quote (\') or invalid character sequence.', start);
        }
        else {
            this.advance(); // Skip closing quote
        }
        return {
            type: types_1.TokenType.LITERAL,
            value: "'" + value + "'",
            location: start
        };
    }
    /**
     * Consume the remaining characters of a multi-character escape sequence.
     *
     * @param {string} escaped - The character that followed the backslash.
     * @returns {string} Extra characters consumed: hex digits of a Unicode
     *   escape, further digits of an octal escape, or '' for simple escapes.
     */
    readEscapeTail(escaped) {
        let tail = '';
        if (escaped === 'u') {
            // Java permits repeated 'u' markers before the four hex digits.
            while (!this.isAtEnd() && this.peek() === 'u') {
                tail += this.advance();
            }
            for (let count = 0; count < 4 && !this.isAtEnd() && /[0-9a-fA-F]/.test(this.peek()); count++) {
                tail += this.advance();
            }
        }
        else if (/[0-7]/.test(escaped)) {
            // Octal escapes have three digits only when the first is 0-3.
            const maxExtra = escaped <= '3' ? 2 : 1;
            for (let count = 0; count < maxExtra && !this.isAtEnd() && /[0-7]/.test(this.peek()); count++) {
                tail += this.advance();
            }
        }
        return tail;
    }
    /**
     * Read a numeric literal.
     *
     * Supports decimal integers and floats, an exponent part, the type
     * suffixes f/F/d/D/l/L, hexadecimal (0x) and binary (0b) integers with
     * an optional l/L suffix, and underscores between digits.
     *
     * @param {{line: number, column: number}} start - Location of the first digit.
     * @returns {object} LITERAL token holding the literal's source text.
     */
    readNumericLiteral(start) {
        let value = '';
        const marker = this.peekNext();
        const isHex = marker === 'x' || marker === 'X';
        const isBinary = marker === 'b' || marker === 'B';
        if (this.peek() === '0' && (isHex || isBinary)) {
            const digitPattern = isHex ? /[0-9a-fA-F]/ : /[01]/;
            const firstDigit = this.input[this.position + 2];
            // Only commit to the prefixed form when a digit really follows.
            if (firstDigit !== undefined && digitPattern.test(firstDigit)) {
                value += this.advance(); // 0
                value += this.advance(); // x or b
                value += this.readDigitRun(digitPattern);
                if (!this.isAtEnd() && /[lL]/.test(this.peek())) {
                    value += this.advance();
                }
                return {
                    type: types_1.TokenType.LITERAL,
                    value,
                    location: start
                };
            }
        }
        // Read integer part
        value += this.readDigitRun(/\d/);
        // A '.' only belongs to the number when a digit follows it.
        if (!this.isAtEnd() && this.peek() === '.' && this.isDigit(this.peekNext())) {
            value += this.advance(); // Add decimal point
            value += this.readDigitRun(/\d/);
        }
        // Check for scientific notation
        if (!this.isAtEnd() && (this.peek() === 'e' || this.peek() === 'E')) {
            value += this.advance();
            if (!this.isAtEnd() && (this.peek() === '+' || this.peek() === '-')) {
                value += this.advance();
            }
            value += this.readDigitRun(/\d/);
        }
        // Check for type suffixes (f, F, d, D, l, L)
        if (!this.isAtEnd() && /[fFdDlL]/.test(this.peek())) {
            value += this.advance();
        }
        return {
            type: types_1.TokenType.LITERAL,
            value,
            location: start
        };
    }
    /**
     * Consume a run of digits, allowing underscores between digits.
     *
     * @param {RegExp} digitPattern - Non-global pattern matching one digit.
     * @returns {string} The consumed characters (possibly empty).
     */
    readDigitRun(digitPattern) {
        let run = '';
        while (!this.isAtEnd()) {
            const ch = this.peek();
            if (digitPattern.test(ch)) {
                run += this.advance();
                continue;
            }
            // Underscores are separators only: they need a digit on both sides.
            if (ch !== '_' || run === '') {
                break;
            }
            let look = this.position;
            while (look < this.input.length && this.input[look] === '_') {
                look++;
            }
            if (look >= this.input.length || !digitPattern.test(this.input[look])) {
                break;
            }
            while (this.peek() === '_') {
                run += this.advance();
            }
        }
        return run;
    }
    /**
     * Read an identifier and classify it using `Lexer.KEYWORDS`.
     *
     * @param {{line: number, column: number}} start - Location of the first character.
     * @returns {object} KEYWORD or IDENTIFIER token.
     */
    readIdentifierOrKeyword(start) {
        let value = '';
        while (!this.isAtEnd() && (this.isAlphaNumeric(this.peek()) || this.peek() === '_' || this.peek() === '$')) {
            value += this.advance();
        }
        const type = Lexer.KEYWORDS.has(value) ? types_1.TokenType.KEYWORD : types_1.TokenType.IDENTIFIER;
        return {
            type,
            value,
            location: start
        };
    }
    /**
     * Consume the longest operator from `Lexer.OPERATORS` that matches at the
     * current position.
     *
     * Choosing the longest match (rather than the first) makes the result
     * independent of the order of the operator table.
     *
     * @returns {string|null} The operator text, or `null` if none matches.
     */
    readOperator() {
        let longest = null;
        for (const op of Lexer.OPERATORS) {
            if ((longest === null || op.length > longest.length) && this.matchString(op)) {
                longest = op;
            }
        }
        if (longest === null) {
            return null;
        }
        for (let i = 0; i < longest.length; i++) {
            this.advance();
        }
        return longest;
    }
    /**
     * @param {string} str - Text to compare against the input.
     * @returns {boolean} Whether the input at the current position starts with `str`.
     */
    matchString(str) {
        return this.input.startsWith(str, this.position);
    }
    /** Advance past any whitespace at the current position. */
    skipWhitespace() {
        while (!this.isAtEnd() && this.isWhitespace(this.peek())) {
            this.advance();
        }
    }
    /** @returns {boolean} Whether `char` matches the regex class `\s`. */
    isWhitespace(char) {
        return /\s/.test(char);
    }
    /** @returns {boolean} Whether `char` matches the regex class `\d`. */
    isDigit(char) {
        return /\d/.test(char);
    }
    /** @returns {boolean} Whether `char` is an ASCII letter. */
    isAlpha(char) {
        return /[a-zA-Z]/.test(char);
    }
    /** @returns {boolean} Whether `char` is an ASCII letter or a digit. */
    isAlphaNumeric(char) {
        return this.isAlpha(char) || this.isDigit(char);
    }
    /** @returns {string} The current character, or '\0' at end of input. */
    peek() {
        if (this.isAtEnd())
            return '\0';
        return this.input[this.position];
    }
    /** @returns {string} The character after the current one, or '\0' if there is none. */
    peekNext() {
        if (this.position + 1 >= this.input.length)
            return '\0';
        return this.input[this.position + 1];
    }
    /**
     * Consume one character and update the line/column counters.
     *
     * @returns {string} The consumed character, or '\0' at end of input.
     */
    advance() {
        if (this.isAtEnd())
            return '\0';
        const char = this.input[this.position];
        this.position++;
        if (char === '\n') {
            this.line++;
            this.column = 1;
        }
        else {
            this.column++;
        }
        return char;
    }
    /** @returns {boolean} Whether the whole input has been consumed. */
    isAtEnd() {
        return this.position >= this.input.length;
    }
    /** @returns {{line: number, column: number}} Snapshot of the current position. */
    getCurrentLocation() {
        return {
            line: this.line,
            column: this.column
        };
    }
    /**
     * Record an error with severity `types_1.ErrorSeverity.ERROR`.
     *
     * @param {*} type - Error category (a `types_1.ErrorType` member).
     * @param {string} message - Human-readable description.
     * @param {{line: number, column: number}} location - Where the problem starts.
     */
    addError(type, message, location) {
        this.errors.push({
            type,
            message,
            location,
            severity: types_1.ErrorSeverity.ERROR
        });
    }
}
exports.Lexer = Lexer;
// Java keywords. The literals 'true', 'false' and 'null' are listed here too,
// so they are classified as keywords rather than identifiers.
Lexer.KEYWORDS = new Set([
    'abstract', 'assert', 'boolean', 'break', 'byte', 'case', 'catch', 'char',
    'class', 'const', 'continue', 'default', 'do', 'double', 'else', 'enum',
    'extends', 'final', 'finally', 'float', 'for', 'goto', 'if', 'implements',
    'import', 'instanceof', 'int', 'interface', 'long', 'native', 'new', 'null',
    'package', 'private', 'protected', 'public', 'return', 'short', 'static',
    'strictfp', 'super', 'switch', 'synchronized', 'this', 'throw', 'throws',
    'transient', 'try', 'void', 'volatile', 'while', 'true', 'false'
]);
// Java operators, ordered strictly longest-first. A scan that takes the first
// matching entry then always yields the longest operator: '>>>=' is tried
// before '>>>', '>>=' and '>>', so none of them is split into shorter tokens.
Lexer.OPERATORS = [
    // four characters
    '>>>=',
    // three characters
    '<<=', '>>=', '>>>',
    // two characters
    '==', '!=', '<=', '>=', '&&', '||', '++', '--', '+=', '-=', '*=', '/=', '%=',
    '<<', '>>', '&=', '|=', '^=',
    // one character
    '=', '+', '-', '*', '/', '%', '<', '>', '!', '&', '|', '^', '~', '?', ':'
];
// Java punctuation (separators), each a single character.
Lexer.PUNCTUATION = new Set([
    '(', ')', '{', '}', '[', ']', ';', ',', '.'
]);
//# sourceMappingURL=lexer.js.map