// python2ib
// Version:
// Convert Python code to IB Pseudocode format
// 918 lines • 30.3 kB
// JavaScript
/**
* Python AST parser using a simple recursive descent approach
* This is a simplified parser for educational Python constructs
*/
/**
 * Token types for lexical analysis.
 *
 * Every member is a string-valued constant whose value equals its own name
 * (e.g. `TokenType.NUMBER === "NUMBER"`).
 */
export var TokenType;
(function (registry) {
    const groups = [
        // Literals
        ["NUMBER", "STRING", "IDENTIFIER"],
        // Keywords
        [
            "IF", "ELIF", "ELSE", "WHILE", "FOR", "DEF", "RETURN", "PRINT",
            "INPUT", "IN", "AND", "OR", "NOT", "TRUE", "FALSE", "NONE"
        ],
        // Operators
        [
            "ASSIGN", "PLUS", "MINUS", "MULTIPLY", "DIVIDE", "FLOOR_DIVIDE",
            "MODULO", "POWER", "EQUAL", "NOT_EQUAL", "LESS", "LESS_EQUAL",
            "GREATER", "GREATER_EQUAL"
        ],
        // Compound assignment
        ["PLUS_ASSIGN", "MINUS_ASSIGN", "MULTIPLY_ASSIGN", "DIVIDE_ASSIGN"],
        // Delimiters
        ["LPAREN", "RPAREN", "LBRACKET", "RBRACKET", "COMMA", "COLON", "DOT"],
        // Special
        ["NEWLINE", "INDENT", "DEDENT", "COMMENT", "EOF"]
    ];
    // Register the members in declaration order so key enumeration is stable.
    for (const group of groups) {
        for (const name of group) {
            registry[name] = name;
        }
    }
})(TokenType || (TokenType = {}));
/**
 * Simple lexer for Python code.
 *
 * Converts source text into a flat list of `{ type, value, line, column }`
 * tokens. Block structure is expressed with INDENT / DEDENT tokens derived
 * from the leading whitespace of each non-blank, non-comment line.
 */
export class PythonLexer {
    /** Lookup tables shared by all instances; built lazily on first use. */
    static #tables = null;
    code;
    position = 0;
    line = 1;
    column = 1;
    tokens = [];
    indentStack = [0]; // Track indentation levels
    /**
     * @param {string} code - Python source text to tokenize.
     */
    constructor(code) {
        // Normalize CRLF / lone CR so '\n' is the only line terminator the
        // scanner has to recognize (a raw '\r' is otherwise an unknown character).
        this.code = typeof code === 'string' ? code.replace(/\r\n?/g, '\n') : code;
    }
    /**
     * Build (once) and return the keyword / operator lookup tables.
     * Maps are used instead of object literals so that identifiers such as
     * `constructor` or `toString` cannot match inherited Object.prototype keys.
     */
    static #lookupTables() {
        if (PythonLexer.#tables === null) {
            PythonLexer.#tables = {
                keywords: new Map([
                    ['if', TokenType.IF],
                    ['elif', TokenType.ELIF],
                    ['else', TokenType.ELSE],
                    ['while', TokenType.WHILE],
                    ['for', TokenType.FOR],
                    ['def', TokenType.DEF],
                    ['return', TokenType.RETURN],
                    ['print', TokenType.PRINT],
                    ['input', TokenType.INPUT],
                    ['in', TokenType.IN],
                    ['and', TokenType.AND],
                    ['or', TokenType.OR],
                    ['not', TokenType.NOT],
                    ['True', TokenType.TRUE],
                    ['False', TokenType.FALSE],
                    ['None', TokenType.NONE]
                ]),
                twoChar: new Map([
                    ['==', TokenType.EQUAL],
                    ['!=', TokenType.NOT_EQUAL],
                    ['<=', TokenType.LESS_EQUAL],
                    ['>=', TokenType.GREATER_EQUAL],
                    ['//', TokenType.FLOOR_DIVIDE],
                    ['**', TokenType.POWER],
                    ['+=', TokenType.PLUS_ASSIGN],
                    ['-=', TokenType.MINUS_ASSIGN],
                    ['*=', TokenType.MULTIPLY_ASSIGN],
                    ['/=', TokenType.DIVIDE_ASSIGN]
                ]),
                oneChar: new Map([
                    ['=', TokenType.ASSIGN],
                    ['+', TokenType.PLUS],
                    ['-', TokenType.MINUS],
                    ['*', TokenType.MULTIPLY],
                    ['/', TokenType.DIVIDE],
                    ['%', TokenType.MODULO],
                    ['<', TokenType.LESS],
                    ['>', TokenType.GREATER],
                    ['(', TokenType.LPAREN],
                    [')', TokenType.RPAREN],
                    ['[', TokenType.LBRACKET],
                    [']', TokenType.RBRACKET],
                    [',', TokenType.COMMA],
                    [':', TokenType.COLON],
                    ['.', TokenType.DOT]
                ])
            };
        }
        return PythonLexer.#tables;
    }
    /**
     * Tokenize the Python code.
     *
     * @returns {Array<{type: string, value: string, line: number, column: number}>}
     *   The token list, always terminated by an EOF token.
     * @throws {Error} On an unexpected character, an unterminated string, or
     *   an indentation level that matches no enclosing block.
     */
    tokenize() {
        this.tokens = [];
        this.position = 0;
        this.line = 1;
        this.column = 1;
        // Reset so a previous run that threw mid-block cannot leak indentation state.
        this.indentStack = [0];
        let atLineStart = true;
        while (this.position < this.code.length) {
            // Handle indentation at the start of lines
            if (atLineStart) {
                this.handleIndentation();
                atLineStart = false;
            }
            this.skipWhitespace();
            if (this.position >= this.code.length) {
                break;
            }
            const char = this.code[this.position];
            // Comments
            if (char === '#') {
                this.readComment();
                continue;
            }
            // Newlines: advance first so addToken derives the correct start column,
            // and bump the line counter only after the token has been recorded.
            if (char === '\n') {
                this.advance();
                this.addToken(TokenType.NEWLINE, char);
                this.line++;
                this.column = 1;
                atLineStart = true;
                continue;
            }
            // Numbers
            if (this.isDigit(char)) {
                this.readNumber();
                continue;
            }
            // Strings
            if (char === '"' || char === "'") {
                this.readString();
                continue;
            }
            // Identifiers and keywords
            if (this.isAlpha(char) || char === '_') {
                this.readIdentifier();
                continue;
            }
            // Two-character operators take priority over their one-character prefixes
            const twoChar = this.code.slice(this.position, this.position + 2);
            const twoCharType = this.getTwoCharOperator(twoChar);
            if (twoCharType) {
                this.advance(2);
                this.addToken(twoCharType, twoChar);
                continue;
            }
            // Single-character operators and delimiters
            const oneCharType = this.getSingleCharOperator(char);
            if (oneCharType) {
                this.advance();
                this.addToken(oneCharType, char);
                continue;
            }
            // Unknown character
            throw new Error(`Unexpected character '${char}' at line ${this.line}, column ${this.column}`);
        }
        // Add DEDENT tokens for any remaining indentation at end of file
        while (this.indentStack.length > 1) {
            this.indentStack.pop();
            this.addToken(TokenType.DEDENT, '');
        }
        this.addToken(TokenType.EOF, '');
        return this.tokens;
    }
    /**
     * Move the cursor forward on the current line.
     * @param {number} [count=1] - Number of characters to skip.
     */
    advance(count = 1) {
        this.position += count;
        this.column += count;
    }
    /**
     * Append a token at the current line.
     *
     * @param {string} type - One of the TokenType values.
     * @param {string} value - Token text.
     * @param {number} [column] - 1-based start column. Defaults to the current
     *   column minus `value.length`, which is only correct when the cursor sits
     *   just past the token and `value` is its full source text; callers whose
     *   value differs from the source text (strings, comments) pass it explicitly.
     */
    addToken(type, value, column = this.column - value.length) {
        this.tokens.push({
            type,
            value,
            line: this.line,
            column
        });
    }
    /** Skip spaces and tabs (never newlines). */
    skipWhitespace() {
        while (this.position < this.code.length &&
            (this.code[this.position] === ' ' || this.code[this.position] === '\t')) {
            this.advance();
        }
    }
    /**
     * Measure the indentation of the line starting at the cursor and emit
     * INDENT / DEDENT tokens relative to the enclosing block. Blank lines and
     * comment-only lines never change the indentation state.
     *
     * @throws {Error} If a dedent lands on a level that no open block uses.
     */
    handleIndentation() {
        const start = this.position;
        let indentLevel = 0;
        // Count spaces and tabs at the beginning of the line
        while (this.position < this.code.length) {
            const char = this.code[this.position];
            if (char === ' ') {
                indentLevel += 1;
            }
            else if (char === '\t') {
                indentLevel += 4; // Treat tab as 4 spaces
            }
            else {
                break;
            }
            this.advance();
        }
        // Skip empty lines and comment-only lines
        const next = this.code[this.position];
        if (next === undefined || next === '\n' || next === '#') {
            return;
        }
        const currentIndent = this.indentStack[this.indentStack.length - 1];
        if (indentLevel > currentIndent) {
            // Increased indentation - add INDENT token
            this.indentStack.push(indentLevel);
            this.addToken(TokenType.INDENT, this.code.substring(start, this.position));
        }
        else if (indentLevel < currentIndent) {
            // Decreased indentation - one DEDENT per block being closed
            while (this.indentStack.length > 1 && this.indentStack[this.indentStack.length - 1] > indentLevel) {
                this.indentStack.pop();
                this.addToken(TokenType.DEDENT, '');
            }
            // The new level must match one of the enclosing blocks exactly
            if (this.indentStack[this.indentStack.length - 1] !== indentLevel) {
                throw new Error(`Indentation error at line ${this.line}: unmatched indentation level`);
            }
        }
        // If indentLevel === currentIndent, no change needed
    }
    /** Read a `#` comment up to (not including) the end of line; the token value is the trimmed text after '#'. */
    readComment() {
        const start = this.position;
        const startColumn = this.column;
        while (this.position < this.code.length && this.code[this.position] !== '\n') {
            this.advance();
        }
        const value = this.code.substring(start + 1, this.position).trim(); // Skip '#' and trim whitespace
        this.addToken(TokenType.COMMENT, value, startColumn);
    }
    /**
     * Read a numeric literal: digits, optionally followed by a single decimal
     * point and more digits. A second '.' is left for the caller so malformed
     * input such as `1.2.3` is not silently swallowed as one number.
     */
    readNumber() {
        const start = this.position;
        while (this.position < this.code.length && this.isDigit(this.code[this.position])) {
            this.advance();
        }
        if (this.position < this.code.length && this.code[this.position] === '.') {
            this.advance();
            while (this.position < this.code.length && this.isDigit(this.code[this.position])) {
                this.advance();
            }
        }
        this.addToken(TokenType.NUMBER, this.code.substring(start, this.position));
    }
    /**
     * Read a single- or double-quoted string. The token value is the raw text
     * between the quotes (escape sequences are kept verbatim).
     *
     * @throws {Error} If the closing quote is missing.
     */
    readString() {
        const startColumn = this.column;
        const quote = this.code[this.position];
        this.advance(); // Skip opening quote
        const start = this.position;
        while (this.position < this.code.length && this.code[this.position] !== quote) {
            // A backslash escapes the next character, so step over both
            this.advance(this.code[this.position] === '\\' ? 2 : 1);
        }
        if (this.position >= this.code.length) {
            throw new Error(`Unterminated string at line ${this.line}`);
        }
        const value = this.code.substring(start, this.position);
        this.advance(); // Skip closing quote
        this.addToken(TokenType.STRING, value, startColumn);
    }
    /** Read an identifier and classify it as a keyword token or IDENTIFIER. */
    readIdentifier() {
        const start = this.position;
        while (this.position < this.code.length &&
            (this.isAlphaNumeric(this.code[this.position]) || this.code[this.position] === '_')) {
            this.advance();
        }
        const value = this.code.substring(start, this.position);
        const tokenType = this.getKeywordType(value) ?? TokenType.IDENTIFIER;
        this.addToken(tokenType, value);
    }
    /** @returns {boolean} True for ASCII digits 0-9. */
    isDigit(char) {
        return char >= '0' && char <= '9';
    }
    /** @returns {boolean} True for ASCII letters a-z / A-Z. */
    isAlpha(char) {
        return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z');
    }
    /** @returns {boolean} True for ASCII letters and digits. */
    isAlphaNumeric(char) {
        return this.isAlpha(char) || this.isDigit(char);
    }
    /**
     * @param {string} value - Identifier text.
     * @returns {string|null} The keyword token type, or null for a plain identifier.
     */
    getKeywordType(value) {
        return PythonLexer.#lookupTables().keywords.get(value) ?? null;
    }
    /**
     * @param {string} value - Candidate operator text.
     * @returns {string|null} The token type of a two-character operator, or null.
     */
    getTwoCharOperator(value) {
        return PythonLexer.#lookupTables().twoChar.get(value) ?? null;
    }
    /**
     * @param {string} char - Candidate operator or delimiter character.
     * @returns {string|null} The token type of a one-character operator/delimiter, or null.
     */
    getSingleCharOperator(char) {
        return PythonLexer.#lookupTables().oneChar.get(char) ?? null;
    }
}
/** Simple AST parser for Python */
export class ASTParser {
tokens = [];
current = 0;
/** Parse Python code to AST */
async parse(code) {
return this.parseSync(code);
}
/** Parse Python code to AST synchronously */
parseSync(code) {
const lexer = new PythonLexer(code);
this.tokens = lexer.tokenize();
this.current = 0;
return this.parseProgram();
}
parseProgram() {
const statements = [];
while (!this.isAtEnd()) {
if (this.check(TokenType.NEWLINE)) {
this.advance();
continue;
}
const stmt = this.parseStatement();
if (stmt) {
statements.push(stmt);
}
}
return {
type: 'Module',
body: statements
};
}
parseStatement() {
if (this.check(TokenType.COMMENT)) {
return this.parseComment();
}
if (this.check(TokenType.IF)) {
return this.parseIf();
}
if (this.check(TokenType.WHILE)) {
return this.parseWhile();
}
if (this.check(TokenType.FOR)) {
return this.parseFor();
}
if (this.check(TokenType.DEF)) {
return this.parseFunction();
}
if (this.check(TokenType.RETURN)) {
return this.parseReturn();
}
if (this.check(TokenType.PRINT)) {
return this.parsePrint();
}
// Try assignment or expression
return this.parseAssignmentOrExpression();
}
parseComment() {
const token = this.advance();
return {
type: 'Comment',
value: token.value,
lineno: token.line
};
}
parseIf() {
this.consume(TokenType.IF, "Expected 'if'");
const test = this.parseExpression();
this.consume(TokenType.COLON, "Expected ':' after if condition");
// Skip newlines
while (this.check(TokenType.NEWLINE)) {
this.advance();
}
// Parse body (indented block)
const body = this.parseBlock();
// Parse elif/else
const orelse = [];
while (this.check(TokenType.ELIF)) {
this.advance(); // consume 'elif'
const elifTest = this.parseExpression();
this.consume(TokenType.COLON, "Expected ':' after elif condition");
// Skip newlines
while (this.check(TokenType.NEWLINE)) {
this.advance();
}
const elifBody = this.parseBlock();
orelse.push({
type: 'If',
test: elifTest,
body: elifBody,
orelse: [],
lineno: this.previous().line
});
}
if (this.check(TokenType.ELSE)) {
this.advance(); // consume 'else'
this.consume(TokenType.COLON, "Expected ':' after else");
// Skip newlines
while (this.check(TokenType.NEWLINE)) {
this.advance();
}
const elseBody = this.parseBlock();
// Create an else node instead of spreading the body
if (elseBody.length > 0) {
orelse.push({
type: 'Else',
body: elseBody,
lineno: this.previous().line
});
}
}
return {
type: 'If',
test,
body,
orelse,
lineno: this.previous().line
};
}
parseWhile() {
this.consume(TokenType.WHILE, "Expected 'while'");
const test = this.parseExpression();
this.consume(TokenType.COLON, "Expected ':' after while condition");
// Parse body statements
const body = [];
this.skipNewlines(); // Skip any newlines after colon
// Parse indented block
if (this.check(TokenType.INDENT)) {
this.advance(); // consume INDENT
while (!this.check(TokenType.DEDENT) && !this.isAtEnd()) {
if (this.check(TokenType.NEWLINE)) {
this.advance();
continue;
}
const stmt = this.parseStatement();
if (stmt) {
body.push(stmt);
}
}
if (this.check(TokenType.DEDENT)) {
this.advance(); // consume DEDENT
}
}
return {
type: 'While',
test,
body,
lineno: this.previous().line
};
}
parseFor() {
this.consume(TokenType.FOR, "Expected 'for'");
const target = this.parseExpression();
this.consume(TokenType.IN, "Expected 'in' in for loop");
const iter = this.parseExpression();
this.consume(TokenType.COLON, "Expected ':' after for clause");
// Parse body statements
const body = [];
this.skipNewlines(); // Skip any newlines after colon
// Parse indented block
if (this.check(TokenType.INDENT)) {
this.advance(); // consume INDENT
while (!this.check(TokenType.DEDENT) && !this.isAtEnd()) {
if (this.check(TokenType.NEWLINE)) {
this.advance();
continue;
}
const stmt = this.parseStatement();
if (stmt) {
body.push(stmt);
}
}
if (this.check(TokenType.DEDENT)) {
this.advance(); // consume DEDENT
}
}
return {
type: 'For',
target,
iter,
body,
lineno: this.previous().line
};
}
parseFunction() {
this.consume(TokenType.DEF, "Expected 'def'");
const name = this.consume(TokenType.IDENTIFIER, "Expected function name").value;
this.consume(TokenType.LPAREN, "Expected '(' after function name");
const args = [];
if (!this.check(TokenType.RPAREN)) {
do {
args.push(this.consume(TokenType.IDENTIFIER, "Expected parameter name").value);
} while (this.match(TokenType.COMMA));
}
this.consume(TokenType.RPAREN, "Expected ')' after parameters");
this.consume(TokenType.COLON, "Expected ':' after function signature");
const body = this.parseBlock();
return {
type: 'FunctionDef',
name,
args: { args },
body,
lineno: this.previous().line
};
}
parseReturn() {
const token = this.advance();
let value = null;
if (!this.check(TokenType.NEWLINE) && !this.isAtEnd()) {
value = this.parseExpression();
}
return {
type: 'Return',
value,
lineno: token.line
};
}
parsePrint() {
const token = this.advance(); // consume PRINT
this.consume(TokenType.LPAREN, "Expected '(' after print");
const args = [];
if (!this.check(TokenType.RPAREN)) {
do {
// Handle f-string case: f"string"
if (this.check(TokenType.IDENTIFIER) && this.peek()?.value === 'f' &&
this.tokens[this.current + 1]?.type === TokenType.STRING) {
this.advance(); // consume 'f'
const stringToken = this.advance(); // consume string
args.push({
type: 'JoinedStr',
values: [{
type: 'Constant',
value: stringToken.value,
lineno: stringToken.line
}],
lineno: stringToken.line
});
}
else {
args.push(this.parseExpression());
}
} while (this.match(TokenType.COMMA));
}
this.consume(TokenType.RPAREN, "Expected ')' after print arguments");
return {
type: 'Call',
func: {
type: 'Name',
id: 'print'
},
args,
lineno: token.line
};
}
parseAssignmentOrExpression() {
const expr = this.parseExpression();
// Check for assignment
if (this.match(TokenType.ASSIGN, TokenType.PLUS_ASSIGN, TokenType.MINUS_ASSIGN, TokenType.MULTIPLY_ASSIGN, TokenType.DIVIDE_ASSIGN)) {
const operator = this.previous();
const value = this.parseExpression();
return {
type: 'Assign',
targets: [expr],
value,
operator: operator.value,
lineno: operator.line
};
}
return {
type: 'Expr',
value: expr,
lineno: expr.lineno
};
}
parseExpression() {
const expr = this.parseOr();
// Check for comma-separated expressions (tuple)
if (this.check(TokenType.COMMA)) {
const elements = [expr];
while (this.match(TokenType.COMMA)) {
elements.push(this.parseOr());
}
return {
type: 'Tuple',
elts: elements,
lineno: expr.lineno
};
}
return expr;
}
parseOr() {
let expr = this.parseAnd();
while (this.match(TokenType.OR)) {
const operator = this.previous();
const right = this.parseAnd();
expr = {
type: 'BoolOp',
op: 'Or',
values: [expr, right],
lineno: operator.line
};
}
return expr;
}
parseAnd() {
let expr = this.parseEquality();
while (this.match(TokenType.AND)) {
const operator = this.previous();
const right = this.parseEquality();
expr = {
type: 'BoolOp',
op: 'And',
values: [expr, right],
lineno: operator.line
};
}
return expr;
}
parseEquality() {
let expr = this.parseComparison();
while (this.match(TokenType.EQUAL, TokenType.NOT_EQUAL)) {
const operator = this.previous();
const right = this.parseComparison();
expr = {
type: 'Compare',
left: expr,
ops: [operator.value === '==' ? 'Eq' : 'NotEq'],
comparators: [right],
lineno: operator.line
};
}
return expr;
}
parseComparison() {
let expr = this.parseTerm();
while (this.match(TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL)) {
const operator = this.previous();
const right = this.parseTerm();
const opMap = {
'>': 'Gt',
'>=': 'GtE',
'<': 'Lt',
'<=': 'LtE'
};
expr = {
type: 'Compare',
left: expr,
ops: [opMap[operator.value]],
comparators: [right],
lineno: operator.line
};
}
return expr;
}
parseTerm() {
let expr = this.parseFactor();
while (this.match(TokenType.MINUS, TokenType.PLUS)) {
const operator = this.previous();
const right = this.parseFactor();
expr = {
type: 'BinOp',
left: expr,
op: operator.value === '+' ? 'Add' : 'Sub',
right,
lineno: operator.line
};
}
return expr;
}
parseFactor() {
let expr = this.parseUnary();
while (this.match(TokenType.DIVIDE, TokenType.MULTIPLY, TokenType.MODULO, TokenType.FLOOR_DIVIDE)) {
const operator = this.previous();
const right = this.parseUnary();
const opMap = {
'/': 'Div',
'*': 'Mult',
'%': 'Mod',
'//': 'FloorDiv'
};
expr = {
type: 'BinOp',
left: expr,
op: opMap[operator.value],
right,
lineno: operator.line
};
}
return expr;
}
parseUnary() {
if (this.match(TokenType.NOT, TokenType.MINUS)) {
const operator = this.previous();
const right = this.parseUnary();
return {
type: 'UnaryOp',
op: operator.value === 'not' ? 'Not' : 'USub',
operand: right,
lineno: operator.line
};
}
return this.parsePower();
}
parsePower() {
let expr = this.parseCall();
if (this.match(TokenType.POWER)) {
const operator = this.previous();
const right = this.parseUnary(); // Right associative
expr = {
type: 'BinOp',
left: expr,
op: 'Pow',
right,
lineno: operator.line
};
}
return expr;
}
parseCall() {
let expr = this.parsePrimary();
// eslint-disable-next-line no-constant-condition
while (true) {
if (this.match(TokenType.LPAREN)) {
expr = this.finishCall(expr);
}
else if (this.match(TokenType.LBRACKET)) {
expr = this.finishSubscript(expr);
}
else if (this.match(TokenType.DOT)) {
expr = this.finishAttribute(expr);
}
else {
break;
}
}
return expr;
}
finishCall(callee) {
const args = [];
if (!this.check(TokenType.RPAREN)) {
do {
args.push(this.parseOr());
} while (this.match(TokenType.COMMA));
}
this.consume(TokenType.RPAREN, "Expected ')' after arguments");
return {
type: 'Call',
func: callee,
args,
lineno: callee.lineno
};
}
finishSubscript(value) {
const slice = this.parseExpression();
this.consume(TokenType.RBRACKET, "Expected ']' after subscript");
return {
type: 'Subscript',
value,
slice,
lineno: value.lineno
};
}
finishAttribute(value) {
const attr = this.consume(TokenType.IDENTIFIER, "Expected attribute name after '.'");
return {
type: 'Attribute',
value,
attr: attr.value,
lineno: value.lineno
};
}
parsePrimary() {
if (this.match(TokenType.TRUE)) {
return {
type: 'Constant',
value: true,
lineno: this.previous().line
};
}
if (this.match(TokenType.FALSE)) {
return {
type: 'Constant',
value: false,
lineno: this.previous().line
};
}
if (this.match(TokenType.NONE)) {
return {
type: 'Constant',
value: null,
lineno: this.previous().line
};
}
if (this.match(TokenType.NUMBER)) {
const value = this.previous().value;
return {
type: 'Constant',
value: value.includes('.') ? parseFloat(value) : parseInt(value),
lineno: this.previous().line
};
}
if (this.match(TokenType.STRING)) {
return {
type: 'Constant',
value: this.previous().value,
lineno: this.previous().line
};
}
if (this.match(TokenType.IDENTIFIER, TokenType.PRINT, TokenType.INPUT)) {
return {
type: 'Name',
id: this.previous().value,
lineno: this.previous().line
};
}
if (this.match(TokenType.LPAREN)) {
const expr = this.parseExpression();
this.consume(TokenType.RPAREN, "Expected ')' after expression");
return expr;
}
if (this.match(TokenType.LBRACKET)) {
const elements = [];
if (!this.check(TokenType.RBRACKET)) {
do {
elements.push(this.parseExpression());
} while (this.match(TokenType.COMMA));
}
this.consume(TokenType.RBRACKET, "Expected ']' after list elements");
return {
type: 'List',
elts: elements,
lineno: this.previous().line
};
}
throw new Error(`Unexpected token: ${this.peek().value} at line ${this.peek().line}`);
}
/** Parse an indented block of statements */
parseBlock() {
const statements = [];
this.skipNewlines();
if (this.match(TokenType.INDENT)) {
while (!this.check(TokenType.DEDENT) && !this.isAtEnd()) {
this.skipNewlines();
if (this.check(TokenType.DEDENT) || this.isAtEnd()) {
break;
}
const stmt = this.parseStatement();
if (stmt) {
statements.push(stmt);
}
this.skipNewlines();
}
if (this.match(TokenType.DEDENT)) {
// Successfully consumed DEDENT
}
}
return statements;
}
skipNewlines() {
while (this.check(TokenType.NEWLINE)) {
this.advance();
}
}
// Helper methods
match(...types) {
for (const type of types) {
if (this.check(type)) {
this.advance();
return true;
}
}
return false;
}
check(type) {
if (this.isAtEnd())
return false;
return this.peek().type === type;
}
advance() {
if (!this.isAtEnd())
this.current++;
return this.previous();
}
isAtEnd() {
return this.peek().type === TokenType.EOF;
}
peek() {
return this.tokens[this.current];
}
previous() {
return this.tokens[this.current - 1];
}
consume(type, message) {
if (this.check(type))
return this.advance();
const current = this.peek();
throw new Error(`${message}. Got '${current.value}' at line ${current.line}`);
}
}
//# sourceMappingURL=ast-parser.js.map