UNPKG

python2ib

Version:

Convert Python code to IB Pseudocode format

918 lines 30.3 kB
/**
 * Python AST parser using a simple recursive descent approach.
 * This is a simplified parser for educational Python constructs.
 */

/** Token types for lexical analysis. Every member's value equals its own name. */
export const TokenType = {
    // Literals
    NUMBER: 'NUMBER',
    STRING: 'STRING',
    IDENTIFIER: 'IDENTIFIER',
    // Keywords
    IF: 'IF',
    ELIF: 'ELIF',
    ELSE: 'ELSE',
    WHILE: 'WHILE',
    FOR: 'FOR',
    DEF: 'DEF',
    RETURN: 'RETURN',
    PRINT: 'PRINT',
    INPUT: 'INPUT',
    IN: 'IN',
    AND: 'AND',
    OR: 'OR',
    NOT: 'NOT',
    TRUE: 'TRUE',
    FALSE: 'FALSE',
    NONE: 'NONE',
    // Operators
    ASSIGN: 'ASSIGN',
    PLUS: 'PLUS',
    MINUS: 'MINUS',
    MULTIPLY: 'MULTIPLY',
    DIVIDE: 'DIVIDE',
    FLOOR_DIVIDE: 'FLOOR_DIVIDE',
    MODULO: 'MODULO',
    POWER: 'POWER',
    EQUAL: 'EQUAL',
    NOT_EQUAL: 'NOT_EQUAL',
    LESS: 'LESS',
    LESS_EQUAL: 'LESS_EQUAL',
    GREATER: 'GREATER',
    GREATER_EQUAL: 'GREATER_EQUAL',
    // Compound assignment
    PLUS_ASSIGN: 'PLUS_ASSIGN',
    MINUS_ASSIGN: 'MINUS_ASSIGN',
    MULTIPLY_ASSIGN: 'MULTIPLY_ASSIGN',
    DIVIDE_ASSIGN: 'DIVIDE_ASSIGN',
    // Delimiters
    LPAREN: 'LPAREN',
    RPAREN: 'RPAREN',
    LBRACKET: 'LBRACKET',
    RBRACKET: 'RBRACKET',
    COMMA: 'COMMA',
    COLON: 'COLON',
    DOT: 'DOT',
    // Special
    NEWLINE: 'NEWLINE',
    INDENT: 'INDENT',
    DEDENT: 'DEDENT',
    COMMENT: 'COMMENT',
    EOF: 'EOF',
};

// Lookup tables are Maps rather than object literals so that source text such
// as `constructor` or `toString` can never resolve to an inherited
// Object.prototype member and be mistaken for a token type.

/** Reserved words recognised by the lexer, keyed by their source spelling. */
const KEYWORDS = new Map([
    ['if', TokenType.IF],
    ['elif', TokenType.ELIF],
    ['else', TokenType.ELSE],
    ['while', TokenType.WHILE],
    ['for', TokenType.FOR],
    ['def', TokenType.DEF],
    ['return', TokenType.RETURN],
    ['print', TokenType.PRINT],
    ['input', TokenType.INPUT],
    ['in', TokenType.IN],
    ['and', TokenType.AND],
    ['or', TokenType.OR],
    ['not', TokenType.NOT],
    ['True', TokenType.TRUE],
    ['False', TokenType.FALSE],
    ['None', TokenType.NONE],
]);

/** Operators made of exactly two characters. */
const TWO_CHAR_OPERATORS = new Map([
    ['==', TokenType.EQUAL],
    ['!=', TokenType.NOT_EQUAL],
    ['<=', TokenType.LESS_EQUAL],
    ['>=', TokenType.GREATER_EQUAL],
    ['//', TokenType.FLOOR_DIVIDE],
    ['**', TokenType.POWER],
    ['+=', TokenType.PLUS_ASSIGN],
    ['-=', TokenType.MINUS_ASSIGN],
    ['*=', TokenType.MULTIPLY_ASSIGN],
    ['/=', TokenType.DIVIDE_ASSIGN],
]);

/** Operators and delimiters made of a single character. */
const SINGLE_CHAR_OPERATORS = new Map([
    ['=', TokenType.ASSIGN],
    ['+', TokenType.PLUS],
    ['-', TokenType.MINUS],
    ['*', TokenType.MULTIPLY],
    ['/', TokenType.DIVIDE],
    ['%', TokenType.MODULO],
    ['<', TokenType.LESS],
    ['>', TokenType.GREATER],
    ['(', TokenType.LPAREN],
    [')', TokenType.RPAREN],
    ['[', TokenType.LBRACKET],
    [']', TokenType.RBRACKET],
    [',', TokenType.COMMA],
    [':', TokenType.COLON],
    ['.', TokenType.DOT],
]);

/**
 * Simple lexer for Python code.
 *
 * Produces a flat array of `{ type, value, line, column }` tokens, including
 * synthetic INDENT / DEDENT tokens derived from leading whitespace and a final
 * EOF token.
 */
export class PythonLexer {
    code;
    position = 0;
    line = 1;
    column = 1;
    tokens = [];
    indentStack = [0]; // Track indentation levels

    /**
     * @param {string} code - Python source text to tokenize.
     */
    constructor(code) {
        this.code = code;
    }

    /**
     * Tokenize the Python code.
     *
     * @returns {Array<{type: string, value: string, line: number, column: number}>}
     *   The token list, always terminated by an EOF token.
     * @throws {Error} On an unexpected character, an unterminated string, or a
     *   dedent that does not match any enclosing indentation level.
     */
    tokenize() {
        this.tokens = [];
        this.position = 0;
        this.line = 1;
        this.column = 1;
        // Reset as well, so a previous run that threw part-way through an
        // indented block cannot leak stale levels into this run.
        this.indentStack = [0];
        let atLineStart = true;

        while (this.position < this.code.length) {
            // Handle indentation at the start of lines
            if (atLineStart) {
                this.handleIndentation();
                atLineStart = false;
            }
            this.skipWhitespace();
            if (this.position >= this.code.length) {
                break;
            }

            const char = this.code[this.position];

            // Comments
            if (char === '#') {
                this.readComment();
                continue;
            }

            // Newlines
            if (char === '\n') {
                // Advance first: addToken derives the start column by
                // subtracting the token width from the current column.
                this.advance();
                this.addToken(TokenType.NEWLINE, char);
                this.line++;
                this.column = 1;
                atLineStart = true;
                continue;
            }

            // Numbers
            if (this.isDigit(char)) {
                this.readNumber();
                continue;
            }

            // Strings
            if (char === '"' || char === "'") {
                this.readString();
                continue;
            }

            // Identifiers and keywords
            if (this.isAlpha(char) || char === '_') {
                this.readIdentifier();
                continue;
            }

            // Two-character operators (checked first so '==' is not read as '=' '=')
            const twoChar = this.code.slice(this.position, this.position + 2);
            const twoCharType = this.getTwoCharOperator(twoChar);
            if (twoCharType) {
                this.advance(2);
                this.addToken(twoCharType, twoChar);
                continue;
            }

            // Single-character operators and delimiters
            const singleCharType = this.getSingleCharOperator(char);
            if (singleCharType) {
                this.advance();
                this.addToken(singleCharType, char);
                continue;
            }

            // Unknown character
            throw new Error(`Unexpected character '${char}' at line ${this.line}, column ${this.column}`);
        }

        // Add DEDENT tokens for any remaining indentation at end of file
        while (this.indentStack.length > 1) {
            this.indentStack.pop();
            this.addToken(TokenType.DEDENT, '');
        }
        this.addToken(TokenType.EOF, '');
        return this.tokens;
    }

    /**
     * Move the cursor forward, keeping the column counter in step.
     *
     * @param {number} [count=1] - Number of characters to skip.
     */
    advance(count = 1) {
        this.position += count;
        this.column += count;
    }

    /**
     * Append a token that ends at the current cursor position.
     *
     * @param {string} type - One of the TokenType values.
     * @param {string} value - Token text as exposed to the parser.
     * @param {number} [width=value.length] - Number of source characters the
     *   token occupies. Pass it when `value` is shorter than the source text
     *   (string quotes, the '#' of a comment) so `column` marks the real start.
     */
    addToken(type, value, width = value.length) {
        this.tokens.push({
            type,
            value,
            line: this.line,
            column: this.column - width
        });
    }

    /** Skip spaces, tabs and carriage returns (so CRLF line endings tokenize). */
    skipWhitespace() {
        while (this.position < this.code.length) {
            const char = this.code[this.position];
            if (char !== ' ' && char !== '\t' && char !== '\r') {
                break;
            }
            this.advance();
        }
    }

    /**
     * Measure the leading whitespace of the current line and emit INDENT or
     * DEDENT tokens when the level changes. Blank and comment-only lines never
     * affect the indentation stack.
     *
     * @throws {Error} If a dedent lands on a level that was never opened.
     */
    handleIndentation() {
        let indentLevel = 0;
        const start = this.position;

        // Count spaces and tabs at the beginning of the line
        while (this.position < this.code.length) {
            const char = this.code[this.position];
            if (char === ' ') {
                indentLevel++;
            } else if (char === '\t') {
                indentLevel += 4; // Treat tab as 4 spaces
            } else {
                break;
            }
            this.advance();
        }

        // Skip empty lines and comment-only lines. A '\r' that merely precedes
        // the line break (CRLF) or the end of input also counts as empty.
        const next = this.code[this.position];
        const following = this.code[this.position + 1];
        const isBlankCarriageReturn = next === '\r' && (following === undefined || following === '\n');
        if (next === undefined || next === '\n' || next === '#' || isBlankCarriageReturn) {
            return;
        }

        const currentIndent = this.indentStack[this.indentStack.length - 1];
        if (indentLevel > currentIndent) {
            // Increased indentation - add INDENT token
            this.indentStack.push(indentLevel);
            this.addToken(TokenType.INDENT, this.code.substring(start, this.position));
        } else if (indentLevel < currentIndent) {
            // Decreased indentation - add one DEDENT per closed level
            while (this.indentStack.length > 1 && this.indentStack[this.indentStack.length - 1] > indentLevel) {
                this.indentStack.pop();
                this.addToken(TokenType.DEDENT, '');
            }
            // The level we land on must be one that was previously opened
            if (this.indentStack[this.indentStack.length - 1] !== indentLevel) {
                throw new Error(`Indentation error at line ${this.line}: unmatched indentation level`);
            }
        }
        // If indentLevel === currentIndent, no change needed
    }

    /** Read a '#' comment up to (not including) the end of the line. */
    readComment() {
        const start = this.position;
        while (this.position < this.code.length && this.code[this.position] !== '\n') {
            this.advance();
        }
        // Skip '#' and trim whitespace
        const value = this.code.substring(start + 1, this.position).trim();
        this.addToken(TokenType.COMMENT, value, this.position - start);
    }

    /**
     * Read an integer or decimal literal. At most one decimal point is
     * consumed, so `1.2.3` is not silently folded into a single number.
     */
    readNumber() {
        const start = this.position;
        let seenDot = false;
        while (this.position < this.code.length) {
            const char = this.code[this.position];
            if (char === '.') {
                if (seenDot) {
                    break;
                }
                seenDot = true;
            } else if (!this.isDigit(char)) {
                break;
            }
            this.advance();
        }
        this.addToken(TokenType.NUMBER, this.code.substring(start, this.position));
    }

    /**
     * Read a single- or double-quoted string. The token value is the raw text
     * between the quotes; escape sequences are skipped over but not decoded.
     *
     * @throws {Error} If the closing quote is missing.
     */
    readString() {
        const quote = this.code[this.position];
        this.advance(); // Skip opening quote
        const start = this.position;
        while (this.position < this.code.length && this.code[this.position] !== quote) {
            if (this.code[this.position] === '\\') {
                this.advance(2); // Skip escape sequence
            } else {
                this.advance();
            }
        }
        if (this.position >= this.code.length) {
            throw new Error(`Unterminated string at line ${this.line}`);
        }
        const value = this.code.substring(start, this.position);
        this.advance(); // Skip closing quote
        // Width includes both quotes so the column points at the opening quote.
        this.addToken(TokenType.STRING, value, value.length + 2);
    }

    /** Read an identifier and classify it as a keyword when it is one. */
    readIdentifier() {
        const start = this.position;
        while (this.position < this.code.length
            && (this.isAlphaNumeric(this.code[this.position]) || this.code[this.position] === '_')) {
            this.advance();
        }
        const value = this.code.substring(start, this.position);
        const tokenType = this.getKeywordType(value) || TokenType.IDENTIFIER;
        this.addToken(tokenType, value);
    }

    /** @returns {boolean} Whether `char` is an ASCII digit. */
    isDigit(char) {
        return char >= '0' && char <= '9';
    }

    /** @returns {boolean} Whether `char` is an ASCII letter. */
    isAlpha(char) {
        return (char >= 'a' && char <= 'z') || (char >= 'A' && char <= 'Z');
    }

    /** @returns {boolean} Whether `char` is an ASCII letter or digit. */
    isAlphaNumeric(char) {
        return this.isAlpha(char) || this.isDigit(char);
    }

    /**
     * @param {string} value - Identifier text.
     * @returns {string|null} The keyword token type, or null for ordinary names.
     */
    getKeywordType(value) {
        return KEYWORDS.get(value) ?? null;
    }

    /**
     * @param {string} value - Two characters of source text.
     * @returns {string|null} The operator token type, or null if not an operator.
     */
    getTwoCharOperator(value) {
        return TWO_CHAR_OPERATORS.get(value) ?? null;
    }

    /**
     * @param {string} char - One character of source text.
     * @returns {string|null} The operator/delimiter token type, or null.
     */
    getSingleCharOperator(char) {
        return SINGLE_CHAR_OPERATORS.get(char) ?? null;
    }
}

/**
 * Simple AST parser for Python.
 *
 * Recursive-descent parser over the PythonLexer token stream. Node `type`
 * names follow Python's `ast` module (Module, If, While, For, FunctionDef,
 * Return, Assign, Expr, BoolOp, Compare, BinOp, UnaryOp, Call, Subscript,
 * Attribute, Name, Constant, List, Tuple, JoinedStr), plus the non-standard
 * `Comment` and `Else` nodes.
 */
export class ASTParser {
    tokens = [];
    current = 0;

    /**
     * Parse Python code to AST.
     *
     * @param {string} code - Python source text.
     * @returns {Promise<object>} Resolves to the Module node.
     */
    async parse(code) {
        return this.parseSync(code);
    }

    /**
     * Parse Python code to AST synchronously.
     *
     * @param {string} code - Python source text.
     * @returns {object} The Module node: `{ type: 'Module', body }`.
     * @throws {Error} On any lexical or syntax error.
     */
    parseSync(code) {
        const lexer = new PythonLexer(code);
        this.tokens = lexer.tokenize();
        this.current = 0;
        return this.parseProgram();
    }

    /** Parse all top-level statements until EOF. */
    parseProgram() {
        const statements = [];
        while (!this.isAtEnd()) {
            if (this.check(TokenType.NEWLINE)) {
                this.advance();
                continue;
            }
            const stmt = this.parseStatement();
            if (stmt) {
                statements.push(stmt);
            }
        }
        return { type: 'Module', body: statements };
    }

    /** Dispatch on the current token to the matching statement parser. */
    parseStatement() {
        if (this.check(TokenType.COMMENT)) {
            return this.parseComment();
        }
        if (this.check(TokenType.IF)) {
            return this.parseIf();
        }
        if (this.check(TokenType.WHILE)) {
            return this.parseWhile();
        }
        if (this.check(TokenType.FOR)) {
            return this.parseFor();
        }
        if (this.check(TokenType.DEF)) {
            return this.parseFunction();
        }
        if (this.check(TokenType.RETURN)) {
            return this.parseReturn();
        }
        if (this.check(TokenType.PRINT)) {
            return this.parsePrint();
        }
        // Try assignment or expression
        return this.parseAssignmentOrExpression();
    }

    /** Turn a COMMENT token into a Comment node. */
    parseComment() {
        const token = this.advance();
        return { type: 'Comment', value: token.value, lineno: token.line };
    }

    /**
     * Parse `if` / `elif` / `else`. Each `elif` becomes a flat sibling `If`
     * node in `orelse` (not nested), and a non-empty `else` becomes an `Else`
     * node. `lineno` is the line of the introducing keyword.
     */
    parseIf() {
        const ifToken = this.consume(TokenType.IF, "Expected 'if'");
        const test = this.parseExpression();
        this.consume(TokenType.COLON, "Expected ':' after if condition");
        // parseBlock skips the newline(s) after ':' itself
        const body = this.parseBlock();

        // Parse elif/else
        const orelse = [];
        while (this.check(TokenType.ELIF)) {
            const elifToken = this.advance(); // consume 'elif'
            const elifTest = this.parseExpression();
            this.consume(TokenType.COLON, "Expected ':' after elif condition");
            const elifBody = this.parseBlock();
            orelse.push({
                type: 'If',
                test: elifTest,
                body: elifBody,
                orelse: [],
                lineno: elifToken.line
            });
        }
        if (this.check(TokenType.ELSE)) {
            const elseToken = this.advance(); // consume 'else'
            this.consume(TokenType.COLON, "Expected ':' after else");
            const elseBody = this.parseBlock();
            // Create an else node instead of spreading the body
            if (elseBody.length > 0) {
                orelse.push({ type: 'Else', body: elseBody, lineno: elseToken.line });
            }
        }
        return { type: 'If', test, body, orelse, lineno: ifToken.line };
    }

    /** Parse a `while` loop; `lineno` is the line of the `while` keyword. */
    parseWhile() {
        const whileToken = this.consume(TokenType.WHILE, "Expected 'while'");
        const test = this.parseExpression();
        this.consume(TokenType.COLON, "Expected ':' after while condition");
        const body = this.parseBlock();
        return { type: 'While', test, body, lineno: whileToken.line };
    }

    /** Parse a `for ... in ...` loop; `lineno` is the line of the `for` keyword. */
    parseFor() {
        const forToken = this.consume(TokenType.FOR, "Expected 'for'");
        const target = this.parseExpression();
        this.consume(TokenType.IN, "Expected 'in' in for loop");
        const iter = this.parseExpression();
        this.consume(TokenType.COLON, "Expected ':' after for clause");
        const body = this.parseBlock();
        return { type: 'For', target, iter, body, lineno: forToken.line };
    }

    /**
     * Parse a `def`. Only plain positional parameter names are accepted.
     * `lineno` is the line of the `def` keyword.
     */
    parseFunction() {
        const defToken = this.consume(TokenType.DEF, "Expected 'def'");
        const name = this.consume(TokenType.IDENTIFIER, "Expected function name").value;
        this.consume(TokenType.LPAREN, "Expected '(' after function name");
        const args = [];
        if (!this.check(TokenType.RPAREN)) {
            do {
                args.push(this.consume(TokenType.IDENTIFIER, "Expected parameter name").value);
            } while (this.match(TokenType.COMMA));
        }
        this.consume(TokenType.RPAREN, "Expected ')' after parameters");
        this.consume(TokenType.COLON, "Expected ':' after function signature");
        const body = this.parseBlock();
        return { type: 'FunctionDef', name, args: { args }, body, lineno: defToken.line };
    }

    /**
     * Parse `return` with an optional value. A bare `return` may be followed
     * by a newline, a trailing comment, the end of the enclosing block
     * (DEDENT) or the end of input.
     */
    parseReturn() {
        const token = this.advance();
        const isBare = this.isAtEnd()
            || this.check(TokenType.NEWLINE)
            || this.check(TokenType.DEDENT)
            || this.check(TokenType.COMMENT);
        const value = isBare ? null : this.parseExpression();
        return { type: 'Return', value, lineno: token.line };
    }

    /**
     * Parse a `print(...)` statement into a Call node on `Name('print')`.
     * An argument of the form f"..." becomes a JoinedStr wrapping the raw string.
     */
    parsePrint() {
        const token = this.advance(); // consume PRINT
        this.consume(TokenType.LPAREN, "Expected '(' after print");
        const args = [];
        if (!this.check(TokenType.RPAREN)) {
            do {
                // Handle f-string case: f"string"
                if (this.check(TokenType.IDENTIFIER)
                    && this.peek()?.value === 'f'
                    && this.tokens[this.current + 1]?.type === TokenType.STRING) {
                    this.advance(); // consume 'f'
                    const stringToken = this.advance(); // consume string
                    args.push({
                        type: 'JoinedStr',
                        values: [{ type: 'Constant', value: stringToken.value, lineno: stringToken.line }],
                        lineno: stringToken.line
                    });
                } else {
                    // NOTE(review): parseExpression folds `a, b` into one Tuple,
                    // so print(a, b) yields a single Tuple argument. Kept as-is
                    // because AST consumers may rely on that shape - confirm.
                    args.push(this.parseExpression());
                }
            } while (this.match(TokenType.COMMA));
        }
        this.consume(TokenType.RPAREN, "Expected ')' after print arguments");
        return { type: 'Call', func: { type: 'Name', id: 'print' }, args, lineno: token.line };
    }

    /**
     * Parse an expression statement or a plain/compound assignment. The
     * operator text (`=`, `+=`, ...) is kept on the Assign node.
     */
    parseAssignmentOrExpression() {
        const expr = this.parseExpression();
        // Check for assignment
        if (this.match(TokenType.ASSIGN, TokenType.PLUS_ASSIGN, TokenType.MINUS_ASSIGN, TokenType.MULTIPLY_ASSIGN, TokenType.DIVIDE_ASSIGN)) {
            const operator = this.previous();
            const value = this.parseExpression();
            return { type: 'Assign', targets: [expr], value, operator: operator.value, lineno: operator.line };
        }
        return { type: 'Expr', value: expr, lineno: expr.lineno };
    }

    /** Parse an expression; an unparenthesised comma list becomes a Tuple. */
    parseExpression() {
        const expr = this.parseOr();
        // Check for comma-separated expressions (tuple)
        if (this.check(TokenType.COMMA)) {
            const elements = [expr];
            while (this.match(TokenType.COMMA)) {
                elements.push(this.parseOr());
            }
            return { type: 'Tuple', elts: elements, lineno: expr.lineno };
        }
        return expr;
    }

    /** Parse left-associative `or` chains. */
    parseOr() {
        let expr = this.parseAnd();
        while (this.match(TokenType.OR)) {
            const operator = this.previous();
            const right = this.parseAnd();
            expr = { type: 'BoolOp', op: 'Or', values: [expr, right], lineno: operator.line };
        }
        return expr;
    }

    /** Parse left-associative `and` chains. */
    parseAnd() {
        let expr = this.parseEquality();
        while (this.match(TokenType.AND)) {
            const operator = this.previous();
            const right = this.parseEquality();
            expr = { type: 'BoolOp', op: 'And', values: [expr, right], lineno: operator.line };
        }
        return expr;
    }

    /** Parse `==` / `!=` comparisons. */
    parseEquality() {
        let expr = this.parseComparison();
        while (this.match(TokenType.EQUAL, TokenType.NOT_EQUAL)) {
            const operator = this.previous();
            const right = this.parseComparison();
            expr = {
                type: 'Compare',
                left: expr,
                ops: [operator.value === '==' ? 'Eq' : 'NotEq'],
                comparators: [right],
                lineno: operator.line
            };
        }
        return expr;
    }

    /** Parse `<`, `<=`, `>`, `>=` comparisons. */
    parseComparison() {
        const opMap = { '>': 'Gt', '>=': 'GtE', '<': 'Lt', '<=': 'LtE' };
        let expr = this.parseTerm();
        while (this.match(TokenType.GREATER, TokenType.GREATER_EQUAL, TokenType.LESS, TokenType.LESS_EQUAL)) {
            const operator = this.previous();
            const right = this.parseTerm();
            expr = {
                type: 'Compare',
                left: expr,
                ops: [opMap[operator.value]],
                comparators: [right],
                lineno: operator.line
            };
        }
        return expr;
    }

    /** Parse additive `+` / `-` chains. */
    parseTerm() {
        let expr = this.parseFactor();
        while (this.match(TokenType.MINUS, TokenType.PLUS)) {
            const operator = this.previous();
            const right = this.parseFactor();
            expr = {
                type: 'BinOp',
                left: expr,
                op: operator.value === '+' ? 'Add' : 'Sub',
                right,
                lineno: operator.line
            };
        }
        return expr;
    }

    /** Parse multiplicative `*`, `/`, `%`, `//` chains. */
    parseFactor() {
        const opMap = { '/': 'Div', '*': 'Mult', '%': 'Mod', '//': 'FloorDiv' };
        let expr = this.parseUnary();
        while (this.match(TokenType.DIVIDE, TokenType.MULTIPLY, TokenType.MODULO, TokenType.FLOOR_DIVIDE)) {
            const operator = this.previous();
            const right = this.parseUnary();
            expr = { type: 'BinOp', left: expr, op: opMap[operator.value], right, lineno: operator.line };
        }
        return expr;
    }

    /** Parse prefix `not` and unary minus. */
    parseUnary() {
        if (this.match(TokenType.NOT, TokenType.MINUS)) {
            const operator = this.previous();
            const right = this.parseUnary();
            return {
                type: 'UnaryOp',
                op: operator.value === 'not' ? 'Not' : 'USub',
                operand: right,
                lineno: operator.line
            };
        }
        return this.parsePower();
    }

    /** Parse `**`, which binds tighter than unary minus on its left. */
    parsePower() {
        let expr = this.parseCall();
        if (this.match(TokenType.POWER)) {
            const operator = this.previous();
            const right = this.parseUnary(); // Right associative
            expr = { type: 'BinOp', left: expr, op: 'Pow', right, lineno: operator.line };
        }
        return expr;
    }

    /** Parse a primary followed by any chain of calls, subscripts and attributes. */
    parseCall() {
        let expr = this.parsePrimary();
        // eslint-disable-next-line no-constant-condition
        while (true) {
            if (this.match(TokenType.LPAREN)) {
                expr = this.finishCall(expr);
            } else if (this.match(TokenType.LBRACKET)) {
                expr = this.finishSubscript(expr);
            } else if (this.match(TokenType.DOT)) {
                expr = this.finishAttribute(expr);
            } else {
                break;
            }
        }
        return expr;
    }

    /** Parse call arguments after '(' has been consumed. */
    finishCall(callee) {
        const args = [];
        if (!this.check(TokenType.RPAREN)) {
            do {
                // parseOr, not parseExpression, so commas separate arguments
                args.push(this.parseOr());
            } while (this.match(TokenType.COMMA));
        }
        this.consume(TokenType.RPAREN, "Expected ')' after arguments");
        return { type: 'Call', func: callee, args, lineno: callee.lineno };
    }

    /** Parse a subscript after '[' has been consumed. */
    finishSubscript(value) {
        const slice = this.parseExpression();
        this.consume(TokenType.RBRACKET, "Expected ']' after subscript");
        return { type: 'Subscript', value, slice, lineno: value.lineno };
    }

    /** Parse an attribute name after '.' has been consumed. */
    finishAttribute(value) {
        const attr = this.consume(TokenType.IDENTIFIER, "Expected attribute name after '.'");
        return { type: 'Attribute', value, attr: attr.value, lineno: value.lineno };
    }

    /**
     * Parse a literal, name, parenthesised expression or list literal.
     *
     * @throws {Error} If the current token cannot start an expression.
     */
    parsePrimary() {
        if (this.match(TokenType.TRUE)) {
            return { type: 'Constant', value: true, lineno: this.previous().line };
        }
        if (this.match(TokenType.FALSE)) {
            return { type: 'Constant', value: false, lineno: this.previous().line };
        }
        if (this.match(TokenType.NONE)) {
            return { type: 'Constant', value: null, lineno: this.previous().line };
        }
        if (this.match(TokenType.NUMBER)) {
            const token = this.previous();
            const value = token.value.includes('.')
                ? Number.parseFloat(token.value)
                : Number.parseInt(token.value, 10);
            return { type: 'Constant', value, lineno: token.line };
        }
        if (this.match(TokenType.STRING)) {
            return { type: 'Constant', value: this.previous().value, lineno: this.previous().line };
        }
        // `print` and `input` are keyword tokens but act as ordinary names here
        if (this.match(TokenType.IDENTIFIER, TokenType.PRINT, TokenType.INPUT)) {
            return { type: 'Name', id: this.previous().value, lineno: this.previous().line };
        }
        if (this.match(TokenType.LPAREN)) {
            const expr = this.parseExpression();
            this.consume(TokenType.RPAREN, "Expected ')' after expression");
            return expr;
        }
        if (this.match(TokenType.LBRACKET)) {
            const elements = [];
            if (!this.check(TokenType.RBRACKET)) {
                do {
                    // NOTE(review): parseExpression folds `1, 2` into one Tuple,
                    // so [1, 2] yields elts = [Tuple]. Kept as-is because AST
                    // consumers may rely on that shape - confirm.
                    elements.push(this.parseExpression());
                } while (this.match(TokenType.COMMA));
            }
            this.consume(TokenType.RBRACKET, "Expected ']' after list elements");
            return { type: 'List', elts: elements, lineno: this.previous().line };
        }
        throw new Error(`Unexpected token: ${this.peek().value} at line ${this.peek().line}`);
    }

    /**
     * Parse an indented block of statements.
     *
     * Skips leading newlines, then, if an INDENT follows, collects statements
     * up to the matching DEDENT (which is consumed). Returns an empty array
     * when no INDENT follows.
     */
    parseBlock() {
        const statements = [];
        this.skipNewlines();
        if (this.match(TokenType.INDENT)) {
            while (!this.check(TokenType.DEDENT) && !this.isAtEnd()) {
                this.skipNewlines();
                if (this.check(TokenType.DEDENT) || this.isAtEnd()) {
                    break;
                }
                const stmt = this.parseStatement();
                if (stmt) {
                    statements.push(stmt);
                }
                this.skipNewlines();
            }
            // Consume the closing DEDENT when present (absent only at EOF)
            this.match(TokenType.DEDENT);
        }
        return statements;
    }

    /** Consume consecutive NEWLINE tokens. */
    skipNewlines() {
        while (this.check(TokenType.NEWLINE)) {
            this.advance();
        }
    }

    // Helper methods

    /** Consume the current token if it has one of the given types. */
    match(...types) {
        for (const type of types) {
            if (this.check(type)) {
                this.advance();
                return true;
            }
        }
        return false;
    }

    /** @returns {boolean} Whether the current token has the given type (never true at EOF). */
    check(type) {
        if (this.isAtEnd()) {
            return false;
        }
        return this.peek().type === type;
    }

    /** Step past the current token (unless at EOF) and return the token just passed. */
    advance() {
        if (!this.isAtEnd()) {
            this.current++;
        }
        return this.previous();
    }

    /** @returns {boolean} Whether the current token is EOF. */
    isAtEnd() {
        return this.peek().type === TokenType.EOF;
    }

    /** @returns {object} The current, not yet consumed token. */
    peek() {
        return this.tokens[this.current];
    }

    /** @returns {object} The most recently consumed token. */
    previous() {
        return this.tokens[this.current - 1];
    }

    /**
     * Consume a token of the required type.
     *
     * @throws {Error} With `message` and the offending token if the type differs.
     */
    consume(type, message) {
        if (this.check(type)) {
            return this.advance();
        }
        const current = this.peek();
        throw new Error(`${message}. Got '${current.value}' at line ${current.line}`);
    }
}
//# sourceMappingURL=ast-parser.js.map