UNPKG

python2igcse

Version:

Convert Python code to IGCSE Pseudocode format

1,364 lines 51.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PythonASTVisitor = void 0; const ir_1 = require("../types/ir"); const base_parser_1 = require("./base-parser"); const statement_visitor_1 = require("./statement-visitor"); const definition_visitor_1 = require("./definition-visitor"); /** * Visitor for converting Python AST to IR */ class PythonASTVisitor extends base_parser_1.BaseParser { constructor() { super(); this.statementVisitor = new statement_visitor_1.StatementVisitor(); this.definitionVisitor = new definition_visitor_1.DefinitionVisitor(); // Share context with visitors this.statementVisitor.setContext(this.context); this.definitionVisitor.setContext(this.context); } /** * Main parse function */ parse(source) { this.startParsing(); try { // In actual implementation, use Python AST parser // Here we provide a simplified implementation const ast = this.parseToAST(source); // Two-pass processing: first pre-register all class definitions this.preRegisterAllClasses(ast.body); // First pass: collect function call information this.collectFunctionCalls(ast); // After class definition registration, re-share latest context with visitors this.statementVisitor.setContext(this.context); this.definitionVisitor.setContext(this.context); const ir = this.visitNode(ast); // If IR is not an array, return its child elements if (ir.kind === 'compound' && ir.children) { return this.createParseResult(ir.children); } return this.createParseResult([ir]); } catch (error) { this.addError(`Parse failed: ${error instanceof Error ? error.message : 'Unknown error'}`, 'syntax_error'); // Return empty IR on error const emptyIR = (0, ir_1.createIR)('statement', '', []); return this.createParseResult([emptyIR]); } } /** * First pass: collect function call information */ collectFunctionCalls(ast) { if (ast.type === 'Call' && ast.func && ast.func.type === 'Name') { const functionName = ast.func.id; const argumentTypes = ast.args ? ast.args.map((arg) => this.inferTypeFromValue(arg)) : []; this.recordFunctionCall(functionName, argumentTypes); } // Recursively process child nodes if (ast.body && Array.isArray(ast.body)) { ast.body.forEach((child) => this.collectFunctionCalls(child)); } if (ast.children && Array.isArray(ast.children)) { ast.children.forEach((child) => this.collectFunctionCalls(child)); } if (ast.orelse && Array.isArray(ast.orelse)) { ast.orelse.forEach((child) => this.collectFunctionCalls(child)); } if (ast.value) { this.collectFunctionCalls(ast.value); } if (ast.test) { this.collectFunctionCalls(ast.test); } if (ast.iter) { this.collectFunctionCalls(ast.iter); } if (ast.args && Array.isArray(ast.args)) { ast.args.forEach((arg) => this.collectFunctionCalls(arg)); } if (ast.targets && Array.isArray(ast.targets)) { ast.targets.forEach((target) => this.collectFunctionCalls(target)); } } /** * Infer type from value */ inferTypeFromValue(node) { switch (node.type) { case 'Num': return Number.isInteger(node.n) ? 'INTEGER' : 'REAL'; case 'Constant': if (typeof node.value === 'number') { return Number.isInteger(node.value) ? 'INTEGER' : 'REAL'; } if (typeof node.value === 'string') return 'STRING'; if (typeof node.value === 'boolean') return 'BOOLEAN'; return 'STRING'; case 'Str': return 'STRING'; case 'List': return 'ARRAY'; case 'Dict': return 'ARRAY'; case 'NameConstant': return 'BOOLEAN'; case 'Name': return 'STRING'; // Default to STRING default: return 'STRING'; } } /** * Record function call */ recordFunctionCall(functionName, argumentTypes) { const existingCalls = this.context.functionCalls.get(functionName); if (existingCalls) { existingCalls.argumentTypes.push(...argumentTypes); existingCalls.callCount++; } else { this.context.functionCalls.set(functionName, { name: functionName, argumentTypes: [...argumentTypes], callCount: 1 }); } } /** * Simple AST parser (in actual implementation, use external library) */ parseToAST(source) { // In actual implementation, use python-ast or pyodide // Here we provide a simplified implementation const lines = source.split('\n'); const nodes = []; const processedLines = new Set(); let i = 0; while (i < lines.length) { if (processedLines.has(i)) { i++; continue; } const line = lines[i]; const trimmed = line.trim(); if (trimmed.startsWith('#')) { // Process comment line const commentNode = { type: 'Comment', value: trimmed.substring(1).trim(), lineno: i + 1 }; nodes.push(commentNode); processedLines.add(i); i++; } else if (trimmed) { const result = this.parseStatement(lines, i); if (result.node) { nodes.push(result.node); // Mark processed lines for (let j = i; j < result.nextIndex; j++) { processedLines.add(j); } } i = result.nextIndex; } else { processedLines.add(i); i++; } } return { type: 'Module', body: nodes }; } /** * Parse statement and its child blocks */ parseStatement(lines, startIndex) { const line = lines[startIndex]; const trimmed = line.trim(); const indent = line.length - line.trimStart().length; // Create basic statement node const node = this.parseLineToASTNode(trimmed, startIndex + 1); if (!node) { return { node: null, nextIndex: startIndex + 1 }; } // For statements ending with colon (block statements), parse child blocks if (trimmed.endsWith(':')) { const bodyNodes = []; let i = startIndex + 1; // Parse child blocks from next line while (i < lines.length) { const childLine = lines[i]; const childTrimmed = childLine.trim(); const childIndent = childLine.length - childLine.trimStart().length; // Skip empty lines and comment lines if (!childTrimmed || childTrimmed.startsWith('#')) { i++; continue; } // For IF statements, handle ELIF and ELSE statements specially if (node.type === 'If' && childIndent === indent) { if (childTrimmed.startsWith('elif ')) { // Process ELIF statement as new IF statement and add to orelse const elifResult = this.parseStatement(lines, i); if (elifResult.node) { node.orelse = [elifResult.node]; } i = elifResult.nextIndex; break; } else if (childTrimmed.startsWith('else:')) { // Process ELSE clause const elseNodes = []; i++; // Skip else line // Parse child blocks of ELSE clause while (i < lines.length) { const elseChildLine = lines[i]; const elseChildTrimmed = elseChildLine.trim(); const elseChildIndent = elseChildLine.length - elseChildLine.trimStart().length; // Skip empty lines and comment lines if (!elseChildTrimmed || elseChildTrimmed.startsWith('#')) { i++; continue; } // If indent is same or less, ELSE clause ends if (elseChildIndent <= indent) { break; } // Parse child statements of ELSE clause const elseChildResult = this.parseStatement(lines, i); if (elseChildResult.node) { elseNodes.push(elseChildResult.node); } i = elseChildResult.nextIndex; } // Set ELSE clause to node node.orelse = elseNodes; break; } } // If indent is same or less, block ends if (childIndent <= indent) { break; } // Parse child statement const childResult = this.parseStatement(lines, i); if (childResult.node) { bodyNodes.push(childResult.node); } i = childResult.nextIndex; } // Set child blocks to node if (node.type === 'If' || node.type === 'For' || node.type === 'While' || node.type === 'FunctionDef' || node.type === 'ClassDef') { node.body = bodyNodes; } else if (node.type === 'Match') { // For match statements, set case statements to cases array node.cases = bodyNodes.filter(child => child.type === 'match_case'); } else if (node.type === 'match_case') { // For case statements, set child nodes to body node.body = bodyNodes; } return { node, nextIndex: i }; } return { node, nextIndex: startIndex + 1 }; } /** * Convert single line to AST node */ parseLineToASTNode(line, lineNumber) { const trimmed = line.trim(); // Detect IF statements if (trimmed.startsWith('if ')) { return this.parseIfStatement(trimmed, lineNumber); } // Detect ELIF statements (process as IF statements) if (trimmed.startsWith('elif ')) { // Replace 'elif' with 'if' and process const ifLine = 'if ' + trimmed.substring(5); return this.parseIfStatement(ifLine, lineNumber); } // Detect FOR statements if (trimmed.startsWith('for ')) { return this.parseForStatement(trimmed, lineNumber); } // Detect WHILE statements if (trimmed.startsWith('while ')) { return this.parseWhileStatement(trimmed, lineNumber); } // Detect class definitions if (trimmed.startsWith('class ')) { return this.parseClassDef(trimmed, lineNumber); } // Detect function definitions if (trimmed.startsWith('def ')) { return this.parseFunctionDef(trimmed, lineNumber); } // Detect type-annotated assignment statements (e.g., items: list[str] = []) if (trimmed.includes(': ') && trimmed.includes(' = ')) { const colonIndex = trimmed.indexOf(': '); const equalIndex = trimmed.indexOf(' = '); // If colon comes before equals sign, it's a type-annotated assignment if (colonIndex < equalIndex) { const varName = trimmed.substring(0, colonIndex).trim(); const typeAnnotation = trimmed.substring(colonIndex + 2, equalIndex).trim(); const value = trimmed.substring(equalIndex + 3).trim(); if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(varName)) { // Process type annotation let annotation; if (typeAnnotation.includes('[')) { // For array type annotations (e.g., list[str]) annotation = { type: 'Subscript', value: { type: 'Name', id: typeAnnotation.substring(0, typeAnnotation.indexOf('[')) }, slice: { type: 'Name', id: typeAnnotation.substring(typeAnnotation.indexOf('[') + 1, typeAnnotation.indexOf(']')) } }; } else { // For simple type annotations (e.g., int, str, float, bool) annotation = { type: 'Name', id: typeAnnotation }; } return { type: 'AnnAssign', target: { type: 'Name', id: varName, ctx: 'Store' }, annotation: annotation, value: value ? this.parseExpression(value) : null, lineno: lineNumber }; } } } // Detect assignment statements if (trimmed.includes(' = ')) { // Check before and after = to determine if it's an assignment statement const equalIndex = trimmed.indexOf(' = '); const beforeEqual = trimmed.substring(0, equalIndex).trim(); const afterEqual = trimmed.substring(equalIndex + 3).trim(); // Detect array element assignment (e.g., data[1] = 100) const arrayAssignMatch = beforeEqual.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(.+)\]$/); if (arrayAssignMatch && afterEqual.length > 0) { const [, arrayName, indexExpr] = arrayAssignMatch; return { type: 'Assign', targets: [{ type: 'Subscript', value: { type: 'Name', id: arrayName, ctx: 'Load' }, slice: { type: 'Index', value: { type: 'Constant', value: parseInt(indexExpr), kind: null } }, ctx: 'Store' }], value: this.parseExpression(afterEqual), lineno: lineNumber }; } // Detect attribute assignment (e.g., self.name = value) const attrAssignMatch = beforeEqual.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\.([a-zA-Z_][a-zA-Z0-9_]*)$/); if (attrAssignMatch && afterEqual.length > 0) { const [, objName, attrName] = attrAssignMatch; return { type: 'Assign', targets: [{ type: 'Attribute', value: { type: 'Name', id: objName, ctx: 'Load' }, attr: attrName, ctx: 'Store' }], value: this.parseExpression(afterEqual), lineno: lineNumber }; } // If left side is a simple variable name and right side exists, it's an assignment statement if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(beforeEqual) && afterEqual.length > 0) { return this.parseAssignStatement(trimmed, lineNumber); } } // Detect print statements if (trimmed.startsWith('print(')) { return this.parsePrintStatement(trimmed, lineNumber); } // Detect function definitions if (trimmed.startsWith('def ')) { return this.parseFunctionDef(trimmed, lineNumber); } // Detect class definitions if (trimmed.startsWith('class ')) { return this.parseClassDef(trimmed, lineNumber); } // Detect return statements if (trimmed.startsWith('return')) { return this.parseReturnStatement(trimmed, lineNumber); } // Detect match statements if (trimmed.startsWith('match ')) { return this.parseMatchStatement(trimmed, lineNumber); } // Detect case statements if (trimmed.startsWith('case ')) { return this.parseCaseStatement(trimmed, lineNumber); } // Detect break statements if (trimmed === 'break') { return { type: 'Break', lineno: lineNumber }; } // Detect continue statements if (trimmed === 'continue') { return { type: 'Continue', lineno: lineNumber }; } // Detect augmented assignment statements (+=, -=, *=, /=, %=) if (/^[a-zA-Z_][a-zA-Z0-9_]*\s*[+\-*/%]=/.test(trimmed)) { return this.parseAugAssignStatement(trimmed, lineNumber); } // Detect function calls const callMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/); if (callMatch) { const funcName = callMatch[1]; const argsStr = callMatch[2]; const args = this.parseArguments(argsStr); return { type: 'Expr', lineno: lineNumber, value: { type: 'Call', func: { type: 'Name', id: funcName }, args: args } }; } // Detect attribute method calls (e.g., names.append("Alice")) const attrCallMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\.([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/); if (attrCallMatch) { const objName = attrCallMatch[1]; const methodName = attrCallMatch[2]; const argsStr = attrCallMatch[3]; const args = this.parseArguments(argsStr); return { type: 'Expr', lineno: lineNumber, value: { type: 'Call', func: { type: 'Attribute', value: { type: 'Name', id: objName, ctx: 'Load' }, attr: methodName, ctx: 'Load' }, args: args } }; } // Process as other expression statements return { type: 'Expr', lineno: lineNumber, value: { type: 'Call', func: { type: 'Name', id: 'unknown' }, args: [], raw: trimmed } }; } /** * Parse augmented assignment statements */ parseAugAssignStatement(line, lineNumber) { const match = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\s*([+\-*/%])=\s*(.+)$/); if (!match) { // If no match, process as regular expression return { type: 'Expr', lineno: lineNumber, value: { type: 'Call', func: { type: 'Name', id: 'unknown' }, args: [], raw: line } }; } const [, target, op, value] = match; return { type: 'AugAssign', lineno: lineNumber, target: { type: 'Name', id: target }, op: { type: this.getAugAssignOpType(op) }, value: { type: 'Num', n: isNaN(Number(value)) ? value : Number(value), raw: value } }; } /** * Get augmented assignment operator type */ getAugAssignOpType(op) { switch (op) { case '+': return 'Add'; case '-': return 'Sub'; case '*': return 'Mult'; case '/': return 'Div'; case '%': return 'Mod'; default: return 'Add'; } } parseIfStatement(line, lineNumber) { // Parse "if condition:" format const match = line.match(/^if\s+(.+):\s*$/); const condition = match ? match[1] : line.substring(3, line.length - 1); // Parse condition expression const testNode = this.parseCondition(condition); return { type: 'If', lineno: lineNumber, test: testNode, body: [], orelse: [] }; } /** * Parse condition expression */ parseCondition(condition) { // Parse equality comparison (variable == value) const eqMatch = condition.match(/^(\w+)\s*==\s*(.+)$/); if (eqMatch) { const [, variable, value] = eqMatch; return { type: 'Compare', left: { type: 'Name', id: variable }, ops: [{ type: 'Eq' }], comparators: [this.parseExpression(value.trim())], raw: condition }; } // Parse inequality comparison (variable != value) const neMatch = condition.match(/^(\w+)\s*!=\s*(.+)$/); if (neMatch) { const [, variable, value] = neMatch; return { type: 'Compare', left: { type: 'Name', id: variable }, ops: [{ type: 'NotEq' }], comparators: [this.parseExpression(value.trim())], raw: condition }; } // Other comparison operators const compMatch = condition.match(/^(\w+)\s*(<=|>=|<|>)\s*(.+)$/); if (compMatch) { const [, variable, op, value] = compMatch; const opType = this.getComparisonOpType(op); return { type: 'Compare', left: { type: 'Name', id: variable }, ops: [{ type: opType }], comparators: [this.parseExpression(value.trim())], raw: condition }; } // For simple variables or expressions return { type: 'Name', id: condition, raw: condition }; } /** * Convert comparison operator string to AST type */ getComparisonOpType(op) { switch (op) { case '==': return 'Eq'; case '!=': return 'NotEq'; case '<': return 'Lt'; case '<=': return 'LtE'; case '>': return 'Gt'; case '>=': return 'GtE'; default: return 'Eq'; } } parseForStatement(line, lineNumber) { // Parse "for var in iterable:" format const match = line.match(/^for\s+(\w+)\s+in\s+(.+):\s*$/); const target = match ? match[1] : 'i'; const iter = match ? match[2] : 'range(1)'; // Parse range function arguments let args = []; if (iter.startsWith('range(') && iter.endsWith(')')) { const argsStr = iter.slice(6, -1); // Remove "range(" and ")" if (argsStr.trim()) { const argParts = argsStr.split(',').map(arg => arg.trim()); args = argParts.map(arg => ({ type: 'Num', n: isNaN(Number(arg)) ? arg : Number(arg), raw: arg })); } } // For direct iteration over arrays or lists if (!iter.startsWith('range(')) { return { type: 'For', lineno: lineNumber, target: { type: 'Name', id: target }, iter: { type: 'Name', id: iter }, body: [], orelse: [] }; } return { type: 'For', lineno: lineNumber, target: { type: 'Name', id: target }, iter: { type: 'Call', func: { type: 'Name', id: 'range' }, args: args, raw: iter }, body: [], orelse: [] }; } parseWhileStatement(line, lineNumber) { // Parse "while condition:" format const match = line.match(/^while\s+(.+):\s*$/); const condition = match ? match[1] : line.substring(6, line.length - 1); return { type: 'While', lineno: lineNumber, test: { type: 'Compare', raw: condition }, body: [], orelse: [] }; } parseMatchStatement(line, lineNumber) { // Parse "match subject:" format const match = line.match(/^match\s+(.+):\s*$/); const subject = match ? match[1] : line.substring(6, line.length - 1); return { type: 'Match', lineno: lineNumber, subject: { type: 'Name', id: subject, ctx: 'Load' }, cases: [] }; } parseCaseStatement(line, lineNumber) { // Parse "case pattern:" format const match = line.match(/^case\s+(.+):\s*$/); const pattern = match ? match[1] : line.substring(5, line.length - 1); // For wildcard pattern (_) if (pattern === '_') { return { type: 'match_case', lineno: lineNumber, pattern: { type: 'MatchAs', pattern: null, name: null }, guard: null, body: [] }; } // For value patterns return { type: 'match_case', lineno: lineNumber, pattern: { type: 'MatchValue', value: this.parseExpression(pattern) }, guard: null, body: [] }; } parseAssignStatement(line, lineNumber) { // Parse "var = value" format const parts = line.split(' = '); const target = parts[0].trim(); let value = parts.slice(1).join(' = ').trim(); // Extract inline comment part (after #) let inlineComment = ''; const commentIndex = value.indexOf('#'); if (commentIndex !== -1) { inlineComment = value.substring(commentIndex + 1).trim(); value = value.substring(0, commentIndex).trim(); } // Detect array literals if (value.startsWith('[') && value.endsWith(']')) { const elementsStr = value.slice(1, -1).trim(); const elements = elementsStr ? elementsStr.split(',').map(elem => { const trimmed = elem.trim(); // Check if it's a number if (/^-?\d+(\.\d+)?$/.test(trimmed)) { return { type: 'Num', n: parseFloat(trimmed) }; } else if (trimmed.startsWith('"') && trimmed.endsWith('"')) { return { type: 'Str', s: trimmed.slice(1, -1) }; } else if (trimmed.startsWith("'") && trimmed.endsWith("'")) { return { type: 'Str', s: trimmed.slice(1, -1) }; } else { return { type: 'Name', id: trimmed }; } }) : []; return { type: 'Assign', lineno: lineNumber, targets: [{ type: 'Name', id: target }], value: { type: 'List', elts: elements } }; } // Detect array access (e.g., my_array[0]) const arrayAccessMatch = value.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(\d+)\]$/); if (arrayAccessMatch) { const [, arrayName, indexStr] = arrayAccessMatch; return { type: 'Assign', lineno: lineNumber, targets: [{ type: 'Name', id: target }], value: { type: 'Subscript', value: { type: 'Name', id: arrayName }, slice: { type: 'Num', n: parseInt(indexStr) } } }; } // Detect expressions containing comparison operators const valueNode = this.parseExpression(value); const assignNode = { type: 'Assign', lineno: lineNumber, targets: [{ type: 'Name', id: target }], value: valueNode }; if (inlineComment) { assignNode.inlineComment = inlineComment; } return assignNode; } /** * Parse expression and convert to AST node */ parseExpression(expr) { const trimmed = expr.trim(); // Detect empty list if (trimmed === '[]') { return { type: 'List', elts: [], ctx: 'Load' }; } // Detect list literals if (trimmed.startsWith('[') && trimmed.endsWith(']')) { const content = trimmed.slice(1, -1).trim(); if (!content) { return { type: 'List', elts: [], ctx: 'Load' }; } // Parse list elements const elements = content.split(',').map(elem => { const elemTrimmed = elem.trim(); // Detect numbers if (/^\d+$/.test(elemTrimmed)) { return { type: 'Constant', value: parseInt(elemTrimmed), kind: null }; } // Detect strings if ((elemTrimmed.startsWith('"') && elemTrimmed.endsWith('"')) || (elemTrimmed.startsWith("'") && elemTrimmed.endsWith("'"))) { return { type: 'Constant', value: elemTrimmed.slice(1, -1), kind: null }; } // Detect variable names if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(elemTrimmed)) { return { type: 'Name', id: elemTrimmed, ctx: 'Load' }; } // Other expressions return { type: 'Name', id: elemTrimmed, ctx: 'Load' }; }); return { type: 'List', elts: elements, ctx: 'Load' }; } // Detect NOT operator (highest priority) if (trimmed.startsWith('not ')) { const operand = trimmed.substring(4).trim(); return { type: 'UnaryOp', op: { type: 'Not' }, operand: this.parseExpression(operand) }; } // Handle parenthesized expressions if (trimmed.startsWith('(') && trimmed.endsWith(')')) { const innerExpr = trimmed.slice(1, -1); const innerNode = this.parseExpression(innerExpr); // Explicitly mark as parenthesized expression return { type: 'Expr', value: innerNode, parenthesized: true }; } // Detect comparison operators const compareOps = ['==', '!=', '<=', '>=', '<', '>']; for (const op of compareOps) { const index = trimmed.indexOf(op); if (index !== -1) { const left = trimmed.substring(0, index).trim(); const right = trimmed.substring(index + op.length).trim(); return { type: 'Compare', left: this.parseSimpleExpression(left), ops: [this.getCompareOpNode(op)], comparators: [this.parseSimpleExpression(right)] }; } } // Detect logical operators if (trimmed.includes(' and ')) { const parts = trimmed.split(' and '); return { type: 'BoolOp', op: { type: 'And' }, values: parts.map(part => this.parseExpression(part.trim())) }; } if (trimmed.includes(' or ')) { const parts = trimmed.split(' or '); return { type: 'BoolOp', op: { type: 'Or' }, values: parts.map(part => this.parseExpression(part.trim())) }; } // Detect method calls (e.g., text.upper()) const methodCallMatch = trimmed.match(/^(.+)\.([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/); if (methodCallMatch) { const [, objectExpr, methodName, argsStr] = methodCallMatch; const args = this.parseArguments(argsStr); return { type: 'Call', func: { type: 'Attribute', value: this.parseSimpleExpression(objectExpr), attr: methodName, ctx: 'Load' }, args: args }; } // Detect function calls const callMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/); if (callMatch) { const [, funcName, argsStr] = callMatch; const args = this.parseArguments(argsStr); return { type: 'Call', func: { type: 'Name', id: funcName }, args: args }; } // Detect arithmetic operators (detect longer operators first) const arithOps = ['//', '+', '-', '*', '/', '%']; for (const op of arithOps) { const index = trimmed.indexOf(op); if (index !== -1) { const left = trimmed.substring(0, index).trim(); const right = trimmed.substring(index + op.length).trim(); return { type: 'BinOp', left: this.parseSimpleExpression(left), op: this.getArithOpNode(op), right: this.parseSimpleExpression(right) }; } } // Process as simple expression return this.parseSimpleExpression(trimmed); } /** * Parse simple expressions (variables, literals) */ parseSimpleExpression(expr) { const trimmed = expr.trim(); // Detect attribute access (e.g., path[0].x) const attrMatch = trimmed.match(/^(.+)\.([a-zA-Z_][a-zA-Z0-9_]*)$/); if (attrMatch) { const [, valueExpr, attr] = attrMatch; return { type: 'Attribute', value: this.parseSimpleExpression(valueExpr), attr: attr, ctx: 'Load' }; } // Detect array index access (e.g., path[0]) const subscriptMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(.+)\]$/); if (subscriptMatch) { const [, arrayName, indexExpr] = subscriptMatch; return { type: 'Subscript', value: { type: 'Name', id: arrayName, ctx: 'Load' }, slice: this.parseSimpleExpression(indexExpr), ctx: 'Load' }; } // String literals if ((trimmed.startsWith('"') && trimmed.endsWith('"')) || (trimmed.startsWith("'") && trimmed.endsWith("'"))) { return { type: 'Str', s: trimmed.slice(1, -1) }; } // Numeric literals if (/^-?\d+(\.\d+)?$/.test(trimmed)) { return { type: 'Num', n: parseFloat(trimmed), raw: trimmed // Preserve original string representation }; } // Boolean values if (trimmed === 'True' || trimmed === 'False') { return { type: 'NameConstant', value: trimmed === 'True' }; } // Variable names return { type: 'Name', id: trimmed }; } /** * Get AST node for comparison operators */ getCompareOpNode(op) { switch (op) { case '==': return { type: 'Eq' }; case '!=': return { type: 'NotEq' }; case '<': return { type: 'Lt' }; case '<=': return { type: 'LtE' }; case '>': return { type: 'Gt' }; case '>=': return { type: 'GtE' }; default: return { type: 'Eq' }; } } /** * Get AST node for arithmetic operators */ getArithOpNode(op) { switch (op) { case '+': return { type: 'Add' }; case '-': return { type: 'Sub' }; case '*': return { type: 'Mult' }; case '/': return { type: 'Div' }; case '//': return { type: 'FloorDiv' }; case '%': return { type: 'Mod' }; default: return { type: 'Add' }; } } parsePrintStatement(line, lineNumber) { // Parse "print(...)" format const match = line.match(/^print\((.*)\)\s*$/); const argsStr = match ? match[1] : ''; // Parse arguments appropriately const args = this.parseArguments(argsStr); return { type: 'Expr', lineno: lineNumber, value: { type: 'Call', func: { type: 'Name', id: 'print' }, args: args } }; } /** * Convert AST node to IR */ visitNode(node) { if (!node) { return (0, ir_1.createIR)('statement', '', []); } // Set visitNode method to visitor this.statementVisitor.visitNode = this.visitNode.bind(this); this.definitionVisitor.visitNode = this.visitNode.bind(this); switch (node.type) { case 'Module': return this.visitModule(node); // Delegate statement processing case 'Assign': return this.statementVisitor.visitAssign(node); case 'AugAssign': return this.statementVisitor.visitAugAssign(node); case 'AnnAssign': return this.statementVisitor.visitAnnAssign(node); case 'If': return this.statementVisitor.visitIf(node); case 'For': return this.statementVisitor.visitFor(node); case 'While': return this.statementVisitor.visitWhile(node); case 'Return': return this.statementVisitor.visitReturn(node); case 'Call': return this.statementVisitor.visitCall(node); case 'Expr': return this.statementVisitor.visitExpr(node); case 'Comment': return this.statementVisitor.visitComment(node); case 'Pass': return this.statementVisitor.visitPass(node); case 'Break': return this.statementVisitor.visitBreak(node); case 'Continue': return this.statementVisitor.visitContinue(node); case 'Import': case 'ImportFrom': return this.statementVisitor.visitImport(node); case 'Try': return this.statementVisitor.visitTry(node); case 'Raise': return this.statementVisitor.visitRaise(node); case 'With': return this.statementVisitor.visitWith(node); case 'Assert': return this.statementVisitor.visitAssert(node); case 'Global': case 'Nonlocal': return this.statementVisitor.visitGlobal(node); case 'Delete': return this.statementVisitor.visitDelete(node); case 'Match': return this.statementVisitor.visitMatch(node); // Delegate definition processing case 'FunctionDef': return this.definitionVisitor.visitFunctionDef(node); case 'ClassDef': // Class definitions are already registered by preRegisterAllClasses return this.definitionVisitor.visitClassDef(node); default: // For unsupported node types, output as comment return this.createIRNode('comment', `// Unsupported node type: ${node.type}`); } } visitModule(node) { const children = []; for (const child of node.body) { const childIR = this.visitNode(child); children.push(childIR); } return this.createIRNode('compound', '', children); } /** * Parse function definition */ parseFunctionDef(line, lineNumber) { // Parse "def function_name(params):" format const match = line.match(/^def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*([^:]+))?:\s*$/); if (!match) { // If no match, process as basic function definition return { type: 'FunctionDef', name: 'unknown_function', args: { args: [] }, returns: null, body: [], lineno: lineNumber }; } const [, funcName, paramsStr, returnType] = match; // Parse parameters const params = this.parseParameters(paramsStr); return { type: 'FunctionDef', name: funcName, args: { args: params }, returns: returnType ? { type: 'Name', id: returnType.trim() } : null, body: [], lineno: lineNumber }; } /** * Parse argument list */ parseArguments(argsStr) { if (!argsStr.trim()) { return []; } // Split arguments considering parentheses balance const args = this.splitArgumentsRespectingParentheses(argsStr); return args.map(arg => { const trimmed = arg.trim(); // For array access, function calls, and other complex expressions, use parseExpression if (trimmed.includes('[') || trimmed.includes('(') || trimmed.includes('.')) { return this.parseExpression(trimmed); } // For string literals if (trimmed.startsWith('"') && trimmed.endsWith('"')) { return { type: 'Str', s: trimmed.slice(1, -1) }; } // For numbers if (/^\d+$/.test(trimmed)) { return { type: 'Num', n: parseInt(trimmed) }; } // For variable names return { type: 'Name', id: trimmed }; }); } /** * Split arguments considering parentheses balance */ splitArgumentsRespectingParentheses(argsStr) { const args = []; let currentArg = ''; let parenDepth = 0; let inString = false; let stringChar = ''; for (let i = 0; i < argsStr.length; i++) { const char = argsStr[i]; if (!inString) { if (char === '"' || char === "'") { inString = true; stringChar = char; } else if (char === '(') { parenDepth++; } else if (char === ')') { parenDepth--; } else if (char === ',' && parenDepth === 0) { args.push(currentArg.trim()); currentArg = ''; continue; } } else { if (char === stringChar && (i === 0 || argsStr[i - 1] !== '\\')) { inString = false; stringChar = ''; } } currentArg += char; } if (currentArg.trim()) { args.push(currentArg.trim()); } return args; } /** * Parse parameter list */ parseParameters(paramsStr) { if (!paramsStr.trim()) { return []; } return paramsStr.split(',').map(param => { const trimmed = param.trim(); // With type annotation: "param: type" const typeMatch = trimmed.match(/^(\w+)\s*:\s*(.+)$/); if (typeMatch) { const [, paramName, paramType] = typeMatch; return { arg: paramName, annotation: { type: 'Name', id: paramType.trim() } }; } // Without type annotation return { arg: trimmed, annotation: null }; }); } /** * Parse return statement */ parseReturnStatement(line, lineNumber) { const match = line.match(/^return\s*(.*)$/); const value = match ? match[1].trim() : ''; return { type: 'Return', value: value ? this.parseExpression(value) : null, lineno: lineNumber }; } /** * Parse class definition */ parseClassDef(line, lineNumber) { const match = line.match(/^class\s+(\w+)(?:\s*\(([^)]*)\))?\s*:/); if (!match) { this.addError(`Invalid class definition: ${line}`, 'syntax_error'); return { type: 'Unknown', lineno: lineNumber }; } const [, className, baseClasses] = match; const bases = baseClasses ? baseClasses.split(',').map(base => ({ type: 'Name', id: base.trim() })) : []; return { type: 'ClassDef', name: className, bases, body: [], lineno: lineNumber }; } /** * Register class definition to context */ registerClassDefinition(node) { const className = node.name; // Extract attributes from __init__ method const constructor = node.body.find((item) => item.type === 'FunctionDef' && item.name === '__init__'); const attributes = []; if (constructor) { // Get attribute names from constructor parameters if (constructor.args && constructor.args.args) { constructor.args.args.forEach((arg) => { if (arg.arg !== 'self') { attributes.push(arg.arg); } }); } } // Extract inheritance information const bases = []; if (node.bases && node.bases.length > 0) { node.bases.forEach((base) => { if (base.type === 'Name') { bases.push(base.id); } }); } // Register to context if (!this.context.classDefinitions) { this.context.classDefinitions = {}; } this.context.classDefinitions[className] = { attribut