python2igcse
Version:
Convert Python code to IGCSE Pseudocode format
1,364 lines • 51.1 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PythonASTVisitor = void 0;
const ir_1 = require("../types/ir");
const base_parser_1 = require("./base-parser");
const statement_visitor_1 = require("./statement-visitor");
const definition_visitor_1 = require("./definition-visitor");
/**
* Visitor for converting Python AST to IR
*/
class PythonASTVisitor extends base_parser_1.BaseParser {
constructor() {
super();
this.statementVisitor = new statement_visitor_1.StatementVisitor();
this.definitionVisitor = new definition_visitor_1.DefinitionVisitor();
// Share context with visitors
this.statementVisitor.setContext(this.context);
this.definitionVisitor.setContext(this.context);
}
/**
* Main parse function
*/
parse(source) {
this.startParsing();
try {
// In actual implementation, use Python AST parser
// Here we provide a simplified implementation
const ast = this.parseToAST(source);
// Two-pass processing: first pre-register all class definitions
this.preRegisterAllClasses(ast.body);
// First pass: collect function call information
this.collectFunctionCalls(ast);
// After class definition registration, re-share latest context with visitors
this.statementVisitor.setContext(this.context);
this.definitionVisitor.setContext(this.context);
const ir = this.visitNode(ast);
// If IR is not an array, return its child elements
if (ir.kind === 'compound' && ir.children) {
return this.createParseResult(ir.children);
}
return this.createParseResult([ir]);
}
catch (error) {
this.addError(`Parse failed: ${error instanceof Error ? error.message : 'Unknown error'}`, 'syntax_error');
// Return empty IR on error
const emptyIR = (0, ir_1.createIR)('statement', '', []);
return this.createParseResult([emptyIR]);
}
}
/**
* First pass: collect function call information
*/
collectFunctionCalls(ast) {
if (ast.type === 'Call' && ast.func && ast.func.type === 'Name') {
const functionName = ast.func.id;
const argumentTypes = ast.args ? ast.args.map((arg) => this.inferTypeFromValue(arg)) : [];
this.recordFunctionCall(functionName, argumentTypes);
}
// Recursively process child nodes
if (ast.body && Array.isArray(ast.body)) {
ast.body.forEach((child) => this.collectFunctionCalls(child));
}
if (ast.children && Array.isArray(ast.children)) {
ast.children.forEach((child) => this.collectFunctionCalls(child));
}
if (ast.orelse && Array.isArray(ast.orelse)) {
ast.orelse.forEach((child) => this.collectFunctionCalls(child));
}
if (ast.value) {
this.collectFunctionCalls(ast.value);
}
if (ast.test) {
this.collectFunctionCalls(ast.test);
}
if (ast.iter) {
this.collectFunctionCalls(ast.iter);
}
if (ast.args && Array.isArray(ast.args)) {
ast.args.forEach((arg) => this.collectFunctionCalls(arg));
}
if (ast.targets && Array.isArray(ast.targets)) {
ast.targets.forEach((target) => this.collectFunctionCalls(target));
}
}
/**
* Infer type from value
*/
inferTypeFromValue(node) {
switch (node.type) {
case 'Num':
return Number.isInteger(node.n) ? 'INTEGER' : 'REAL';
case 'Constant':
if (typeof node.value === 'number') {
return Number.isInteger(node.value) ? 'INTEGER' : 'REAL';
}
if (typeof node.value === 'string')
return 'STRING';
if (typeof node.value === 'boolean')
return 'BOOLEAN';
return 'STRING';
case 'Str':
return 'STRING';
case 'List':
return 'ARRAY';
case 'Dict':
return 'ARRAY';
case 'NameConstant':
return 'BOOLEAN';
case 'Name':
return 'STRING'; // Default to STRING
default:
return 'STRING';
}
}
/**
* Record function call
*/
recordFunctionCall(functionName, argumentTypes) {
const existingCalls = this.context.functionCalls.get(functionName);
if (existingCalls) {
existingCalls.argumentTypes.push(...argumentTypes);
existingCalls.callCount++;
}
else {
this.context.functionCalls.set(functionName, {
name: functionName,
argumentTypes: [...argumentTypes],
callCount: 1
});
}
}
/**
* Simple AST parser (in actual implementation, use external library)
*/
parseToAST(source) {
// In actual implementation, use python-ast or pyodide
// Here we provide a simplified implementation
const lines = source.split('\n');
const nodes = [];
const processedLines = new Set();
let i = 0;
while (i < lines.length) {
if (processedLines.has(i)) {
i++;
continue;
}
const line = lines[i];
const trimmed = line.trim();
if (trimmed.startsWith('#')) {
// Process comment line
const commentNode = {
type: 'Comment',
value: trimmed.substring(1).trim(),
lineno: i + 1
};
nodes.push(commentNode);
processedLines.add(i);
i++;
}
else if (trimmed) {
const result = this.parseStatement(lines, i);
if (result.node) {
nodes.push(result.node);
// Mark processed lines
for (let j = i; j < result.nextIndex; j++) {
processedLines.add(j);
}
}
i = result.nextIndex;
}
else {
processedLines.add(i);
i++;
}
}
return {
type: 'Module',
body: nodes
};
}
/**
* Parse statement and its child blocks
*/
parseStatement(lines, startIndex) {
// Parses the statement at lines[startIndex] and, when that line ends with ':',
// the indented block that follows it.
//
// lines: all source lines; startIndex: 0-based index of the statement line.
// Returns { node, nextIndex } where nextIndex is the index of the first line
// that was not consumed; node is null when the line produced no AST node.
const line = lines[startIndex];
const trimmed = line.trim();
// Indentation is the number of leading whitespace characters (a tab counts as one).
const indent = line.length - line.trimStart().length;
// Create basic statement node (lineno is 1-based)
const node = this.parseLineToASTNode(trimmed, startIndex + 1);
if (!node) {
return { node: null, nextIndex: startIndex + 1 };
}
// For statements ending with colon (block statements), parse child blocks.
// The check is on the trimmed line, so a header followed by a trailing
// comment is not recognised as a block opener.
if (trimmed.endsWith(':')) {
const bodyNodes = [];
let i = startIndex + 1;
// Parse child blocks from next line
while (i < lines.length) {
const childLine = lines[i];
const childTrimmed = childLine.trim();
const childIndent = childLine.length - childLine.trimStart().length;
// Skip empty lines and comment lines (comments inside a block produce no node)
if (!childTrimmed || childTrimmed.startsWith('#')) {
i++;
continue;
}
// For IF statements, handle ELIF and ELSE statements specially.
// Only a line at exactly the header's indentation is considered.
if (node.type === 'If' && childIndent === indent) {
if (childTrimmed.startsWith('elif ')) {
// Process ELIF statement as new IF statement and add to orelse.
// The recursive call also consumes any further elif/else of the chain.
const elifResult = this.parseStatement(lines, i);
if (elifResult.node) {
node.orelse = [elifResult.node];
}
i = elifResult.nextIndex;
break;
}
else if (childTrimmed.startsWith('else:')) {
// Process ELSE clause
const elseNodes = [];
i++; // Skip else line
// Parse child blocks of ELSE clause
while (i < lines.length) {
const elseChildLine = lines[i];
const elseChildTrimmed = elseChildLine.trim();
const elseChildIndent = elseChildLine.length - elseChildLine.trimStart().length;
// Skip empty lines and comment lines
if (!elseChildTrimmed || elseChildTrimmed.startsWith('#')) {
i++;
continue;
}
// If indent is same or less than the IF header's, ELSE clause ends
if (elseChildIndent <= indent) {
break;
}
// Parse child statements of ELSE clause
const elseChildResult = this.parseStatement(lines, i);
if (elseChildResult.node) {
elseNodes.push(elseChildResult.node);
}
i = elseChildResult.nextIndex;
}
// Set ELSE clause to node
node.orelse = elseNodes;
break;
}
}
// If indent is same or less, block ends
if (childIndent <= indent) {
break;
}
// Parse child statement (recursively consumes its own nested block)
const childResult = this.parseStatement(lines, i);
if (childResult.node) {
bodyNodes.push(childResult.node);
}
i = childResult.nextIndex;
}
// Set child blocks to node. Node types not listed below get no body:
// their child lines are still consumed, but the parsed nodes are dropped.
if (node.type === 'If' || node.type === 'For' || node.type === 'While' || node.type === 'FunctionDef' || node.type === 'ClassDef') {
node.body = bodyNodes;
}
else if (node.type === 'Match') {
// For match statements, set case statements to cases array
// (children that are not 'match_case' nodes are filtered out)
node.cases = bodyNodes.filter(child => child.type === 'match_case');
}
else if (node.type === 'match_case') {
// For case statements, set child nodes to body
node.body = bodyNodes;
}
return { node, nextIndex: i };
}
// Single-line statement: only this line was consumed.
return { node, nextIndex: startIndex + 1 };
}
/**
* Convert single line to AST node
*/
parseLineToASTNode(line, lineNumber) {
const trimmed = line.trim();
// Detect IF statements
if (trimmed.startsWith('if ')) {
return this.parseIfStatement(trimmed, lineNumber);
}
// Detect ELIF statements (process as IF statements)
if (trimmed.startsWith('elif ')) {
// Replace 'elif' with 'if' and process
const ifLine = 'if ' + trimmed.substring(5);
return this.parseIfStatement(ifLine, lineNumber);
}
// Detect FOR statements
if (trimmed.startsWith('for ')) {
return this.parseForStatement(trimmed, lineNumber);
}
// Detect WHILE statements
if (trimmed.startsWith('while ')) {
return this.parseWhileStatement(trimmed, lineNumber);
}
// Detect class definitions
if (trimmed.startsWith('class ')) {
return this.parseClassDef(trimmed, lineNumber);
}
// Detect function definitions
if (trimmed.startsWith('def ')) {
return this.parseFunctionDef(trimmed, lineNumber);
}
// Detect type-annotated assignment statements (e.g., items: list[str] = [])
if (trimmed.includes(': ') && trimmed.includes(' = ')) {
const colonIndex = trimmed.indexOf(': ');
const equalIndex = trimmed.indexOf(' = ');
// If colon comes before equals sign, it's a type-annotated assignment
if (colonIndex < equalIndex) {
const varName = trimmed.substring(0, colonIndex).trim();
const typeAnnotation = trimmed.substring(colonIndex + 2, equalIndex).trim();
const value = trimmed.substring(equalIndex + 3).trim();
if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(varName)) {
// Process type annotation
let annotation;
if (typeAnnotation.includes('[')) {
// For array type annotations (e.g., list[str])
annotation = {
type: 'Subscript',
value: {
type: 'Name',
id: typeAnnotation.substring(0, typeAnnotation.indexOf('['))
},
slice: {
type: 'Name',
id: typeAnnotation.substring(typeAnnotation.indexOf('[') + 1, typeAnnotation.indexOf(']'))
}
};
}
else {
// For simple type annotations (e.g., int, str, float, bool)
annotation = {
type: 'Name',
id: typeAnnotation
};
}
return {
type: 'AnnAssign',
target: {
type: 'Name',
id: varName,
ctx: 'Store'
},
annotation: annotation,
value: value ? this.parseExpression(value) : null,
lineno: lineNumber
};
}
}
}
// Detect assignment statements
if (trimmed.includes(' = ')) {
// Check before and after = to determine if it's an assignment statement
const equalIndex = trimmed.indexOf(' = ');
const beforeEqual = trimmed.substring(0, equalIndex).trim();
const afterEqual = trimmed.substring(equalIndex + 3).trim();
// Detect array element assignment (e.g., data[1] = 100)
const arrayAssignMatch = beforeEqual.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(.+)\]$/);
if (arrayAssignMatch && afterEqual.length > 0) {
const [, arrayName, indexExpr] = arrayAssignMatch;
return {
type: 'Assign',
targets: [{
type: 'Subscript',
value: {
type: 'Name',
id: arrayName,
ctx: 'Load'
},
slice: {
type: 'Index',
value: {
type: 'Constant',
value: parseInt(indexExpr),
kind: null
}
},
ctx: 'Store'
}],
value: this.parseExpression(afterEqual),
lineno: lineNumber
};
}
// Detect attribute assignment (e.g., self.name = value)
const attrAssignMatch = beforeEqual.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\.([a-zA-Z_][a-zA-Z0-9_]*)$/);
if (attrAssignMatch && afterEqual.length > 0) {
const [, objName, attrName] = attrAssignMatch;
return {
type: 'Assign',
targets: [{
type: 'Attribute',
value: {
type: 'Name',
id: objName,
ctx: 'Load'
},
attr: attrName,
ctx: 'Store'
}],
value: this.parseExpression(afterEqual),
lineno: lineNumber
};
}
// If left side is a simple variable name and right side exists, it's an assignment statement
if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(beforeEqual) && afterEqual.length > 0) {
return this.parseAssignStatement(trimmed, lineNumber);
}
}
// Detect print statements
if (trimmed.startsWith('print(')) {
return this.parsePrintStatement(trimmed, lineNumber);
}
// Detect function definitions
if (trimmed.startsWith('def ')) {
return this.parseFunctionDef(trimmed, lineNumber);
}
// Detect class definitions
if (trimmed.startsWith('class ')) {
return this.parseClassDef(trimmed, lineNumber);
}
// Detect return statements
if (trimmed.startsWith('return')) {
return this.parseReturnStatement(trimmed, lineNumber);
}
// Detect match statements
if (trimmed.startsWith('match ')) {
return this.parseMatchStatement(trimmed, lineNumber);
}
// Detect case statements
if (trimmed.startsWith('case ')) {
return this.parseCaseStatement(trimmed, lineNumber);
}
// Detect break statements
if (trimmed === 'break') {
return {
type: 'Break',
lineno: lineNumber
};
}
// Detect continue statements
if (trimmed === 'continue') {
return {
type: 'Continue',
lineno: lineNumber
};
}
// Detect augmented assignment statements (+=, -=, *=, /=, %=)
if (/^[a-zA-Z_][a-zA-Z0-9_]*\s*[+\-*/%]=/.test(trimmed)) {
return this.parseAugAssignStatement(trimmed, lineNumber);
}
// Detect function calls
const callMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/);
if (callMatch) {
const funcName = callMatch[1];
const argsStr = callMatch[2];
const args = this.parseArguments(argsStr);
return {
type: 'Expr',
lineno: lineNumber,
value: {
type: 'Call',
func: { type: 'Name', id: funcName },
args: args
}
};
}
// Detect attribute method calls (e.g., names.append("Alice"))
const attrCallMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\.([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/);
if (attrCallMatch) {
const objName = attrCallMatch[1];
const methodName = attrCallMatch[2];
const argsStr = attrCallMatch[3];
const args = this.parseArguments(argsStr);
return {
type: 'Expr',
lineno: lineNumber,
value: {
type: 'Call',
func: {
type: 'Attribute',
value: {
type: 'Name',
id: objName,
ctx: 'Load'
},
attr: methodName,
ctx: 'Load'
},
args: args
}
};
}
// Process as other expression statements
return {
type: 'Expr',
lineno: lineNumber,
value: {
type: 'Call',
func: { type: 'Name', id: 'unknown' },
args: [],
raw: trimmed
}
};
}
/**
* Parse augmented assignment statements
*/
parseAugAssignStatement(line, lineNumber) {
const match = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\s*([+\-*/%])=\s*(.+)$/);
if (!match) {
// If no match, process as regular expression
return {
type: 'Expr',
lineno: lineNumber,
value: {
type: 'Call',
func: { type: 'Name', id: 'unknown' },
args: [],
raw: line
}
};
}
const [, target, op, value] = match;
return {
type: 'AugAssign',
lineno: lineNumber,
target: {
type: 'Name',
id: target
},
op: {
type: this.getAugAssignOpType(op)
},
value: {
type: 'Num',
n: isNaN(Number(value)) ? value : Number(value),
raw: value
}
};
}
/**
* Get augmented assignment operator type
*/
getAugAssignOpType(op) {
switch (op) {
case '+': return 'Add';
case '-': return 'Sub';
case '*': return 'Mult';
case '/': return 'Div';
case '%': return 'Mod';
default: return 'Add';
}
}
parseIfStatement(line, lineNumber) {
// Parse "if condition:" format
const match = line.match(/^if\s+(.+):\s*$/);
const condition = match ? match[1] : line.substring(3, line.length - 1);
// Parse condition expression
const testNode = this.parseCondition(condition);
return {
type: 'If',
lineno: lineNumber,
test: testNode,
body: [],
orelse: []
};
}
/**
* Parse condition expression
*/
parseCondition(condition) {
// Parse equality comparison (variable == value)
const eqMatch = condition.match(/^(\w+)\s*==\s*(.+)$/);
if (eqMatch) {
const [, variable, value] = eqMatch;
return {
type: 'Compare',
left: { type: 'Name', id: variable },
ops: [{ type: 'Eq' }],
comparators: [this.parseExpression(value.trim())],
raw: condition
};
}
// Parse inequality comparison (variable != value)
const neMatch = condition.match(/^(\w+)\s*!=\s*(.+)$/);
if (neMatch) {
const [, variable, value] = neMatch;
return {
type: 'Compare',
left: { type: 'Name', id: variable },
ops: [{ type: 'NotEq' }],
comparators: [this.parseExpression(value.trim())],
raw: condition
};
}
// Other comparison operators
const compMatch = condition.match(/^(\w+)\s*(<=|>=|<|>)\s*(.+)$/);
if (compMatch) {
const [, variable, op, value] = compMatch;
const opType = this.getComparisonOpType(op);
return {
type: 'Compare',
left: { type: 'Name', id: variable },
ops: [{ type: opType }],
comparators: [this.parseExpression(value.trim())],
raw: condition
};
}
// For simple variables or expressions
return {
type: 'Name',
id: condition,
raw: condition
};
}
/**
* Convert comparison operator string to AST type
*/
getComparisonOpType(op) {
switch (op) {
case '==': return 'Eq';
case '!=': return 'NotEq';
case '<': return 'Lt';
case '<=': return 'LtE';
case '>': return 'Gt';
case '>=': return 'GtE';
default: return 'Eq';
}
}
parseForStatement(line, lineNumber) {
// Parse "for var in iterable:" format
const match = line.match(/^for\s+(\w+)\s+in\s+(.+):\s*$/);
const target = match ? match[1] : 'i';
const iter = match ? match[2] : 'range(1)';
// Parse range function arguments
let args = [];
if (iter.startsWith('range(') && iter.endsWith(')')) {
const argsStr = iter.slice(6, -1); // Remove "range(" and ")"
if (argsStr.trim()) {
const argParts = argsStr.split(',').map(arg => arg.trim());
args = argParts.map(arg => ({
type: 'Num',
n: isNaN(Number(arg)) ? arg : Number(arg),
raw: arg
}));
}
}
// For direct iteration over arrays or lists
if (!iter.startsWith('range(')) {
return {
type: 'For',
lineno: lineNumber,
target: { type: 'Name', id: target },
iter: {
type: 'Name',
id: iter
},
body: [],
orelse: []
};
}
return {
type: 'For',
lineno: lineNumber,
target: { type: 'Name', id: target },
iter: {
type: 'Call',
func: { type: 'Name', id: 'range' },
args: args,
raw: iter
},
body: [],
orelse: []
};
}
parseWhileStatement(line, lineNumber) {
// Parse "while condition:" format
const match = line.match(/^while\s+(.+):\s*$/);
const condition = match ? match[1] : line.substring(6, line.length - 1);
return {
type: 'While',
lineno: lineNumber,
test: {
type: 'Compare',
raw: condition
},
body: [],
orelse: []
};
}
parseMatchStatement(line, lineNumber) {
// Parse "match subject:" format
const match = line.match(/^match\s+(.+):\s*$/);
const subject = match ? match[1] : line.substring(6, line.length - 1);
return {
type: 'Match',
lineno: lineNumber,
subject: {
type: 'Name',
id: subject,
ctx: 'Load'
},
cases: []
};
}
parseCaseStatement(line, lineNumber) {
// Parse "case pattern:" format
const match = line.match(/^case\s+(.+):\s*$/);
const pattern = match ? match[1] : line.substring(5, line.length - 1);
// For wildcard pattern (_)
if (pattern === '_') {
return {
type: 'match_case',
lineno: lineNumber,
pattern: {
type: 'MatchAs',
pattern: null,
name: null
},
guard: null,
body: []
};
}
// For value patterns
return {
type: 'match_case',
lineno: lineNumber,
pattern: {
type: 'MatchValue',
value: this.parseExpression(pattern)
},
guard: null,
body: []
};
}
parseAssignStatement(line, lineNumber) {
// Parse "var = value" format
const parts = line.split(' = ');
const target = parts[0].trim();
let value = parts.slice(1).join(' = ').trim();
// Extract inline comment part (after #)
let inlineComment = '';
const commentIndex = value.indexOf('#');
if (commentIndex !== -1) {
inlineComment = value.substring(commentIndex + 1).trim();
value = value.substring(0, commentIndex).trim();
}
// Detect array literals
if (value.startsWith('[') && value.endsWith(']')) {
const elementsStr = value.slice(1, -1).trim();
const elements = elementsStr ? elementsStr.split(',').map(elem => {
const trimmed = elem.trim();
// Check if it's a number
if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
return {
type: 'Num',
n: parseFloat(trimmed)
};
}
else if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
return {
type: 'Str',
s: trimmed.slice(1, -1)
};
}
else if (trimmed.startsWith("'") && trimmed.endsWith("'")) {
return {
type: 'Str',
s: trimmed.slice(1, -1)
};
}
else {
return {
type: 'Name',
id: trimmed
};
}
}) : [];
return {
type: 'Assign',
lineno: lineNumber,
targets: [{ type: 'Name', id: target }],
value: {
type: 'List',
elts: elements
}
};
}
// Detect array access (e.g., my_array[0])
const arrayAccessMatch = value.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(\d+)\]$/);
if (arrayAccessMatch) {
const [, arrayName, indexStr] = arrayAccessMatch;
return {
type: 'Assign',
lineno: lineNumber,
targets: [{ type: 'Name', id: target }],
value: {
type: 'Subscript',
value: { type: 'Name', id: arrayName },
slice: { type: 'Num', n: parseInt(indexStr) }
}
};
}
// Detect expressions containing comparison operators
const valueNode = this.parseExpression(value);
const assignNode = {
type: 'Assign',
lineno: lineNumber,
targets: [{
type: 'Name',
id: target
}],
value: valueNode
};
if (inlineComment) {
assignNode.inlineComment = inlineComment;
}
return assignNode;
}
/**
* Parse expression and convert to AST node
*/
parseExpression(expr) {
const trimmed = expr.trim();
// Detect empty list
if (trimmed === '[]') {
return {
type: 'List',
elts: [],
ctx: 'Load'
};
}
// Detect list literals
if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
const content = trimmed.slice(1, -1).trim();
if (!content) {
return {
type: 'List',
elts: [],
ctx: 'Load'
};
}
// Parse list elements
const elements = content.split(',').map(elem => {
const elemTrimmed = elem.trim();
// Detect numbers
if (/^\d+$/.test(elemTrimmed)) {
return {
type: 'Constant',
value: parseInt(elemTrimmed),
kind: null
};
}
// Detect strings
if ((elemTrimmed.startsWith('"') && elemTrimmed.endsWith('"')) ||
(elemTrimmed.startsWith("'") && elemTrimmed.endsWith("'"))) {
return {
type: 'Constant',
value: elemTrimmed.slice(1, -1),
kind: null
};
}
// Detect variable names
if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(elemTrimmed)) {
return {
type: 'Name',
id: elemTrimmed,
ctx: 'Load'
};
}
// Other expressions
return {
type: 'Name',
id: elemTrimmed,
ctx: 'Load'
};
});
return {
type: 'List',
elts: elements,
ctx: 'Load'
};
}
// Detect NOT operator (highest priority)
if (trimmed.startsWith('not ')) {
const operand = trimmed.substring(4).trim();
return {
type: 'UnaryOp',
op: { type: 'Not' },
operand: this.parseExpression(operand)
};
}
// Handle parenthesized expressions
if (trimmed.startsWith('(') && trimmed.endsWith(')')) {
const innerExpr = trimmed.slice(1, -1);
const innerNode = this.parseExpression(innerExpr);
// Explicitly mark as parenthesized expression
return {
type: 'Expr',
value: innerNode,
parenthesized: true
};
}
// Detect comparison operators
const compareOps = ['==', '!=', '<=', '>=', '<', '>'];
for (const op of compareOps) {
const index = trimmed.indexOf(op);
if (index !== -1) {
const left = trimmed.substring(0, index).trim();
const right = trimmed.substring(index + op.length).trim();
return {
type: 'Compare',
left: this.parseSimpleExpression(left),
ops: [this.getCompareOpNode(op)],
comparators: [this.parseSimpleExpression(right)]
};
}
}
// Detect logical operators
if (trimmed.includes(' and ')) {
const parts = trimmed.split(' and ');
return {
type: 'BoolOp',
op: { type: 'And' },
values: parts.map(part => this.parseExpression(part.trim()))
};
}
if (trimmed.includes(' or ')) {
const parts = trimmed.split(' or ');
return {
type: 'BoolOp',
op: { type: 'Or' },
values: parts.map(part => this.parseExpression(part.trim()))
};
}
// Detect method calls (e.g., text.upper())
const methodCallMatch = trimmed.match(/^(.+)\.([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/);
if (methodCallMatch) {
const [, objectExpr, methodName, argsStr] = methodCallMatch;
const args = this.parseArguments(argsStr);
return {
type: 'Call',
func: {
type: 'Attribute',
value: this.parseSimpleExpression(objectExpr),
attr: methodName,
ctx: 'Load'
},
args: args
};
}
// Detect function calls
const callMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\((.*)\)$/);
if (callMatch) {
const [, funcName, argsStr] = callMatch;
const args = this.parseArguments(argsStr);
return {
type: 'Call',
func: { type: 'Name', id: funcName },
args: args
};
}
// Detect arithmetic operators (detect longer operators first)
const arithOps = ['//', '+', '-', '*', '/', '%'];
for (const op of arithOps) {
const index = trimmed.indexOf(op);
if (index !== -1) {
const left = trimmed.substring(0, index).trim();
const right = trimmed.substring(index + op.length).trim();
return {
type: 'BinOp',
left: this.parseSimpleExpression(left),
op: this.getArithOpNode(op),
right: this.parseSimpleExpression(right)
};
}
}
// Process as simple expression
return this.parseSimpleExpression(trimmed);
}
/**
* Parse simple expressions (variables, literals)
*/
parseSimpleExpression(expr) {
const trimmed = expr.trim();
// Detect attribute access (e.g., path[0].x)
const attrMatch = trimmed.match(/^(.+)\.([a-zA-Z_][a-zA-Z0-9_]*)$/);
if (attrMatch) {
const [, valueExpr, attr] = attrMatch;
return {
type: 'Attribute',
value: this.parseSimpleExpression(valueExpr),
attr: attr,
ctx: 'Load'
};
}
// Detect array index access (e.g., path[0])
const subscriptMatch = trimmed.match(/^([a-zA-Z_][a-zA-Z0-9_]*)\[(.+)\]$/);
if (subscriptMatch) {
const [, arrayName, indexExpr] = subscriptMatch;
return {
type: 'Subscript',
value: {
type: 'Name',
id: arrayName,
ctx: 'Load'
},
slice: this.parseSimpleExpression(indexExpr),
ctx: 'Load'
};
}
// String literals
if ((trimmed.startsWith('"') && trimmed.endsWith('"')) ||
(trimmed.startsWith("'") && trimmed.endsWith("'"))) {
return {
type: 'Str',
s: trimmed.slice(1, -1)
};
}
// Numeric literals
if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
return {
type: 'Num',
n: parseFloat(trimmed),
raw: trimmed // Preserve original string representation
};
}
// Boolean values
if (trimmed === 'True' || trimmed === 'False') {
return {
type: 'NameConstant',
value: trimmed === 'True'
};
}
// Variable names
return {
type: 'Name',
id: trimmed
};
}
/**
* Get AST node for comparison operators
*/
getCompareOpNode(op) {
switch (op) {
case '==': return { type: 'Eq' };
case '!=': return { type: 'NotEq' };
case '<': return { type: 'Lt' };
case '<=': return { type: 'LtE' };
case '>': return { type: 'Gt' };
case '>=': return { type: 'GtE' };
default: return { type: 'Eq' };
}
}
/**
* Get AST node for arithmetic operators
*/
getArithOpNode(op) {
switch (op) {
case '+': return { type: 'Add' };
case '-': return { type: 'Sub' };
case '*': return { type: 'Mult' };
case '/': return { type: 'Div' };
case '//': return { type: 'FloorDiv' };
case '%': return { type: 'Mod' };
default: return { type: 'Add' };
}
}
parsePrintStatement(line, lineNumber) {
// Parse "print(...)" format
const match = line.match(/^print\((.*)\)\s*$/);
const argsStr = match ? match[1] : '';
// Parse arguments appropriately
const args = this.parseArguments(argsStr);
return {
type: 'Expr',
lineno: lineNumber,
value: {
type: 'Call',
func: { type: 'Name', id: 'print' },
args: args
}
};
}
/**
* Convert AST node to IR
*/
visitNode(node) {
if (!node) {
return (0, ir_1.createIR)('statement', '', []);
}
// Set visitNode method to visitor
this.statementVisitor.visitNode = this.visitNode.bind(this);
this.definitionVisitor.visitNode = this.visitNode.bind(this);
switch (node.type) {
case 'Module':
return this.visitModule(node);
// Delegate statement processing
case 'Assign':
return this.statementVisitor.visitAssign(node);
case 'AugAssign':
return this.statementVisitor.visitAugAssign(node);
case 'AnnAssign':
return this.statementVisitor.visitAnnAssign(node);
case 'If':
return this.statementVisitor.visitIf(node);
case 'For':
return this.statementVisitor.visitFor(node);
case 'While':
return this.statementVisitor.visitWhile(node);
case 'Return':
return this.statementVisitor.visitReturn(node);
case 'Call':
return this.statementVisitor.visitCall(node);
case 'Expr':
return this.statementVisitor.visitExpr(node);
case 'Comment':
return this.statementVisitor.visitComment(node);
case 'Pass':
return this.statementVisitor.visitPass(node);
case 'Break':
return this.statementVisitor.visitBreak(node);
case 'Continue':
return this.statementVisitor.visitContinue(node);
case 'Import':
case 'ImportFrom':
return this.statementVisitor.visitImport(node);
case 'Try':
return this.statementVisitor.visitTry(node);
case 'Raise':
return this.statementVisitor.visitRaise(node);
case 'With':
return this.statementVisitor.visitWith(node);
case 'Assert':
return this.statementVisitor.visitAssert(node);
case 'Global':
case 'Nonlocal':
return this.statementVisitor.visitGlobal(node);
case 'Delete':
return this.statementVisitor.visitDelete(node);
case 'Match':
return this.statementVisitor.visitMatch(node);
// Delegate definition processing
case 'FunctionDef':
return this.definitionVisitor.visitFunctionDef(node);
case 'ClassDef':
// Class definitions are already registered by preRegisterAllClasses
return this.definitionVisitor.visitClassDef(node);
default:
// For unsupported node types, output as comment
return this.createIRNode('comment', `// Unsupported node type: ${node.type}`);
}
}
visitModule(node) {
const children = [];
for (const child of node.body) {
const childIR = this.visitNode(child);
children.push(childIR);
}
return this.createIRNode('compound', '', children);
}
/**
* Parse function definition
*/
parseFunctionDef(line, lineNumber) {
// Parse "def function_name(params):" format
const match = line.match(/^def\s+(\w+)\s*\(([^)]*)\)\s*(?:->\s*([^:]+))?:\s*$/);
if (!match) {
// If no match, process as basic function definition
return {
type: 'FunctionDef',
name: 'unknown_function',
args: { args: [] },
returns: null,
body: [],
lineno: lineNumber
};
}
const [, funcName, paramsStr, returnType] = match;
// Parse parameters
const params = this.parseParameters(paramsStr);
return {
type: 'FunctionDef',
name: funcName,
args: { args: params },
returns: returnType ? { type: 'Name', id: returnType.trim() } : null,
body: [],
lineno: lineNumber
};
}
/**
* Parse argument list
*/
parseArguments(argsStr) {
if (!argsStr.trim()) {
return [];
}
// Split arguments considering parentheses balance
const args = this.splitArgumentsRespectingParentheses(argsStr);
return args.map(arg => {
const trimmed = arg.trim();
// For array access, function calls, and other complex expressions, use parseExpression
if (trimmed.includes('[') || trimmed.includes('(') || trimmed.includes('.')) {
return this.parseExpression(trimmed);
}
// For string literals
if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
return {
type: 'Str',
s: trimmed.slice(1, -1)
};
}
// For numbers
if (/^\d+$/.test(trimmed)) {
return {
type: 'Num',
n: parseInt(trimmed)
};
}
// For variable names
return {
type: 'Name',
id: trimmed
};
});
}
/**
* Split arguments considering parentheses balance
*/
splitArgumentsRespectingParentheses(argsStr) {
const args = [];
let currentArg = '';
let parenDepth = 0;
let inString = false;
let stringChar = '';
for (let i = 0; i < argsStr.length; i++) {
const char = argsStr[i];
if (!inString) {
if (char === '"' || char === "'") {
inString = true;
stringChar = char;
}
else if (char === '(') {
parenDepth++;
}
else if (char === ')') {
parenDepth--;
}
else if (char === ',' && parenDepth === 0) {
args.push(currentArg.trim());
currentArg = '';
continue;
}
}
else {
if (char === stringChar && (i === 0 || argsStr[i - 1] !== '\\')) {
inString = false;
stringChar = '';
}
}
currentArg += char;
}
if (currentArg.trim()) {
args.push(currentArg.trim());
}
return args;
}
/**
* Parse parameter list
*/
parseParameters(paramsStr) {
if (!paramsStr.trim()) {
return [];
}
return paramsStr.split(',').map(param => {
const trimmed = param.trim();
// With type annotation: "param: type"
const typeMatch = trimmed.match(/^(\w+)\s*:\s*(.+)$/);
if (typeMatch) {
const [, paramName, paramType] = typeMatch;
return {
arg: paramName,
annotation: { type: 'Name', id: paramType.trim() }
};
}
// Without type annotation
return {
arg: trimmed,
annotation: null
};
});
}
/**
* Parse return statement
*/
parseReturnStatement(line, lineNumber) {
const match = line.match(/^return\s*(.*)$/);
const value = match ? match[1].trim() : '';
return {
type: 'Return',
value: value ? this.parseExpression(value) : null,
lineno: lineNumber
};
}
/**
* Parse class definition
*/
parseClassDef(line, lineNumber) {
const match = line.match(/^class\s+(\w+)(?:\s*\(([^)]*)\))?\s*:/);
if (!match) {
this.addError(`Invalid class definition: ${line}`, 'syntax_error');
return {
type: 'Unknown',
lineno: lineNumber
};
}
const [, className, baseClasses] = match;
const bases = baseClasses ? baseClasses.split(',').map(base => ({
type: 'Name',
id: base.trim()
})) : [];
return {
type: 'ClassDef',
name: className,
bases,
body: [],
lineno: lineNumber
};
}
/**
* Register class definition to context
*/
registerClassDefinition(node) {
const className = node.name;
// Extract attributes from __init__ method
const constructor = node.body.find((item) => item.type === 'FunctionDef' && item.name === '__init__');
const attributes = [];
if (constructor) {
// Get attribute names from constructor parameters
if (constructor.args && constructor.args.args) {
constructor.args.args.forEach((arg) => {
if (arg.arg !== 'self') {
attributes.push(arg.arg);
}
});
}
}
// Extract inheritance information
const bases = [];
if (node.bases && node.bases.length > 0) {
node.bases.forEach((base) => {
if (base.type === 'Name') {
bases.push(base.id);
}
});
}
// Register to context
if (!this.context.classDefinitions) {
this.context.classDefinitions = {};
}
this.context.classDefinitions[className] = {
attribut