UNPKG

@bcoders.gr/evm-disassembler

Version:

A comprehensive EVM bytecode disassembler and analyzer with support for multiple EVM versions

579 lines (494 loc) 18.1 kB
/** * Core bytecode decoder functionality * @module decoder */ const { InvalidBytecodeError } = require('./errors'); const { getOpcodesForVersion } = require('./opcodes'); const { CONSTANTS } = require('./constants'); /** * Decode EVM bytecode into instructions * @param {string} bytecode - Hex string of bytecode * @param {Object} options - Decoding options * @returns {Array} Array of decoded instructions */ function decodeBytecode(bytecode, options = {}) { const { evmVersion = 'latest', includeMetadata = true, stopAtMetadata = true } = options; // Validate and normalize bytecode bytecode = normalizeBytecode(bytecode); // Get opcodes for specified EVM version const opcodes = getOpcodesForVersion(evmVersion); const instructions = []; let pc = 0; let metadataBoundary = null; // Detect metadata boundary if needed if (stopAtMetadata) { const { detectMetadata } = require('./metadata'); const metadata = detectMetadata(bytecode); if (metadata) { metadataBoundary = metadata.startIndex; } } while (pc < bytecode.length) { // Stop at metadata boundary if requested if (stopAtMetadata && metadataBoundary && pc >= metadataBoundary) { break; } const instruction = decodeInstruction(bytecode, pc, opcodes); instructions.push(instruction); pc = instruction.nextPc; } return instructions; } /** * Decode a single instruction at given position * @param {string} bytecode - Hex string of bytecode * @param {number} pc - Program counter position * @param {Object} opcodes - Opcodes map * @returns {Object} Decoded instruction */ function decodeInstruction(bytecode, pc, opcodes) { const bytePos = pc / 2; const opcodeByte = parseInt(bytecode.substr(pc, 2), 16); if (isNaN(opcodeByte)) { throw new InvalidBytecodeError( `Invalid byte at position ${pc}`, bytecode, pc ); } const opcode = opcodes[opcodeByte] || 'INVALID'; const instruction = { pc, bytePos, opcode, opcodeHex: bytecode.substr(pc, 2), raw: bytecode.substr(pc, 2) }; // Handle PUSH instructions if (opcode.startsWith('PUSH')) { const pushBytes = parseInt(opcode.substring(4)); const dataStart = pc + 2; const dataEnd = dataStart + (pushBytes * 2); if (dataEnd > bytecode.length) { // Truncated push data instruction.pushData = bytecode.substring(dataStart); instruction.truncated = true; instruction.nextPc = bytecode.length; } else { instruction.pushData = bytecode.substring(dataStart, dataEnd); instruction.nextPc = dataEnd; } instruction.raw = bytecode.substring(pc, instruction.nextPc); // Add push value interpretation if (instruction.pushData) { instruction.pushValue = '0x' + instruction.pushData; instruction.pushDecimal = BigInt('0x' + instruction.pushData).toString(); } } else { instruction.nextPc = pc + 2; } return instruction; } /** * Normalize bytecode input * @param {string} bytecode - Input bytecode * @returns {string} Normalized bytecode */ function normalizeBytecode(bytecode) { if (!bytecode || typeof bytecode !== 'string') { throw new InvalidBytecodeError('Bytecode must be a non-empty string'); } // Remove 0x prefix if present if (bytecode.startsWith('0x') || bytecode.startsWith('0X')) { bytecode = bytecode.substring(2); } // Remove whitespace and convert to lowercase bytecode = bytecode.replace(/\s/g, '').toLowerCase(); // Validate hex string if (!/^[0-9a-f]*$/.test(bytecode)) { throw new InvalidBytecodeError('Bytecode must be a valid hex string'); } // Ensure even length if (bytecode.length % 2 !== 0) { throw new InvalidBytecodeError('Bytecode must have even length'); } // Check size limit if (bytecode.length > CONSTANTS.MAX_BYTECODE_SIZE) { throw new InvalidBytecodeError( `Bytecode exceeds maximum size of ${CONSTANTS.MAX_BYTECODE_SIZE / 2} bytes` ); } return bytecode; } /** * Quick validation of bytecode without full decoding * @param {string} bytecode - Hex string of bytecode * @returns {Object} Validation result */ function validateBytecode(bytecode) { try { bytecode = normalizeBytecode(bytecode); const result = { valid: true, size: bytecode.length / 2, hasMetadata: false, errors: [] }; // Check for metadata const { detectMetadata } = require('./metadata'); const metadata = detectMetadata(bytecode); if (metadata) { result.hasMetadata = true; result.metadataInfo = metadata; } // Quick scan for invalid opcodes for (let i = 0; i < bytecode.length; i += 2) { const byte = parseInt(bytecode.substr(i, 2), 16); // Skip PUSH data if (byte >= 0x60 && byte <= 0x7f) { const pushBytes = byte - 0x5f; i += pushBytes * 2; } } return result; } catch (error) { return { valid: false, error: error.message, errors: [error] }; } } /** * Extract all PUSH values from bytecode * @param {Array} instructions - Decoded instructions * @returns {Array} Array of push values */ function extractPushValues(instructions) { return instructions .filter(inst => inst.opcode.startsWith('PUSH') && inst.pushData) .map(inst => ({ pc: inst.pc, opcode: inst.opcode, value: inst.pushValue, decimal: inst.pushDecimal, data: inst.pushData, bytes: inst.pushData.length / 2 })); } /** * Find all jump destinations in bytecode * @param {Array} instructions - Decoded instructions * @returns {Array} Array of jump destinations */ function findJumpDestinations(instructions) { const jumpDests = []; for (const inst of instructions) { if (inst.opcode === 'JUMPDEST') { jumpDests.push({ pc: inst.pc, bytePos: inst.bytePos }); } } return jumpDests; } /** * Extract potential addresses from PUSH20 instructions * @param {Array} instructions - Decoded instructions * @returns {Array} Array of potential addresses */ function extractAddresses(instructions) { return instructions .filter(inst => inst.opcode === 'PUSH20' && inst.pushData) .map(inst => ({ pc: inst.pc, address: '0x' + inst.pushData, checksumAddress: toChecksumAddress('0x' + inst.pushData) })) .filter(item => item.address !== '0x' + '0'.repeat(40)); // Filter out zero address } /** * Convert address to checksum format (EIP-55) * @param {string} address - Ethereum address * @returns {string} Checksum address */ function toChecksumAddress(address) { // Simple checksum implementation (would need keccak256 for full implementation) // This is a placeholder - in production, use a proper implementation return address; } /** * ERC20 Extractor - Node.js Module * Extracts functions, variables, and mappings from Solidity ERC20 contracts * Returns: { functions: [], variables: [], mappings: [] } */ function isERC20Contract(sourceCode) { /** * Check if source code contains ERC20 token implementation */ const erc20Functions = [ 'transfer(', 'approve(', 'transferFrom(', 'balanceOf(', 'totalSupply()' ]; const sourceLower = sourceCode.toLowerCase(); // Check if at least 4 out of 5 required functions are present let foundFunctions = 0; for (const func of erc20Functions) { if (sourceLower.includes('function ' + func.toLowerCase())) { foundFunctions++; } } return foundFunctions >= 4; // Allow for 80% match } function extractFunctions(sourceCode) { /** * Extract all functions with their parameters and visibility */ const functions = []; // Enhanced pattern to match function declarations const functionPattern = /function\s+(\w+)\s*\(([^)]*)\)\s*(public|private|internal|external)?\s*(view|pure|payable)?\s*(override)?\s*(virtual)?\s*(returns\s*\([^)]*\))?\s*[{;]/gm; let match; while ((match = functionPattern.exec(sourceCode)) !== null) { const funcName = match[1]; const params = match[2] ? match[2].trim() : ''; const visibility = match[3] || 'internal'; // default visibility const stateMutability = match[4] || ''; const override = match[5] || ''; const virtual = match[6] || ''; const returns = match[7] || ''; // Clean up parameters const cleanParams = params.replace(/\s+/g, ' '); // Parse parameters const paramList = []; if (cleanParams) { cleanParams.split(',').forEach(param => { const trimmedParam = param.trim(); if (trimmedParam) { paramList.push(trimmedParam); } }); } functions.push({ name: funcName, parameters: paramList, visibility: visibility, state_mutability: stateMutability, override: Boolean(override), virtual: Boolean(virtual), returns: returns ? returns.replace('returns', '').trim() : '', full_signature: `${funcName}(${cleanParams})` }); } return functions; } function extractVariables(sourceCode) { /** * Extract state variables */ const variables = []; // Pattern to match state variable declarations const variablePatterns = [ /(uint256|uint|int256|int|address|bool|string|bytes32|bytes)\s+(public|private|internal)?\s+(\w+)(?:\s*=\s*[^;]+)?;/g, /(address\s+payable)\s+(public|private|internal)?\s+(\w+)(?:\s*=\s*[^;]+)?;/g ]; const lines = sourceCode.split('\n'); let inContract = false; let contractDepth = 0; for (const line of lines) { const trimmedLine = line.trim(); // Skip comments and empty lines if (trimmedLine.startsWith('//') || trimmedLine.startsWith('/*') || !trimmedLine) { continue; } // Track contract boundaries if (trimmedLine.includes('contract ') && trimmedLine.includes('{')) { inContract = true; contractDepth = 1; continue; } else if (inContract) { contractDepth += (trimmedLine.match(/{/g) || []).length - (trimmedLine.match(/}/g) || []).length; if (contractDepth <= 0) { inContract = false; continue; } } if (!inContract) { continue; } // Skip function definitions, modifiers, events, etc. const skipKeywords = ['function ', 'modifier ', 'event ', 'constructor', 'receive(', 'fallback(']; if (skipKeywords.some(keyword => trimmedLine.includes(keyword))) { continue; } // Extract variables for (const pattern of variablePatterns) { pattern.lastIndex = 0; // Reset regex let match; while ((match = pattern.exec(trimmedLine)) !== null) { const varType = match[1].trim().replace(/\s+/g, ' '); const visibility = match[2] || 'internal'; // default visibility const varName = match[3].trim(); variables.push({ name: varName, type: varType, visibility: visibility, declaration: trimmedLine }); } } } return variables; } function extractMappings(sourceCode) { /** * Extract mapping declarations */ const mappings = []; // Pattern to match mapping declarations const mappingPattern = /mapping\s*\(([^)]+)\)\s+(public|private|internal)?\s+(\w+)/g; const lines = sourceCode.split('\n'); let inContract = false; let contractDepth = 0; for (const line of lines) { const trimmedLine = line.trim(); // Skip comments and empty lines if (trimmedLine.startsWith('//') || trimmedLine.startsWith('/*') || !trimmedLine) { continue; } // Track contract boundaries if (trimmedLine.includes('contract ') && trimmedLine.includes('{')) { inContract = true; contractDepth = 1; continue; } else if (inContract) { contractDepth += (trimmedLine.match(/{/g) || []).length - (trimmedLine.match(/}/g) || []).length; if (contractDepth <= 0) { inContract = false; continue; } } if (!inContract) { continue; } // Skip function definitions if (trimmedLine.includes('function ')) { continue; } // Extract mappings mappingPattern.lastIndex = 0; // Reset regex let match; while ((match = mappingPattern.exec(trimmedLine)) !== null) { const mappingType = match[1].trim(); const visibility = match[2] || 'internal'; // default visibility const mappingName = match[3].trim(); // Parse mapping type (key => value) const keyValue = mappingType.split('=>'); const keyType = keyValue.length > 0 ? keyValue[0].trim() : ''; const valueType = keyValue.length > 1 ? keyValue[1].trim() : ''; mappings.push({ name: mappingName, key_type: keyType, value_type: valueType, visibility: visibility, full_type: `mapping(${mappingType})`, declaration: trimmedLine }); } } return mappings; } function extractContractName(sourceCode) { /** * Extract the main ERC20 contract name */ // Skip common base contracts const skipContracts = new Set(['Context', 'Ownable', 'IERC20', 'SafeMath', 'ERC20']); // Look for contract declarations const contractMatches = sourceCode.match(/contract\s+(\w+)/g); if (contractMatches) { for (const match of contractMatches) { const contractName = match.replace('contract ', '').trim(); if (!skipContracts.has(contractName)) { return contractName; } } // If all contracts are in skip list, return the first one return contractMatches[0].replace('contract ', '').trim(); } return 'Unknown'; } function extractERC20Data(sourceCode) { /** * Main function to extract ERC20 contract data * @param {string} sourceCode - Solidity source code * @returns {Object} - { functions: [], variables: [], mappings: [] } */ if (!sourceCode || typeof sourceCode !== 'string') { throw new Error('Invalid source code provided'); } // Check if it's an ERC20 contract const isERC20 = isERC20Contract(sourceCode); if (!isERC20) { // Still extract data but mark as potentially non-ERC20 console.warn('Warning: Contract may not be a complete ERC20 implementation'); } try { // Extract contract components const functions = extractFunctions(sourceCode); const variables = extractVariables(sourceCode); const mappings = extractMappings(sourceCode); const contractName = extractContractName(sourceCode); // Create extraction summary const extractionSummary = { contract_name: contractName, total_functions: functions.length, public_functions: functions.filter(f => f.visibility === 'public').length, private_functions: functions.filter(f => f.visibility === 'private').length, internal_functions: functions.filter(f => f.visibility === 'internal').length, external_functions: functions.filter(f => f.visibility === 'external').length, total_variables: variables.length, public_variables: variables.filter(v => v.visibility === 'public').length, private_variables: variables.filter(v => v.visibility === 'private').length, total_mappings: mappings.length, public_mappings: mappings.filter(m => m.visibility === 'public').length, private_mappings: mappings.filter(m => m.visibility === 'private').length }; return { functions, variables, mappings, extraction_summary: extractionSummary }; } catch (error) { return { functions: [], variables: [], mappings: [], error: `Extraction failed: ${error.message}` }; } } module.exports = { decodeBytecode, decodeInstruction, normalizeBytecode, validateBytecode, extractPushValues, findJumpDestinations, extractAddresses, // ERC20 extraction functions extractERC20Data, isERC20Contract, extractFunctions, extractVariables, extractMappings, extractContractName };