@bcoders.gr/evm-disassembler
Version:
A comprehensive EVM bytecode disassembler and analyzer with support for multiple EVM versions
579 lines (494 loc) • 18.1 kB
JavaScript
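/**
* Core bytecode decoder functionality, plus regex-based helpers that extract
* functions, state variables and mappings from Solidity (ERC20) source code
* @module decoder
*/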
const { InvalidBytecodeError } = require('./errors');
const { getOpcodesForVersion } = require('./opcodes');
const { CONSTANTS } = require('./constants');
/**
 * Turn a hex-encoded EVM bytecode string into a flat list of instructions.
 *
 * The input is first run through `normalizeBytecode`; the opcode table comes
 * from `getOpcodesForVersion(evmVersion)`. Instructions are then decoded one
 * after another with `decodeInstruction`, each result's `nextPc` becoming the
 * next offset. Offsets (`pc`) index hex characters of the normalized string.
 *
 * `options.includeMetadata` is not read by this function.
 *
 * @param {string} bytecode - Hex string of bytecode
 * @param {Object} [options] - Decoding options
 * @param {string} [options.evmVersion='latest'] - Passed to `getOpcodesForVersion`
 * @param {boolean} [options.stopAtMetadata=true] - When true, `detectMetadata`
 *   from `./metadata` is consulted and decoding stops once `pc` reaches the
 *   `startIndex` it reports
 * @returns {Array} Array of decoded instructions
 */
function decodeBytecode(bytecode, options = {}) {
  const { evmVersion = 'latest', stopAtMetadata = true } = options;

  const code = normalizeBytecode(bytecode);
  const opcodeTable = getOpcodesForVersion(evmVersion);

  // The metadata module is only loaded when the caller asks for the cut-off.
  let metadataBoundary = null;
  if (stopAtMetadata) {
    const { detectMetadata } = require('./metadata');
    const metadata = detectMetadata(code);
    if (metadata) {
      metadataBoundary = metadata.startIndex;
    }
  }

  const decoded = [];
  for (let pc = 0; pc < code.length; ) {
    // NOTE(review): truthiness test, so a boundary of 0 behaves like "no boundary".
    if (metadataBoundary && pc >= metadataBoundary) {
      break;
    }
    const instruction = decodeInstruction(code, pc, opcodeTable);
    decoded.push(instruction);
    pc = instruction.nextPc;
  }
  return decoded;
}
/**
 * Decode a single instruction at given position
 *
 * `pc` is an offset into the hex string (two characters per byte), so the
 * byte offset is `pc / 2`. The two characters at `pc` must both be hex digits;
 * anything else (including a lone trailing nibble or running past the end)
 * is rejected instead of being silently coerced by `parseInt`.
 *
 * For `PUSHn` opcodes the following `n` bytes are captured as `pushData`.
 * When fewer than `n` bytes remain, the available data is kept, `truncated`
 * is set to `true`, and `nextPc` is the end of the string.
 *
 * @param {string} bytecode - Hex string of bytecode
 * @param {number} pc - Program counter position (hex-character offset)
 * @param {Object} opcodes - Opcodes map (byte value -> mnemonic string)
 * @returns {Object} Decoded instruction with `pc`, `bytePos`, `opcode`,
 *   `opcodeHex`, `raw`, `nextPc`, and for PUSH opcodes `pushData`, optionally
 *   `truncated`, `pushValue` and `pushDecimal`
 * @throws {InvalidBytecodeError} If the opcode byte or the push data is not valid hex
 */
function decodeInstruction(bytecode, pc, opcodes) {
  const opcodeHex = bytecode.slice(pc, pc + 2);

  // parseInt('0g', 16) === 0 and parseInt('6', 16) === 6, so validate the
  // text itself rather than relying on a NaN result.
  if (!/^[0-9a-fA-F]{2}$/.test(opcodeHex)) {
    throw new InvalidBytecodeError(
      `Invalid byte at position ${pc}`,
      bytecode,
      pc
    );
  }

  const opcode = opcodes[Number.parseInt(opcodeHex, 16)] || 'INVALID';
  const instruction = {
    pc,
    bytePos: pc / 2,
    opcode,
    opcodeHex,
    raw: opcodeHex
  };

  // Only mnemonics of the form PUSH<digits> carry immediate data.
  const pushMatch = /^PUSH(\d+)$/.exec(opcode);
  if (!pushMatch) {
    instruction.nextPc = pc + 2;
    return instruction;
  }

  const dataStart = pc + 2;
  const dataEnd = dataStart + Number.parseInt(pushMatch[1], 10) * 2;

  if (dataEnd > bytecode.length) {
    // Truncated push data: keep what is there and stop at the end.
    instruction.pushData = bytecode.slice(dataStart);
    instruction.truncated = true;
    instruction.nextPc = bytecode.length;
  } else {
    instruction.pushData = bytecode.slice(dataStart, dataEnd);
    instruction.nextPc = dataEnd;
  }

  // Without this check BigInt() below would raise a bare SyntaxError.
  if (!/^[0-9a-fA-F]*$/.test(instruction.pushData)) {
    throw new InvalidBytecodeError(
      `Invalid byte at position ${dataStart}`,
      bytecode,
      dataStart
    );
  }

  instruction.raw = bytecode.slice(pc, instruction.nextPc);

  // PUSH0 and fully truncated pushes have empty data and get no value fields.
  if (instruction.pushData) {
    instruction.pushValue = '0x' + instruction.pushData;
    instruction.pushDecimal = BigInt('0x' + instruction.pushData).toString();
  }

  return instruction;
}
/**
 * Normalize bytecode input
 *
 * Steps, in order: trim surrounding whitespace, drop one leading `0x`/`0X`
 * prefix, remove any remaining whitespace, lowercase, then validate that the
 * result is an even-length hex string no longer than
 * `CONSTANTS.MAX_BYTECODE_SIZE` (compared against the hex-character count).
 *
 * Trimming happens before the prefix check so input such as `" 0x6001"` or
 * `"\n0x6001"` is accepted instead of failing on the `x`.
 *
 * @param {string} bytecode - Input bytecode
 * @returns {string} Normalized bytecode (lowercase hex, no prefix, no whitespace)
 * @throws {InvalidBytecodeError} If the input is empty, not a string, not hex,
 *   of odd length, or over the size limit
 */
function normalizeBytecode(bytecode) {
  if (!bytecode || typeof bytecode !== 'string') {
    throw new InvalidBytecodeError('Bytecode must be a non-empty string');
  }

  let hex = bytecode.trim();

  // Remove 0x prefix if present
  if (hex.startsWith('0x') || hex.startsWith('0X')) {
    hex = hex.slice(2);
  }

  // Remove interior whitespace and convert to lowercase
  hex = hex.replace(/\s/g, '').toLowerCase();

  if (!/^[0-9a-f]*$/.test(hex)) {
    throw new InvalidBytecodeError('Bytecode must be a valid hex string');
  }

  if (hex.length % 2 !== 0) {
    throw new InvalidBytecodeError('Bytecode must have even length');
  }

  if (hex.length > CONSTANTS.MAX_BYTECODE_SIZE) {
    throw new InvalidBytecodeError(
      `Bytecode exceeds maximum size of ${CONSTANTS.MAX_BYTECODE_SIZE / 2} bytes`
    );
  }

  return hex;
}
/**
 * Quick validation of bytecode without full decoding
 *
 * Validity here means exactly "accepted by `normalizeBytecode`" (string, hex,
 * even length, within the size limit). Individual opcodes are NOT checked:
 * the previous per-byte scan consulted no opcode table and recorded nothing,
 * so it has been removed rather than left implying a check that never ran.
 *
 * Any exception raised inside (from normalization, or from loading/running
 * `detectMetadata`) is reported as an invalid result rather than thrown.
 *
 * @param {string} bytecode - Hex string of bytecode
 * @returns {Object} On success `{ valid: true, size, hasMetadata, errors: [] }`
 *   plus `metadataInfo` when `detectMetadata` returns a truthy value; on
 *   failure `{ valid: false, error, errors: [error] }`
 */
function validateBytecode(bytecode) {
  try {
    const normalized = normalizeBytecode(bytecode);
    const result = {
      valid: true,
      size: normalized.length / 2,
      hasMetadata: false,
      errors: []
    };

    // Check for metadata
    const { detectMetadata } = require('./metadata');
    const metadata = detectMetadata(normalized);
    if (metadata) {
      result.hasMetadata = true;
      result.metadataInfo = metadata;
    }

    return result;
  } catch (error) {
    return {
      valid: false,
      error: error.message,
      errors: [error]
    };
  }
}
/**
 * Collect the immediate values carried by PUSH instructions.
 *
 * An instruction contributes an entry only when its opcode starts with
 * `PUSH` and its `pushData` is truthy, so pushes with empty data are skipped.
 *
 * @param {Array} instructions - Decoded instructions
 * @returns {Array} One `{ pc, opcode, value, decimal, data, bytes }` record per
 *   qualifying instruction, in input order; `bytes` is `pushData.length / 2`
 */
function extractPushValues(instructions) {
  return instructions.reduce((collected, instruction) => {
    const isPush = instruction.opcode.startsWith('PUSH');
    if (isPush && instruction.pushData) {
      const { pc, opcode, pushValue, pushDecimal, pushData } = instruction;
      collected.push({
        pc,
        opcode,
        value: pushValue,
        decimal: pushDecimal,
        data: pushData,
        bytes: pushData.length / 2
      });
    }
    return collected;
  }, []);
}
/**
 * List the positions of every JUMPDEST among the decoded instructions.
 *
 * @param {Array} instructions - Decoded instructions
 * @returns {Array} `{ pc, bytePos }` for each instruction whose opcode is
 *   exactly `'JUMPDEST'`, in input order
 */
function findJumpDestinations(instructions) {
  return [...instructions]
    .filter(({ opcode }) => opcode === 'JUMPDEST')
    .map(({ pc, bytePos }) => ({ pc, bytePos }));
}
/**
 * Extract potential addresses from PUSH20 instructions
 *
 * Only PUSH20 instructions carrying a full 20 bytes (40 hex characters) of
 * data are considered: a PUSH20 truncated by the end of the bytecode holds
 * fewer bytes and cannot be an address. The all-zero address is dropped.
 *
 * @param {Array} instructions - Decoded instructions
 * @returns {Array} `{ pc, address, checksumAddress }` per candidate, in input order
 */
function extractAddresses(instructions) {
  const ADDRESS_HEX_LENGTH = 40;
  const zeroAddress = '0x' + '0'.repeat(ADDRESS_HEX_LENGTH);

  return instructions
    .filter(inst =>
      inst.opcode === 'PUSH20' &&
      typeof inst.pushData === 'string' &&
      inst.pushData.length === ADDRESS_HEX_LENGTH
    )
    .map(inst => ({
      pc: inst.pc,
      address: '0x' + inst.pushData,
      checksumAddress: toChecksumAddress('0x' + inst.pushData)
    }))
    .filter(item => item.address !== zeroAddress); // Filter out zero address
}

/**
 * Convert address to checksum format (EIP-55)
 *
 * Placeholder: the address is returned unchanged. A real EIP-55 checksum
 * needs a keccak256 hash, which this module does not have available.
 *
 * @param {string} address - Ethereum address
 * @returns {string} The same address, not checksummed
 */
function toChecksumAddress(address) {
  // TODO: apply EIP-55 mixed-case encoding once a keccak256 implementation is available.
  return address;
}
// ---------------------------------------------------------------------------
// ERC20 Extractor
// Extracts functions, variables, and mappings from Solidity ERC20 contracts.
// ---------------------------------------------------------------------------

/**
 * Check if source code contains ERC20 token implementation
 *
 * Heuristic: looks for `function <name>(` declarations of `transfer`,
 * `approve`, `transferFrom`, `balanceOf` and `totalSupply()` and reports true
 * when at least 4 of the 5 are present. Matching is case-insensitive and
 * tolerant of extra whitespace between `function`, the name and the
 * parenthesis. Text inside comments is not excluded.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {boolean} True when at least four of the five signatures are found;
 *   false for non-string input
 */
function isERC20Contract(sourceCode) {
  if (typeof sourceCode !== 'string') {
    return false;
  }

  const signaturePatterns = [
    /function\s+transfer\s*\(/i,
    /function\s+approve\s*\(/i,
    /function\s+transferFrom\s*\(/i,
    /function\s+balanceOf\s*\(/i,
    /function\s+totalSupply\s*\(\s*\)/i
  ];

  const foundFunctions = signaturePatterns
    .filter(pattern => pattern.test(sourceCode))
    .length;

  return foundFunctions >= 4; // Allow for 80% match
}
/**
 * Extract all functions with their parameters and visibility
 *
 * Each `function name(params) <specifiers> {` or `;` header is located with a
 * regex, then the specifier section between the parameter list and the
 * `{`/`;` is inspected keyword by keyword. Specifiers may therefore appear in
 * any order and may be mixed with custom modifiers (`onlyOwner`,
 * `virtual override`, `view public`, ...), none of which the previous
 * fixed-order pattern could match.
 *
 * Limitations: this is text matching, not a parser. Parameter lists
 * containing `)` and `function` headers inside comments or strings are not
 * handled specially.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {Array} One record per function:
 *   `{ name, parameters, visibility, state_mutability, override, virtual,
 *   returns, full_signature }`. `visibility` falls back to `'internal'` and
 *   `state_mutability` to `''` when absent; `returns` is the parenthesised
 *   return list (e.g. `'(bool)'`) or `''`.
 */
function extractFunctions(sourceCode) {
  // name, parameter list, then everything up to the body/terminator.
  const headerPattern = /function\s+(\w+)\s*\(([^)]*)\)([^{;]*)[{;]/g;
  const functions = [];

  for (const [, name, rawParams, rawSpecifiers] of String(sourceCode).matchAll(headerPattern)) {
    const cleanParams = rawParams.trim().replace(/\s+/g, ' ');
    const parameters = cleanParams
      .split(',')
      .map(param => param.trim())
      .filter(Boolean);

    // Comments between the header and the body must not contribute keywords.
    const withoutComments = rawSpecifiers.replace(/\/\*[\s\S]*?\*\/|\/\/[^\n]*/g, ' ');

    // Cut the returns clause out before keyword scanning so that types such as
    // `address payable` in the return list are not mistaken for mutability.
    const returnsMatch = /\breturns\s*(\([^)]*\))/.exec(withoutComments);
    const specifiers = returnsMatch
      ? withoutComments.slice(0, returnsMatch.index) + ' ' +
        withoutComments.slice(returnsMatch.index + returnsMatch[0].length)
      : withoutComments;

    const visibility = /\b(public|private|internal|external)\b/.exec(specifiers);
    const mutability = /\b(view|pure|payable)\b/.exec(specifiers);

    functions.push({
      name,
      parameters,
      visibility: visibility ? visibility[1] : 'internal', // default visibility
      state_mutability: mutability ? mutability[1] : '',
      override: /\boverride\b/.test(specifiers),
      virtual: /\bvirtual\b/.test(specifiers),
      returns: returnsMatch ? returnsMatch[1] : '',
      full_signature: `${name}(${cleanParams})`
    });
  }

  return functions;
}
/**
 * Extract state variables
 *
 * Scans the source line by line, tracking brace depth from the first line
 * that contains both `contract ` and `{`. Only lines that begin at contract
 * top level (depth 1) are examined, so locals inside function bodies and
 * struct members are not reported.
 *
 * Recognised types: `address payable`, `address`, `bool`, `string`, and the
 * `uint`/`int`/`bytes` families with an optional size suffix. Visibility is
 * optional (defaults to `'internal'`), and `constant` / `immutable` may
 * accompany it in any order.
 *
 * Limitations: array types, user-defined types, declarations spanning several
 * lines, and contracts whose `{` is on the line after the header are not detected.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {Array} `{ name, type, visibility, declaration }` per variable,
 *   where `declaration` is the trimmed source line
 */
function extractVariables(sourceCode) {
  // type, optional run of visibility/constant/immutable keywords, name,
  // optional initializer, terminating semicolon.
  const declarationPattern =
    /\b(address\s+payable|uint\d*|int\d*|bytes\d*|address|bool|string)\s+((?:(?:public|private|internal|constant|immutable)\s+)*)(\w+)\s*(?:=[^;]+)?;/g;
  // Lines that open a non-variable declaration.
  const nonVariableLine = /^(?:function|modifier|event|constructor|receive|fallback)\b/;
  const countOf = (text, token) => text.split(token).length - 1;

  const variables = [];
  let inContract = false;
  let contractDepth = 0;

  for (const line of sourceCode.split('\n')) {
    const trimmedLine = line.trim();

    // Skip empty lines and comment lines (including block-comment continuations).
    if (
      !trimmedLine ||
      trimmedLine.startsWith('//') ||
      trimmedLine.startsWith('/*') ||
      trimmedLine.startsWith('*')
    ) {
      continue;
    }

    // Track contract boundaries
    if (trimmedLine.includes('contract ') && trimmedLine.includes('{')) {
      inContract = true;
      contractDepth = 1;
      continue;
    }
    if (!inContract) {
      continue;
    }

    const depthBefore = contractDepth;
    contractDepth += countOf(trimmedLine, '{') - countOf(trimmedLine, '}');
    if (contractDepth <= 0) {
      inContract = false;
      continue;
    }

    // State variables live directly in the contract body, not in nested blocks.
    if (depthBefore !== 1 || nonVariableLine.test(trimmedLine)) {
      continue;
    }

    for (const [, rawType, modifiers, name] of trimmedLine.matchAll(declarationPattern)) {
      const visibility = /\b(public|private|internal)\b/.exec(modifiers);
      variables.push({
        name,
        type: rawType.replace(/\s+/g, ' '),
        visibility: visibility ? visibility[1] : 'internal', // default visibility
        declaration: trimmedLine
      });
    }
  }

  return variables;
}
/**
 * Extract mapping declarations
 *
 * Scans the source line by line, tracking brace depth from the first line
 * that contains both `contract ` and `{`, and only examines lines that begin
 * at contract top level (depth 1) and do not contain `function `.
 *
 * The mapping's type is read by matching parentheses, so nested mappings such
 * as `mapping(address => mapping(address => uint256))` are captured whole and
 * split at the first `=>` only. Visibility is optional and defaults to
 * `'internal'`.
 *
 * Limitations: declarations spanning several lines and contracts whose `{`
 * is on the line after the header are not detected.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {Array} `{ name, key_type, value_type, visibility, full_type,
 *   declaration }` per mapping, where `declaration` is the trimmed source line
 */
function extractMappings(sourceCode) {
  const keywordPattern = /\bmapping\s*\(/g;
  const declaratorPattern = /^\s*(?:(public|private|internal)\s+)?(\w+)/;
  const countOf = (text, token) => text.split(token).length - 1;

  // Index of the ')' that closes the '(' at openIndex, or -1 when unbalanced.
  const findClosingParen = (text, openIndex) => {
    let depth = 0;
    for (let i = openIndex; i < text.length; i += 1) {
      if (text[i] === '(') {
        depth += 1;
      } else if (text[i] === ')') {
        depth -= 1;
        if (depth === 0) {
          return i;
        }
      }
    }
    return -1;
  };

  const mappings = [];
  let inContract = false;
  let contractDepth = 0;

  for (const line of sourceCode.split('\n')) {
    const trimmedLine = line.trim();

    // Skip comments and empty lines
    if (!trimmedLine || trimmedLine.startsWith('//') || trimmedLine.startsWith('/*')) {
      continue;
    }

    // Track contract boundaries
    if (trimmedLine.includes('contract ') && trimmedLine.includes('{')) {
      inContract = true;
      contractDepth = 1;
      continue;
    }
    if (!inContract) {
      continue;
    }

    const depthBefore = contractDepth;
    contractDepth += countOf(trimmedLine, '{') - countOf(trimmedLine, '}');
    if (contractDepth <= 0) {
      inContract = false;
      continue;
    }

    // Only contract-level lines; function headers may carry mapping parameters.
    if (depthBefore !== 1 || trimmedLine.includes('function ')) {
      continue;
    }

    keywordPattern.lastIndex = 0;
    let keyword;
    while ((keyword = keywordPattern.exec(trimmedLine)) !== null) {
      const openIndex = keywordPattern.lastIndex - 1;
      const closeIndex = findClosingParen(trimmedLine, openIndex);
      if (closeIndex === -1) {
        break;
      }

      // Resume after the closing paren so nested mappings are not re-reported.
      keywordPattern.lastIndex = closeIndex + 1;

      const mappingType = trimmedLine.slice(openIndex + 1, closeIndex).trim();
      const declarator = declaratorPattern.exec(trimmedLine.slice(closeIndex + 1));
      if (!mappingType || !declarator) {
        continue;
      }

      // Split at the first arrow only: the value may itself be a mapping.
      const arrowIndex = mappingType.indexOf('=>');
      const keyType = arrowIndex === -1 ? mappingType : mappingType.slice(0, arrowIndex).trim();
      const valueType = arrowIndex === -1 ? '' : mappingType.slice(arrowIndex + 2).trim();

      mappings.push({
        name: declarator[2],
        key_type: keyType,
        value_type: valueType,
        visibility: declarator[1] || 'internal', // default visibility
        full_type: `mapping(${mappingType})`,
        declaration: trimmedLine
      });
    }
  }

  return mappings;
}
/**
 * Extract the main ERC20 contract name
 *
 * Comments are removed first, then only declaration-shaped occurrences are
 * considered: the word `contract`, a name, and then either `{` or an `is`
 * inheritance clause. This keeps prose such as "This contract is ..." or a
 * revert string like "contract is paused" from being read as a contract
 * named `is`. Any whitespace (spaces, tabs, newlines) may separate the
 * keyword from the name.
 *
 * The first declared name that is not one of the common base contracts
 * (`Context`, `Ownable`, `IERC20`, `SafeMath`, `ERC20`) is returned; if every
 * declared contract is in that list, the first declaration is returned.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {string} Contract name, or `'Unknown'` when no declaration is found
 */
function extractContractName(sourceCode) {
  // Skip common base contracts
  const skipContracts = new Set(['Context', 'Ownable', 'IERC20', 'SafeMath', 'ERC20']);

  const codeOnly = sourceCode.replace(/\/\*[\s\S]*?\*\/|\/\/[^\n]*/g, ' ');

  // The trailing \b stops the name from being shortened by backtracking so
  // that the lookahead's `is` could match inside it (e.g. "Thesis").
  const declarationPattern = /\bcontract\s+(\w+)\b(?=\s*(?:is\b|\{))/g;
  const declaredNames = Array.from(codeOnly.matchAll(declarationPattern), match => match[1]);

  if (declaredNames.length === 0) {
    return 'Unknown';
  }

  // If all contracts are in skip list, return the first one
  return declaredNames.find(name => !skipContracts.has(name)) ?? declaredNames[0];
}
/**
 * Main function to extract ERC20 contract data
 *
 * Runs `extractFunctions`, `extractVariables`, `extractMappings` and
 * `extractContractName` over the source and bundles their results with a
 * summary of counts per visibility. When `isERC20Contract` reports false a
 * warning is written with `console.warn` and extraction proceeds anyway.
 *
 * @param {string} sourceCode - Solidity source code
 * @returns {Object} `{ functions, variables, mappings, extraction_summary }`;
 *   if an extractor throws, `{ functions: [], variables: [], mappings: [], error }`
 * @throws {Error} If `sourceCode` is empty or not a string
 */
function extractERC20Data(sourceCode) {
  const isUsableSource = typeof sourceCode === 'string' && Boolean(sourceCode);
  if (!isUsableSource) {
    throw new Error('Invalid source code provided');
  }

  // Non-ERC20-looking sources are still processed, just flagged.
  if (!isERC20Contract(sourceCode)) {
    console.warn('Warning: Contract may not be a complete ERC20 implementation');
  }

  // Number of items declared with the given visibility.
  const countVisible = (items, wanted) =>
    items.reduce((total, item) => (item.visibility === wanted ? total + 1 : total), 0);

  try {
    const functions = extractFunctions(sourceCode);
    const variables = extractVariables(sourceCode);
    const mappings = extractMappings(sourceCode);
    const contractName = extractContractName(sourceCode);

    return {
      functions,
      variables,
      mappings,
      extraction_summary: {
        contract_name: contractName,
        total_functions: functions.length,
        public_functions: countVisible(functions, 'public'),
        private_functions: countVisible(functions, 'private'),
        internal_functions: countVisible(functions, 'internal'),
        external_functions: countVisible(functions, 'external'),
        total_variables: variables.length,
        public_variables: countVisible(variables, 'public'),
        private_variables: countVisible(variables, 'private'),
        total_mappings: mappings.length,
        public_mappings: countVisible(mappings, 'public'),
        private_mappings: countVisible(mappings, 'private')
      }
    };
  } catch (error) {
    return {
      functions: [],
      variables: [],
      mappings: [],
      error: `Extraction failed: ${error.message}`
    };
  }
}
// Public API of this module. `toChecksumAddress` is defined in this file but
// is not exported.
module.exports = {
// Bytecode decoding and inspection
decodeBytecode,
decodeInstruction,
normalizeBytecode,
validateBytecode,
extractPushValues,
findJumpDestinations,
extractAddresses,
// ERC20 extraction functions
extractERC20Data,
isERC20Contract,
extractFunctions,
extractVariables,
extractMappings,
extractContractName
};