// Package: @bcoders.gr/evm-disassembler
// Version: (not captured in this listing)
// A comprehensive EVM bytecode disassembler and analyzer with support for multiple EVM versions.
// Listing metadata: 593 lines (522 loc) • 20.3 kB
// Language: JavaScript
/**
* Function signature detection and analysis for EVM bytecode
* @module function-detector
*/
const crypto = require('crypto');
const { KNOWN_SIGNATURES } = require('./constants');
/**
 * Scan decoded instructions for function-selector comparisons and describe
 * every distinct selector that is found.
 *
 * Each candidate position is handed to detectFunctionPattern; the first match
 * for a given selector wins and later matches for the same selector are
 * ignored. The signature is looked up with KNOWN_SIGNATURES.get(selector)
 * (presumably a Map of selector -> signature; confirm in ./constants).
 *
 * @param {Array} instructions - Array of decoded instructions
 * @returns {Object} `{ functions, totalFunctions, knownFunctions,
 *   unknownFunctions, dispatcher }` where `functions` is sorted by selector
 *   and `dispatcher` is whatever findDispatcher returned
 */
function detectFunctions(instructions) {
  const dispatcher = findDispatcher(instructions);
  const seenSelectors = new Set();
  const functions = [];

  // The last three positions are never used as a pattern start.
  for (let position = 0; position < instructions.length - 3; position += 1) {
    const match = detectFunctionPattern(instructions, position);
    if (!match || seenSelectors.has(match.selector)) {
      continue;
    }
    seenSelectors.add(match.selector);

    const signature = KNOWN_SIGNATURES.get(match.selector);
    const body = extractFunctionBodyPattern(instructions, match.jumpDest);

    functions.push({
      selector: match.selector,
      signature: signature || 'unknown',
      pc: match.pc,
      jumpDest: match.jumpDest,
      isKnown: Boolean(signature),
      pattern: match.type,
      opcodePattern: body.opcodes,
      patternHash: body.hash,
      instructionCount: body.instructionCount,
      stackOperations: body.stackOps,
      storageOperations: body.storageOps,
      memoryOperations: body.memoryOps,
      controlFlow: body.controlFlow
    });
  }

  // Deterministic output order regardless of where selectors appear in the code.
  functions.sort((left, right) => left.selector.localeCompare(right.selector));

  const knownFunctions = functions.reduce(
    (count, fn) => (fn.isKnown ? count + 1 : count),
    0
  );

  return {
    functions,
    totalFunctions: functions.length,
    knownFunctions,
    unknownFunctions: functions.length - knownFunctions,
    dispatcher
  };
}
/**
 * Convert the hex immediate of a PUSH instruction into a numeric jump target.
 * @param {*} pushData - Hex immediate of the PUSH instruction (may be missing)
 * @returns {number|undefined} Parsed target, or undefined when nothing parseable is present
 */
function parseJumpTarget(pushData) {
  const target = Number.parseInt(pushData, 16);
  // undefined (not NaN) is what the consumers in this file test for.
  return Number.isNaN(target) ? undefined : target;
}

/**
 * Look for an EQ followed, within the next four instructions, by a JUMPI whose
 * immediately preceding instruction is a PUSH.
 * @param {Array} instructions - Array of decoded instructions
 * @param {number} from - First index to inspect for EQ (inclusive)
 * @param {number} limit - Index at which to stop looking for EQ (exclusive)
 * @returns {{jumpDest: (number|undefined)}|null} Wrapper around the pushed
 *   destination, or null when no EQ/PUSH/JUMPI sequence is found
 */
function findSelectorJump(instructions, from, limit) {
  const eqEnd = Math.min(limit, instructions.length);
  for (let j = from; j < eqEnd; j++) {
    const candidate = instructions[j];
    if (!candidate || candidate.opcode !== 'EQ') continue;

    const jumpEnd = Math.min(j + 5, instructions.length);
    for (let k = j + 1; k < jumpEnd; k++) {
      const jump = instructions[k];
      if (!jump || jump.opcode !== 'JUMPI') continue;

      // The destination is taken from the instruction directly before JUMPI.
      const pushInst = instructions[k - 1];
      if (pushInst && typeof pushInst.opcode === 'string' && pushInst.opcode.startsWith('PUSH')) {
        return { jumpDest: parseJumpTarget(pushInst.pushData) };
      }
    }
  }
  return null;
}

/**
 * Detect function selector pattern at given position.
 *
 * Two shapes are recognised:
 *  - 'standard':  PUSH4 selector ... EQ ... PUSHn dest, JUMPI
 *    (EQ is searched in the 9 instructions after the PUSH4)
 *  - 'dup-first': DUP1, PUSH4 selector ... EQ ... PUSHn dest, JUMPI
 *    (EQ is searched in the 6 instructions after the PUSH4)
 *
 * The selector is the first 8 hex characters of the PUSH4 data and `pc` is the
 * program counter of that PUSH4. When the PUSH before JUMPI carries no
 * parseable data, `jumpDest` is `undefined`.
 *
 * @param {Array} instructions - Array of decoded instructions
 * @param {number} index - Starting index
 * @returns {Object|null} `{ type, selector, pc, jumpDest }`, or null when no
 *   pattern starts at `index` (including when `index` is out of range)
 */
function detectFunctionPattern(instructions, index) {
  const inst = instructions[index];
  if (!inst) {
    return null;
  }

  // Pattern 1: the selector PUSH4 is the instruction at `index`.
  if (inst.opcode === 'PUSH4' && inst.pushData) {
    const hit = findSelectorJump(instructions, index + 1, index + 10);
    if (hit) {
      return {
        type: 'standard',
        selector: inst.pushData.substring(0, 8),
        pc: inst.pc,
        jumpDest: hit.jumpDest
      };
    }
  }

  // Pattern 2: DUP1 at `index`, selector PUSH4 right after it.
  if (inst.opcode === 'DUP1') {
    const nextInst = instructions[index + 1];
    if (nextInst && nextInst.opcode === 'PUSH4' && nextInst.pushData) {
      const hit = findSelectorJump(instructions, index + 2, index + 8);
      if (hit) {
        return {
          type: 'dup-first',
          selector: nextInst.pushData.substring(0, 8),
          pc: nextInst.pc,
          jumpDest: hit.jumpDest
        };
      }
    }
  }

  return null;
}
/**
 * Locate the region that looks like a function dispatcher.
 *
 * A 20-instruction window is slid over the code. A window qualifies when it
 * contains a CALLDATALOAD and at least two PUSH4 instructions whose first 8
 * hex characters are neither '00000000' nor 'ffffffff'. Window starts are
 * limited to indices below `instructions.length - 10`.
 *
 * @param {Array} instructions - Array of decoded instructions
 * @returns {Object|null} `{ start, end, estimatedFunctions }` where `start` is
 *   the pc of the first qualifying window start, `end` is the pc at the index
 *   20 past the last qualifying window start (clamped to the last
 *   instruction), and `estimatedFunctions` is the largest selector count seen
 *   in a qualifying window; null when no window qualifies
 */
function findDispatcher(instructions) {
  const WINDOW_SIZE = 20;

  // True for a PUSH4 whose leading 8 hex digits look like a real selector.
  const isSelectorPush = (inst) => {
    if (inst.opcode !== 'PUSH4' || !inst.pushData || !(inst.pushData.length >= 8)) {
      return false;
    }
    const head = inst.pushData.substring(0, 8);
    return head !== '00000000' && head !== 'ffffffff';
  };

  let firstPc = null;
  let lastPc = null;
  let peakSelectors = 0;

  for (let begin = 0; begin < instructions.length - 10; begin += 1) {
    const windowInsts = instructions.slice(begin, begin + WINDOW_SIZE);
    const sawCallDataLoad = windowInsts.some((inst) => inst.opcode === 'CALLDATALOAD');
    const selectorPushes = windowInsts.filter(isSelectorPush).length;

    if (selectorPushes < 2 || !sawCallDataLoad) {
      continue;
    }

    if (firstPc === null) {
      firstPc = instructions[begin].pc;
    }
    const endIndex = Math.min(begin + WINDOW_SIZE, instructions.length - 1);
    lastPc = instructions[endIndex].pc;
    peakSelectors = Math.max(peakSelectors, selectorPushes);
  }

  return firstPc === null
    ? null
    : { start: firstPc, end: lastPc, estimatedFunctions: peakSelectors };
}
/**
 * Analyze function entry points and their characteristics.
 *
 * For every function whose `jumpDest` matches the `pc` of an instruction, the
 * body starting there is analysed with analyzeFunctionBody and the result is
 * merged over a copy of the function record (body fields win on key clashes).
 * Functions with no `jumpDest`, or whose `jumpDest` matches no instruction,
 * are left out of the result.
 *
 * @param {Array} instructions - Array of decoded instructions
 * @param {Array} functions - Detected functions
 * @returns {Object} `{ functionBodies, averageInstructionCount,
 *   functionsWithStorage, functionsWithExternalCalls, functionsWithEvents }`;
 *   the average is rounded and is 0 when no body was analysed
 */
function analyzeFunctionBodies(instructions, functions) {
  // Index instructions by pc once so each function lookup is O(1) instead of
  // a linear scan. The first instruction with a given pc wins, matching what
  // a findIndex-based lookup would return.
  const indexByPc = new Map();
  instructions.forEach((inst, idx) => {
    if (!indexByPc.has(inst.pc)) {
      indexByPc.set(inst.pc, idx);
    }
  });

  const functionBodies = [];
  for (const func of functions) {
    // NaN can never equal a real pc; skip it explicitly because Map lookups
    // (unlike ===) would treat NaN keys as equal.
    if (func.jumpDest === undefined || Number.isNaN(func.jumpDest)) continue;

    const startIdx = indexByPc.get(func.jumpDest);
    if (startIdx === undefined) continue;

    functionBodies.push({
      ...func,
      ...analyzeFunctionBody(instructions, startIdx)
    });
  }

  const totalInstructions = functionBodies.reduce((sum, f) => sum + f.instructionCount, 0);

  return {
    functionBodies,
    averageInstructionCount: functionBodies.length > 0
      ? Math.round(totalInstructions / functionBodies.length)
      : 0,
    functionsWithStorage: functionBodies.filter(f => f.usesStorage).length,
    functionsWithExternalCalls: functionBodies.filter(f => f.hasExternalCalls).length,
    functionsWithEvents: functionBodies.filter(f => f.emitsEvents).length
  };
}
/**
 * Walk one function body and summarise what it does.
 *
 * Starting at `startIdx`, instructions are visited until a STOP, RETURN,
 * REVERT or SELFDESTRUCT is visited, a JUMPDEST other than the starting
 * instruction is visited, or the instruction list ends. The instruction that
 * ends the walk is itself counted and inspected.
 *
 * @param {Array} instructions - Array of decoded instructions
 * @param {number} startIdx - Starting index of function
 * @returns {Object} `{ instructionCount, usesStorage, hasExternalCalls,
 *   emitsEvents, hasReturn, hasRevert, storageSlots, externalCalls, events }`
 */
function analyzeFunctionBody(instructions, startIdx) {
  const externalCallOpcodes = ['CALL', 'DELEGATECALL', 'STATICCALL', 'CALLCODE'];
  const terminatingOpcodes = ['STOP', 'RETURN', 'REVERT', 'SELFDESTRUCT'];

  const slots = new Set();
  const externalCalls = [];
  const events = [];
  let visited = 0;
  let touchesStorage = false;
  let hasReturn = false;
  let hasRevert = false;

  for (let pos = startIdx; pos < instructions.length; pos += 1) {
    const current = instructions[pos];
    const opcode = current.opcode;
    visited += 1;

    if (opcode === 'SLOAD' || opcode === 'SSTORE') {
      touchesStorage = true;
      // Record the slot only when it was pushed by the instruction right before.
      const operand = pos > 0 ? instructions[pos - 1] : null;
      if (operand !== null && operand.opcode.startsWith('PUSH') && operand.pushData) {
        slots.add(operand.pushData);
      }
    }

    if (externalCallOpcodes.includes(opcode)) {
      externalCalls.push({ pc: current.pc, type: opcode });
    }

    if (opcode.startsWith('LOG')) {
      // The digits after "LOG" give the topic count.
      events.push({
        pc: current.pc,
        type: opcode,
        topics: parseInt(opcode.substring(3))
      });
    }

    hasReturn = hasReturn || opcode === 'RETURN';
    hasRevert = hasRevert || opcode === 'REVERT';

    const reachedNextEntry = pos > startIdx && opcode === 'JUMPDEST';
    if (reachedNextEntry || terminatingOpcodes.includes(opcode)) {
      break;
    }
  }

  return {
    instructionCount: visited,
    usesStorage: touchesStorage,
    hasExternalCalls: externalCalls.length > 0,
    emitsEvents: events.length > 0,
    hasReturn,
    hasRevert,
    storageSlots: [...slots],
    externalCalls,
    events
  };
}
/**
 * Build the all-zero pattern record used as the starting point for a scan and
 * as the result when no function body can be located.
 * @returns {Object} Fresh pattern object with empty opcodes, hash and counters
 */
function createEmptyBodyPattern() {
  return {
    opcodes: [],
    hash: '',
    instructionCount: 0,
    stackOps: { pushes: 0, pops: 0, dups: 0, swaps: 0 },
    storageOps: { loads: 0, stores: 0 },
    memoryOps: { loads: 0, stores: 0 },
    controlFlow: { jumps: 0, calls: 0, returns: 0 },
    patternString: ''
  };
}

/**
 * Extract opcode pattern from function body for comparison.
 *
 * The body starts at the instruction whose `pc` equals `jumpDest` and runs
 * until a STOP, RETURN, REVERT or SELFDESTRUCT is visited, a JUMPDEST other
 * than the starting instruction is visited, or the instruction list ends. The
 * instruction that ends the scan is included in the pattern and the count.
 * Every PUSHn is recorded as plain 'PUSH' so that differing immediates do not
 * change the pattern.
 *
 * @param {Array} instructions - Array of decoded instructions
 * @param {number} jumpDest - Jump destination PC
 * @returns {Object} `{ opcodes, hash, instructionCount, stackOps, storageOps,
 *   memoryOps, controlFlow, patternString }`. `hash` is the first 16 hex
 *   characters of the SHA-256 of `patternString` (opcodes joined by ',').
 *   When `jumpDest` is undefined/NaN or matches no instruction, every field
 *   is empty/zero and `hash` and `patternString` are ''.
 */
function extractFunctionBodyPattern(instructions, jumpDest) {
  if (jumpDest === undefined || Number.isNaN(jumpDest)) {
    return createEmptyBodyPattern();
  }

  const startIdx = instructions.findIndex(inst => inst.pc === jumpDest);
  if (startIdx === -1) {
    return createEmptyBodyPattern();
  }

  const pattern = createEmptyBodyPattern();
  const { opcodes, stackOps, storageOps, memoryOps, controlFlow } = pattern;

  // Opcodes tallied under stackOps.pops.
  const poppingOpcodes = new Set([
    'POP', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD', 'EXP', 'AND', 'OR', 'XOR',
    'NOT', 'LT', 'GT', 'SLT', 'SGT', 'EQ', 'ISZERO'
  ]);
  const terminatingOpcodes = new Set(['STOP', 'RETURN', 'REVERT', 'SELFDESTRUCT']);

  for (let i = startIdx; i < instructions.length; i++) {
    const opcode = instructions[i].opcode;
    pattern.instructionCount++;

    // Normalise PUSH1..PUSH32 to a single token.
    if (opcode.startsWith('PUSH')) {
      opcodes.push('PUSH');
      stackOps.pushes++;
    } else {
      opcodes.push(opcode);
    }

    if (opcode.startsWith('DUP')) {
      stackOps.dups++;
    } else if (opcode.startsWith('SWAP')) {
      stackOps.swaps++;
    } else if (poppingOpcodes.has(opcode)) {
      stackOps.pops++;
    } else if (opcode === 'SLOAD') {
      storageOps.loads++;
    } else if (opcode === 'SSTORE') {
      storageOps.stores++;
    } else if (opcode === 'MLOAD' || opcode === 'MLOAD8') {
      // NOTE(review): 'MLOAD8' does not appear to be a standard EVM opcode;
      // kept so results are unchanged if the decoder ever emits it - confirm.
      memoryOps.loads++;
    } else if (opcode === 'MSTORE' || opcode === 'MSTORE8') {
      memoryOps.stores++;
    } else if (opcode === 'JUMP' || opcode === 'JUMPI') {
      controlFlow.jumps++;
    } else if (opcode === 'CALL' || opcode === 'CALLCODE' ||
               opcode === 'DELEGATECALL' || opcode === 'STATICCALL') {
      controlFlow.calls++;
    } else if (opcode === 'RETURN' || opcode === 'REVERT' || opcode === 'STOP') {
      controlFlow.returns++;
    }

    // Stop at the next entry point or at a terminating instruction.
    if (i > startIdx && opcode === 'JUMPDEST') break;
    if (terminatingOpcodes.has(opcode)) break;
  }

  // Short digest of the opcode sequence for quick equality checks.
  pattern.patternString = opcodes.join(',');
  pattern.hash = crypto
    .createHash('sha256')
    .update(pattern.patternString)
    .digest('hex')
    .substring(0, 16);

  return pattern;
}
/**
 * Find functions whose opcode patterns are identical or nearly identical.
 *
 * Functions sharing a truthy `patternHash` are reported together as exact
 * matches (only groups with more than one member). Every pair with differing
 * hashes and truthy `opcodePattern` on both sides is scored with
 * calculatePatternSimilarity; pairs scoring above 0.7 are reported as 'high',
 * and above 0.9 as 'very-high'.
 *
 * @param {Array} functions - Array of functions with patterns
 * @returns {Object} `{ exactMatches, similarFunctions, totalComparisons }`
 *   where `totalComparisons` is the number of entries in `similarFunctions`
 */
function compareFunctionPatterns(functions) {
  // Bucket functions by hash; entries without a hash are not grouped.
  const byHash = {};
  for (const fn of functions) {
    const key = fn.patternHash;
    if (!key) {
      continue;
    }
    if (!byHash[key]) {
      byHash[key] = [];
    }
    byHash[key].push(fn);
  }

  const identify = (fn) => ({ selector: fn.selector, signature: fn.signature });

  const exactMatches = Object.values(byHash)
    .filter((members) => members.length > 1)
    .map((members) => ({
      patternHash: members[0].patternHash,
      functions: members.map((member) => ({
        ...identify(member),
        instructionCount: member.instructionCount
      })),
      count: members.length
    }));

  // Score each unordered pair once.
  const similarFunctions = [];
  functions.forEach((left, leftIdx) => {
    for (const right of functions.slice(leftIdx + 1)) {
      const comparable =
        left.patternHash !== right.patternHash &&
        left.opcodePattern &&
        right.opcodePattern;
      if (!comparable) {
        continue;
      }

      const similarity = calculatePatternSimilarity(left.opcodePattern, right.opcodePattern);
      if (similarity > 0.7) {
        let similarityType = 'high';
        if (similarity > 0.9) {
          similarityType = 'very-high';
        }
        similarFunctions.push({
          function1: identify(left),
          function2: identify(right),
          similarity,
          similarityType
        });
      }
    }
  });

  return {
    exactMatches,
    similarFunctions,
    totalComparisons: similarFunctions.length
  };
}
/**
 * Score how alike two opcode sequences are.
 *
 * The score is one minus the edit distance (from levenshteinDistance) divided
 * by the length of the longer sequence. A missing or empty sequence on either
 * side scores 0.
 *
 * @param {Array} pattern1 - First opcode pattern
 * @param {Array} pattern2 - Second opcode pattern
 * @returns {number} Similarity score (0-1)
 */
function calculatePatternSimilarity(pattern1, pattern2) {
  const bothUsable =
    pattern1 && pattern2 && pattern1.length !== 0 && pattern2.length !== 0;
  if (!bothUsable) {
    return 0;
  }

  const longest = Math.max(pattern1.length, pattern2.length);
  const edits = levenshteinDistance(pattern1, pattern2);
  return 1 - (edits / longest);
}
/**
 * Calculate Levenshtein distance between two arrays.
 *
 * Elements are compared with `===`; insertions, deletions and substitutions
 * each cost 1. Only two rows of the dynamic-programming table are kept at a
 * time, so extra memory is proportional to `arr1.length` rather than to
 * `arr1.length * arr2.length`.
 *
 * @param {Array} arr1 - First array
 * @param {Array} arr2 - Second array
 * @returns {number} Edit distance
 */
function levenshteinDistance(arr1, arr2) {
  const width = arr1.length;

  // Row 0: turning an empty prefix of arr2 into the first j items of arr1.
  let previous = Array.from({ length: width + 1 }, (_, j) => j);

  for (let i = 1; i <= arr2.length; i++) {
    // Column 0: turning the first i items of arr2 into an empty prefix of arr1.
    const current = [i];
    for (let j = 1; j <= width; j++) {
      if (arr2[i - 1] === arr1[j - 1]) {
        current[j] = previous[j - 1];
      } else {
        current[j] = 1 + Math.min(
          previous[j - 1], // substitution
          current[j - 1],  // insertion
          previous[j]      // deletion
        );
      }
    }
    previous = current;
  }

  return previous[width];
}
/**
 * Guess which well-known contract interfaces a set of functions implements.
 *
 * ERC20 and ERC721 are reported when at least four of their six listed
 * selectors are present. Ownable is reported when owner() or
 * transferOwnership(address) is present, and Pausable when paused() or
 * pause() is present.
 *
 * @param {Array} functions - Array of detected functions
 * @returns {Object} `{ isERC20, isERC721, isOwnable, isPausable,
 *   detectedPatterns }`, with `detectedPatterns` listing the matched names in
 *   the order ERC20, ERC721, Ownable, Pausable
 */
function detectContractPatterns(functions) {
  const erc20Selectors = [
    'a9059cbb', // transfer
    '095ea7b3', // approve
    '23b872dd', // transferFrom
    '70a08231', // balanceOf
    'dd62ed3e', // allowance
    '18160ddd'  // totalSupply
  ];
  const erc721Selectors = [
    '6352211e', // ownerOf
    '42842e0e', // safeTransferFrom
    'b88d4fde', // safeTransferFrom
    '23b872dd', // transferFrom
    'a22cb465', // setApprovalForAll
    'e985e9c5'  // isApprovedForAll
  ];
  const ownableSelectors = [
    '8da5cb5b', // owner()
    'f2fde38b'  // transferOwnership(address)
  ];
  const pausableSelectors = [
    '5c975abb', // paused()
    '8456cb59'  // pause()
  ];

  const present = new Set(functions.map((fn) => fn.selector));
  const countPresent = (selectors) =>
    selectors.filter((selector) => present.has(selector)).length;

  const isERC20 = countPresent(erc20Selectors) >= 4;
  const isERC721 = countPresent(erc721Selectors) >= 4;
  const isOwnable = countPresent(ownableSelectors) > 0;
  const isPausable = countPresent(pausableSelectors) > 0;

  const detectedPatterns = [
    ['ERC20', isERC20],
    ['ERC721', isERC721],
    ['Ownable', isOwnable],
    ['Pausable', isPausable]
  ]
    .filter(([, matched]) => matched)
    .map(([label]) => label);

  return { isERC20, isERC721, isOwnable, isPausable, detectedPatterns };
}
// Public API of this module (CommonJS). levenshteinDistance is not exported;
// within this file it is called only by calculatePatternSimilarity.
module.exports = {
detectFunctions,
detectFunctionPattern,
findDispatcher,
analyzeFunctionBodies,
analyzeFunctionBody,
extractFunctionBodyPattern,
compareFunctionPatterns,
calculatePatternSimilarity,
detectContractPatterns
};