UNPKG

@bcoders.gr/evm-disassembler

Version:

A comprehensive EVM bytecode disassembler and analyzer with support for multiple EVM versions

593 lines (522 loc) 20.3 kB
/** * Function signature detection and analysis for EVM bytecode * @module function-detector */ const crypto = require('crypto'); const { KNOWN_SIGNATURES } = require('./constants'); /** * Detect function signatures in bytecode * @param {Array} instructions - Array of decoded instructions * @returns {Object} Function detection results */ function detectFunctions(instructions) { const functions = []; const selectors = new Set(); const dispatcherInfo = findDispatcher(instructions); // Look for function selector patterns for (let i = 0; i < instructions.length - 3; i++) { const pattern = detectFunctionPattern(instructions, i); if (pattern) { const selector = pattern.selector; if (!selectors.has(selector)) { selectors.add(selector); // Look up known signature const knownSig = KNOWN_SIGNATURES.get(selector); // Extract function body pattern const bodyPattern = extractFunctionBodyPattern(instructions, pattern.jumpDest); functions.push({ selector, signature: knownSig || 'unknown', pc: pattern.pc, jumpDest: pattern.jumpDest, isKnown: !!knownSig, pattern: pattern.type, opcodePattern: bodyPattern.opcodes, patternHash: bodyPattern.hash, instructionCount: bodyPattern.instructionCount, stackOperations: bodyPattern.stackOps, storageOperations: bodyPattern.storageOps, memoryOperations: bodyPattern.memoryOps, controlFlow: bodyPattern.controlFlow }); } } } // Sort functions by selector for consistent output functions.sort((a, b) => a.selector.localeCompare(b.selector)); return { functions, totalFunctions: functions.length, knownFunctions: functions.filter(f => f.isKnown).length, unknownFunctions: functions.filter(f => !f.isKnown).length, dispatcher: dispatcherInfo }; } /** * Detect function selector pattern at given position * @param {Array} instructions - Array of decoded instructions * @param {number} index - Starting index * @returns {Object|null} Pattern info or null */ function detectFunctionPattern(instructions, index) { const inst = instructions[index]; // Pattern 1: PUSH4 selector, DUP1, PUSH4 value, EQ, PUSH2/PUSH1 dest, JUMPI if (inst.opcode === 'PUSH4' && inst.pushData) { const selector = inst.pushData.substring(0, 8); // Look ahead for EQ and JUMPI pattern for (let j = index + 1; j < Math.min(index + 10, instructions.length); j++) { if (instructions[j].opcode === 'EQ') { // Found EQ, now look for JUMPI for (let k = j + 1; k < Math.min(j + 5, instructions.length); k++) { if (instructions[k].opcode === 'JUMPI') { // Found complete pattern, get jump destination const pushInst = instructions[k - 1]; if (pushInst && pushInst.opcode.startsWith('PUSH')) { return { type: 'standard', selector, pc: inst.pc, jumpDest: parseInt(pushInst.pushData, 16) }; } } } } } } // Pattern 2: DUP1, PUSH4 selector, EQ, PUSH2/PUSH1 dest, JUMPI if (inst.opcode === 'DUP1' && index + 1 < instructions.length) { const nextInst = instructions[index + 1]; if (nextInst.opcode === 'PUSH4' && nextInst.pushData) { const selector = nextInst.pushData.substring(0, 8); // Look for EQ and JUMPI for (let j = index + 2; j < Math.min(index + 8, instructions.length); j++) { if (instructions[j].opcode === 'EQ') { for (let k = j + 1; k < Math.min(j + 5, instructions.length); k++) { if (instructions[k].opcode === 'JUMPI') { const pushInst = instructions[k - 1]; if (pushInst && pushInst.opcode.startsWith('PUSH')) { return { type: 'dup-first', selector, pc: nextInst.pc, jumpDest: parseInt(pushInst.pushData, 16) }; } } } } } } } return null; } /** * Find the function dispatcher pattern * @param {Array} instructions - Array of decoded instructions * @returns {Object|null} Dispatcher info or null */ function findDispatcher(instructions) { let dispatcherStart = null; let dispatcherEnd = null; let selectorCount = 0; // Look for concentrated selector comparisons for (let i = 0; i < instructions.length - 10; i++) { let localSelectorCount = 0; let hasCallDataLoad = false; // Check a window of 20 instructions for (let j = i; j < Math.min(i + 20, instructions.length); j++) { const inst = instructions[j]; if (inst.opcode === 'CALLDATALOAD') { hasCallDataLoad = true; } if (inst.opcode === 'PUSH4' && inst.pushData && inst.pushData.length >= 8) { // Check if this looks like a function selector const value = inst.pushData.substring(0, 8); if (value !== '00000000' && value !== 'ffffffff') { localSelectorCount++; } } } // If we found multiple selectors and CALLDATALOAD, likely dispatcher if (localSelectorCount >= 2 && hasCallDataLoad) { if (dispatcherStart === null) { dispatcherStart = instructions[i].pc; } dispatcherEnd = instructions[Math.min(i + 20, instructions.length - 1)].pc; selectorCount = Math.max(selectorCount, localSelectorCount); } } if (dispatcherStart !== null) { return { start: dispatcherStart, end: dispatcherEnd, estimatedFunctions: selectorCount }; } return null; } /** * Analyze function entry points and their characteristics * @param {Array} instructions - Array of decoded instructions * @param {Array} functions - Detected functions * @returns {Object} Function analysis results */ function analyzeFunctionBodies(instructions, functions) { const functionBodies = []; for (const func of functions) { if (func.jumpDest === undefined) continue; // Find the instruction at the jump destination const startIdx = instructions.findIndex(inst => inst.pc === func.jumpDest); if (startIdx === -1) continue; // Analyze the function body const body = analyzeFunctionBody(instructions, startIdx); functionBodies.push({ ...func, ...body }); } return { functionBodies, averageInstructionCount: functionBodies.length > 0 ? Math.round(functionBodies.reduce((sum, f) => sum + f.instructionCount, 0) / functionBodies.length) : 0, functionsWithStorage: functionBodies.filter(f => f.usesStorage).length, functionsWithExternalCalls: functionBodies.filter(f => f.hasExternalCalls).length, functionsWithEvents: functionBodies.filter(f => f.emitsEvents).length }; } /** * Analyze a single function body * @param {Array} instructions - Array of decoded instructions * @param {number} startIdx - Starting index of function * @returns {Object} Function body analysis */ function analyzeFunctionBody(instructions, startIdx) { let instructionCount = 0; let usesStorage = false; let hasExternalCalls = false; let emitsEvents = false; let hasReturn = false; let hasRevert = false; const storageSlots = new Set(); const externalCalls = []; const events = []; // Analyze until we hit another JUMPDEST or end for (let i = startIdx; i < instructions.length; i++) { const inst = instructions[i]; instructionCount++; // Check for storage operations if (inst.opcode === 'SLOAD' || inst.opcode === 'SSTORE') { usesStorage = true; // Try to get storage slot if it's a direct push if (i > 0 && instructions[i - 1].opcode.startsWith('PUSH')) { const slot = instructions[i - 1].pushData; if (slot) { storageSlots.add(slot); } } } // Check for external calls if (inst.opcode === 'CALL' || inst.opcode === 'DELEGATECALL' || inst.opcode === 'STATICCALL' || inst.opcode === 'CALLCODE') { hasExternalCalls = true; externalCalls.push({ pc: inst.pc, type: inst.opcode }); } // Check for events if (inst.opcode.startsWith('LOG')) { emitsEvents = true; events.push({ pc: inst.pc, type: inst.opcode, topics: parseInt(inst.opcode.substring(3)) }); } // Check for return/revert if (inst.opcode === 'RETURN') hasReturn = true; if (inst.opcode === 'REVERT') hasRevert = true; // Stop at next function or terminating instruction if (i > startIdx && inst.opcode === 'JUMPDEST') break; if (inst.opcode === 'STOP' || inst.opcode === 'RETURN' || inst.opcode === 'REVERT' || inst.opcode === 'SELFDESTRUCT') break; } return { instructionCount, usesStorage, hasExternalCalls, emitsEvents, hasReturn, hasRevert, storageSlots: Array.from(storageSlots), externalCalls, events }; } /** * Extract opcode pattern from function body for comparison * @param {Array} instructions - Array of decoded instructions * @param {number} jumpDest - Jump destination PC * @returns {Object} Pattern information */ function extractFunctionBodyPattern(instructions, jumpDest) { if (jumpDest === undefined) { return { opcodes: [], hash: '', instructionCount: 0, stackOps: { pushes: 0, pops: 0, dups: 0, swaps: 0 }, storageOps: { loads: 0, stores: 0 }, memoryOps: { loads: 0, stores: 0 }, controlFlow: { jumps: 0, calls: 0, returns: 0 } }; } // Find the instruction at the jump destination const startIdx = instructions.findIndex(inst => inst.pc === jumpDest); if (startIdx === -1) { return { opcodes: [], hash: '', instructionCount: 0, stackOps: { pushes: 0, pops: 0, dups: 0, swaps: 0 }, storageOps: { loads: 0, stores: 0 }, memoryOps: { loads: 0, stores: 0 }, controlFlow: { jumps: 0, calls: 0, returns: 0 } }; } const opcodes = []; const stackOps = { pushes: 0, pops: 0, dups: 0, swaps: 0 }; const storageOps = { loads: 0, stores: 0 }; const memoryOps = { loads: 0, stores: 0 }; const controlFlow = { jumps: 0, calls: 0, returns: 0 }; let instructionCount = 0; // Extract opcodes until we hit another JUMPDEST or terminating instruction for (let i = startIdx; i < instructions.length; i++) { const inst = instructions[i]; instructionCount++; // Add opcode to pattern (normalize PUSH operations) if (inst.opcode.startsWith('PUSH')) { opcodes.push('PUSH'); stackOps.pushes++; } else { opcodes.push(inst.opcode); } // Count different operation types if (inst.opcode.startsWith('DUP')) { stackOps.dups++; } else if (inst.opcode.startsWith('SWAP')) { stackOps.swaps++; } else if (['POP', 'ADD', 'SUB', 'MUL', 'DIV', 'MOD', 'EXP', 'AND', 'OR', 'XOR', 'NOT', 'LT', 'GT', 'SLT', 'SGT', 'EQ', 'ISZERO'].includes(inst.opcode)) { stackOps.pops++; } else if (inst.opcode === 'SLOAD') { storageOps.loads++; } else if (inst.opcode === 'SSTORE') { storageOps.stores++; } else if (['MLOAD', 'MLOAD8'].includes(inst.opcode)) { memoryOps.loads++; } else if (['MSTORE', 'MSTORE8'].includes(inst.opcode)) { memoryOps.stores++; } else if (['JUMP', 'JUMPI'].includes(inst.opcode)) { controlFlow.jumps++; } else if (['CALL', 'CALLCODE', 'DELEGATECALL', 'STATICCALL'].includes(inst.opcode)) { controlFlow.calls++; } else if (['RETURN', 'REVERT', 'STOP'].includes(inst.opcode)) { controlFlow.returns++; } // Stop at next function or terminating instruction if (i > startIdx && inst.opcode === 'JUMPDEST') break; if (inst.opcode === 'STOP' || inst.opcode === 'RETURN' || inst.opcode === 'REVERT' || inst.opcode === 'SELFDESTRUCT') break; } // Create a hash of the opcode pattern for quick comparison const patternString = opcodes.join(','); const hash = crypto.createHash('sha256').update(patternString).digest('hex').substring(0, 16); return { opcodes, hash, instructionCount, stackOps, storageOps, memoryOps, controlFlow, patternString }; } /** * Compare function patterns to find similar implementations * @param {Array} functions - Array of functions with patterns * @returns {Object} Comparison results */ function compareFunctionPatterns(functions) { const comparisons = []; const groups = {}; // Group functions by pattern hash for exact matches functions.forEach(func => { if (func.patternHash) { if (!groups[func.patternHash]) { groups[func.patternHash] = []; } groups[func.patternHash].push(func); } }); // Find exact pattern matches const exactMatches = Object.values(groups).filter(group => group.length > 1); // Calculate similarity scores for different functions for (let i = 0; i < functions.length; i++) { for (let j = i + 1; j < functions.length; j++) { const func1 = functions[i]; const func2 = functions[j]; if (func1.patternHash !== func2.patternHash && func1.opcodePattern && func2.opcodePattern) { const similarity = calculatePatternSimilarity(func1.opcodePattern, func2.opcodePattern); if (similarity > 0.7) { // Only include high similarity matches comparisons.push({ function1: { selector: func1.selector, signature: func1.signature }, function2: { selector: func2.selector, signature: func2.signature }, similarity, similarityType: similarity > 0.9 ? 'very-high' : 'high' }); } } } } return { exactMatches: exactMatches.map(group => ({ patternHash: group[0].patternHash, functions: group.map(f => ({ selector: f.selector, signature: f.signature, instructionCount: f.instructionCount })), count: group.length })), similarFunctions: comparisons, totalComparisons: comparisons.length }; } /** * Calculate similarity between two opcode patterns * @param {Array} pattern1 - First opcode pattern * @param {Array} pattern2 - Second opcode pattern * @returns {number} Similarity score (0-1) */ function calculatePatternSimilarity(pattern1, pattern2) { if (!pattern1 || !pattern2 || pattern1.length === 0 || pattern2.length === 0) { return 0; } // Calculate Levenshtein distance normalized by longer sequence length const distance = levenshteinDistance(pattern1, pattern2); const maxLength = Math.max(pattern1.length, pattern2.length); return 1 - (distance / maxLength); } /** * Calculate Levenshtein distance between two arrays * @param {Array} arr1 - First array * @param {Array} arr2 - Second array * @returns {number} Edit distance */ function levenshteinDistance(arr1, arr2) { const matrix = []; for (let i = 0; i <= arr2.length; i++) { matrix[i] = [i]; } for (let j = 0; j <= arr1.length; j++) { matrix[0][j] = j; } for (let i = 1; i <= arr2.length; i++) { for (let j = 1; j <= arr1.length; j++) { if (arr2[i - 1] === arr1[j - 1]) { matrix[i][j] = matrix[i - 1][j - 1]; } else { matrix[i][j] = Math.min( matrix[i - 1][j - 1] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j] + 1 ); } } } return matrix[arr2.length][arr1.length]; } /** * Detect standard contract patterns (ERC20, ERC721, etc.) * @param {Array} functions - Array of detected functions * @returns {Object} Contract pattern detection results */ function detectContractPatterns(functions) { const patterns = { isERC20: false, isERC721: false, isOwnable: false, isPausable: false, detectedPatterns: [] }; // Check for ERC20 interface const erc20Selectors = new Set([ 'a9059cbb', // transfer '095ea7b3', // approve '23b872dd', // transferFrom '70a08231', // balanceOf 'dd62ed3e', // allowance '18160ddd' // totalSupply ]); // Check for ERC721 interface const erc721Selectors = new Set([ '6352211e', // ownerOf '42842e0e', // safeTransferFrom 'b88d4fde', // safeTransferFrom '23b872dd', // transferFrom 'a22cb465', // setApprovalForAll 'e985e9c5' // isApprovedForAll ]); const foundSelectors = new Set(functions.map(f => f.selector)); // Check ERC20 let erc20Count = 0; for (const selector of erc20Selectors) { if (foundSelectors.has(selector)) erc20Count++; } if (erc20Count >= 4) { patterns.isERC20 = true; patterns.detectedPatterns.push('ERC20'); } // Check ERC721 let erc721Count = 0; for (const selector of erc721Selectors) { if (foundSelectors.has(selector)) erc721Count++; } if (erc721Count >= 4) { patterns.isERC721 = true; patterns.detectedPatterns.push('ERC721'); } // Check Ownable pattern if (foundSelectors.has('8da5cb5b') || // owner() foundSelectors.has('f2fde38b')) { // transferOwnership(address) patterns.isOwnable = true; patterns.detectedPatterns.push('Ownable'); } // Check Pausable pattern if (foundSelectors.has('5c975abb') || // paused() foundSelectors.has('8456cb59')) { // pause() patterns.isPausable = true; patterns.detectedPatterns.push('Pausable'); } return patterns; } module.exports = { detectFunctions, detectFunctionPattern, findDispatcher, analyzeFunctionBodies, analyzeFunctionBody, extractFunctionBodyPattern, compareFunctionPatterns, calculatePatternSimilarity, detectContractPatterns };