x86
Version:
Generates x86_64 native code from assembly instructions
275 lines (233 loc) • 8.29 kB
text/typescript
import * as o from './operand';
// # x86_64 Instruction
//
// Each CPU instruction is encoded in the following form, where only the
// *Op-code* byte is required:
//
// |-------------------------------------------------|--------------------------------------------|
// | Instruction | Next instruction |
// |-------------------------------------------------|--------------------------------------------|
// |byte 1 |byte 2 |byte 3 |byte 4 |byte 5 |
// |---------|---------|---------|---------|---------| ...
// |REX |Op-code |Mod-R/M |SIB |Immediat | ...
// |---------|---------|---------|---------|---------| ...
// |optional |required |optional |optional |optional |
// |-------------------------------------------------|
/**
 * Common base of every encodable piece of an instruction (prefix, op-code,
 * Mod-R/M, SIB, displacement, immediate).
 */
export abstract class InstructionPart {
    // ins: Instruction;

    /**
     * Serializes this part into a byte array.
     *
     * The implementations in this file append their byte(s) to `arr` and
     * return that same array.
     *
     * @param arr Array of byte values to write into.
     * @returns Array holding the written bytes.
     */
    abstract write(arr: number[]): number[];
}
/**
 * Base class for instruction prefixes; `PrefixRex` and `PrefixLock` derive
 * from it. It adds no members of its own.
 */
export abstract class Prefix extends InstructionPart {
}
// ## REX
//
// REX is an optional prefix used for two reasons:
//
// 1. For 64-bit instructions that require this prefix to be used.
// 2. When using extended registers: r8, r9, r10, etc.; r8d, r9d, r10d, etc.
//
// REX byte layout:
//
// 76543210
// .1..WRXB
// .......B <--- R/M field in Mod-R/M byte, or BASE field in SIB byte addresses one of the extended registers.
// ......X <---- INDEX field in SIB byte addresses one of the extended registers.
// .....R <----- REG field in Mod-R/M byte addresses one of the extended registers.
// ....W <------ When set, the instruction uses 64-bit operand size (REX.W).
// .1 <--------- 0x40 identifies the REX prefix.
/**
 * REX prefix byte, laid out as `0100WRXB`.
 *
 * Each flag is expected to be `0` or `1`. The values are shifted into place
 * as given, without masking, so any other value corrupts the emitted byte.
 */
export class PrefixRex extends Prefix {
    W: number; // 0 or 1
    R: number; // 0 or 1
    X: number; // 0 or 1
    B: number; // 0 or 1

    /**
     * @param W Value for bit 3 of the REX byte (0 or 1).
     * @param R Value for bit 2 of the REX byte (0 or 1).
     * @param X Value for bit 1 of the REX byte (0 or 1).
     * @param B Value for bit 0 of the REX byte (0 or 1).
     */
    constructor(W: number, R: number, X: number, B: number) {
        super();
        this.W = W;
        this.R = R;
        this.X = X;
        this.B = B;
    }

    /**
     * Appends the REX byte to `arr` when at least one flag is set; otherwise
     * writes nothing.
     *
     * NOTE(review): because an all-zero REX is skipped, a bare `0x40` prefix
     * can never be emitted from here. If callers ever need one (it is what
     * selects the SPL/BPL/SIL/DIL byte registers), that has to be handled
     * separately -- confirm against callers.
     *
     * @param arr Array of byte values to append to.
     * @returns The same `arr`.
     */
    write(arr: number[]): number[] {
        if(this.W || this.R || this.X || this.B)
            arr.push(0b01000000 | (this.W << 3) | (this.R << 2) | (this.X << 1) | this.B);
        return arr;
    }
}
// ## LOCK
//
// Prefix for performing atomic memory operations.
/**
 * LOCK prefix: a single byte, `0xF0` by default.
 */
export class PrefixLock extends Prefix {
    /** Byte value emitted for this prefix. */
    value = 0xF0;

    /**
     * Appends the prefix byte to `arr`.
     *
     * @param arr Array of byte values to append to.
     * @returns The same `arr`.
     */
    write(arr: number[]): number[] {
        const lockByte = this.value;
        arr.push(lockByte);
        return arr;
    }
}
// ## Op-code
//
// Primary op-code of the instruction. Often the lower 2 or 3 bits of the
// instruction op-code may be set independently.
//
// `d` and `s` bits, specify: d - direction of the instruction, and s - size of the instruction.
// - **s**
// - 1 -- word size
// - 0 -- byte size
// - **d**
// - 1 -- register is destination
// - 0 -- register is source
//
// 76543210
// ......ds
//
// Lower 3 bits may also be used to encode register for some instructions. We set
// `.regInOp = true` if that is the case.
//
// 76543210
// .....000 = RAX
/**
 * Primary op-code of an instruction, stored as one number that may span up
 * to three bytes.
 */
export class Opcode extends InstructionPart {

    /* Now we support up to 3 byte instructions */

    /** Mask with the `s` bit (bit 0) cleared. */
    static MASK_SIZE = 0b111111111111111111111110;

    /** Mask with the `d` bit (bit 1) cleared. */
    static MASK_DIRECTION = 0b111111111111111111111101;

    /** Mask with the low three bits cleared, for when a register is encoded into the op-code. */
    static MASK_OP = 0b111111111111111111111000;

    /** Values of the `s` bit. */
    static SIZE = {
        BYTE: 0b0,
        WORD: 0b1,
    };

    /** Values of the `d` bit, already positioned at bit 1. */
    static DIRECTION = {
        REG_IS_SRC: 0b00,
        REG_IS_DST: 0b10,
    };

    /** Main op-code value. */
    op: number = 0;

    /** Whether the lower 3 bits of the op-code should hold a register address. */
    regInOp: boolean = false;

    /**
     * Whether the register is the destination of this instruction; when
     * `false` the register is the source. This corresponds to the `d` bit.
     */
    regIsDest: boolean = true;

    /** `s` bit: whether the instruction operates on "words" (`true`) or "bytes" (`false`). */
    isSizeWord: boolean = true;

    /**
     * Appends the op-code to `arr`, most significant byte first. The third
     * and second bytes are written only when `op` exceeds `0xFFFF` and
     * `0xFF` respectively; the lowest byte is always written.
     *
     * @param arr Array of byte values to append to.
     * @returns The same `arr`.
     */
    write(arr: number[]): number[] {
        const code = this.op;

        if (code > 0xFFFF) {
            arr.push((code >> 16) & 0xFF);
        }
        if (code > 0xFF) {
            arr.push((code >> 8) & 0xFF);
        }
        arr.push(code & 0xFF);

        return arr;
    }
}
// ## Mod-R/M
//
// Mod-R/M is an optional byte after the op-code that specifies the operands
// of the instruction (registers and/or memory addressing mode) or extends the op-code.
//
// 76543210
// .....XXX <--- R/M field: Register or Memory
// ..XXX <------ REG field: Register or op-code extension
// XX <--------- MOD field: mode of operation
/**
 * Mod-R/M byte: `MOD` (2 bits), `REG` (3 bits) and `R/M` (3 bits).
 */
export class Modrm extends InstructionPart {

    // Two bits of `MOD` field in `Mod-R/M` byte.
    static MOD = {
        INDIRECT:   0b00,
        DISP8:      0b01,
        DISP32:     0b10,
        REG_TO_REG: 0b11,
    };

    // When this value is encoded in R/M field, SIB byte has to follow Mod-R/M byte.
    static RM_NEEDS_SIB = 0b100;

    /**
     * Picks the `MOD` value for a memory operand: `INDIRECT` when it has no
     * displacement, `DISP8` when the displacement size is `DISP8`, and
     * `DISP32` for any other displacement size.
     *
     * NOTE(review): no special case is made here for a base register whose
     * 3-bit id is `101` combined with `MOD = 00`, which the architecture
     * treats differently -- presumably handled by the caller; confirm.
     *
     * @param mem Memory operand to inspect.
     * @returns One of the `Modrm.MOD` values.
     */
    static getMod(mem: o.Memory) {
        if(!mem.displacement) return Modrm.MOD.INDIRECT;
        if(mem.displacement.size === o.DisplacementValue.SIZE.DISP8) return Modrm.MOD.DISP8;
        return Modrm.MOD.DISP32;
    }

    /**
     * Picks the `R/M` value for a memory operand: the 3-bit id of its base
     * register, or `RM_NEEDS_SIB` when there is no base register.
     *
     * @param mem Memory operand to inspect.
     */
    static getRm(mem: o.Memory) {
        return mem.base ? mem.base.get3bitId() : Modrm.RM_NEEDS_SIB;
    }

    mod: number = 0;
    reg: number = 0;
    rm: number = 0;

    /**
     * Field values are stored as given and are not masked when written, so
     * they are expected to already fit their bit widths.
     *
     * @param mod Value for the 2-bit `MOD` field.
     * @param reg Value for the 3-bit `REG` field.
     * @param rm Value for the 3-bit `R/M` field.
     */
    constructor(mod: number, reg: number, rm: number) {
        super();
        this.mod = mod;
        this.reg = reg;
        this.rm = rm;
    }

    /**
     * Appends the packed Mod-R/M byte to `arr`.
     *
     * @param arr Array of byte values to append to; a new array is used when omitted.
     * @returns The array that was written to.
     */
    write(arr: number[] = []): number[] {
        arr.push((this.mod << 6) | (this.reg << 3) | this.rm);
        return arr;
    }
}
// ## SIB
//
// SIB (scale-index-base) is an optional byte used when dereferencing memory
// with a complex offset, like when you do:
//
// mov rax, [rbp + rdx * 8]
//
// The above operation in SIB byte is encoded as follows:
//
// rbp + rdx * 8 = BASE + INDEX * USERSCALE
//
// Where `USERSCALE` can only be 1, 2, 4 or 8; and is encoded as follows:
//
// USERSCALE (decimal) | SCALE (binary)
// ------------------- | --------------
// 1 | 00
// 2 | 01
// 4 | 10
// 8 | 11
//
// The layout of SIB byte:
//
// 76543210
// .....XXX <--- BASE field: base register address
// ..XXX <------ INDEX field: address of the index register, which is multiplied by the scale
// XX <--------- SCALE field: specifies multiple of INDEX: USERSCALE * INDEX
/**
 * SIB byte: `SCALE` (2 bits), `INDEX` (3 bits) and `BASE` (3 bits).
 */
export class Sib extends InstructionPart {
    /** Encoded 2-bit `SCALE` field (not the user-facing multiplier). */
    S: number = 0;
    /** Value of the 3-bit `INDEX` field. */
    I: number = 0;
    /** Value of the 3-bit `BASE` field. */
    B: number = 0;

    /**
     * @param userscale Multiplier applied to the index register: 1, 2, 4 or 8.
     * @param I Value for the `INDEX` field; stored as given, not masked.
     * @param B Value for the `BASE` field; stored as given, not masked.
     * @throws TypeError When `userscale` is not one of 1, 2, 4 or 8.
     */
    constructor(userscale: number, I: number, B: number) {
        super();
        this.setScale(userscale);
        this.I = I;
        this.B = B;
    }

    /**
     * Converts the user-facing multiplier into the 2-bit `SCALE` encoding
     * and stores it in `S`.
     *
     * @param userscale Multiplier: 1, 2, 4 or 8.
     * @throws TypeError When `userscale` is any other value.
     */
    setScale(userscale: number): void {
        switch(userscale) {
            case 1: this.S = 0b00; break;
            case 2: this.S = 0b01; break;
            case 4: this.S = 0b10; break;
            case 8: this.S = 0b11; break;
            default:
                throw new TypeError(`User scale must be one of [1, 2, 4, 8], given: ${userscale}.`);
        }
    }

    /**
     * Appends the packed SIB byte to `arr`.
     *
     * @param arr Array of byte values to append to; a new array is used when omitted.
     * @returns The array that was written to.
     */
    write(arr: number[] = []): number[] {
        arr.push((this.S << 6) | (this.I << 3) | this.B);
        return arr;
    }
}
// ## Displacement
/**
 * Displacement part of an instruction; wraps a `DisplacementValue` and
 * emits its octets.
 */
export class Displacement extends InstructionPart {
    /** Value whose `octets` are written out. */
    value: o.DisplacementValue;

    /**
     * @param value Displacement value to wrap.
     */
    constructor(value: o.DisplacementValue) {
        super();
        this.value = value;
    }

    /**
     * Appends every octet of the wrapped value to `arr`, in the order
     * `octets` yields them.
     *
     * @param arr Array of byte values to append to; a new array is used when omitted.
     * @returns The array that was written to.
     */
    write(arr: number[] = []): number[] {
        const bytes = this.value.octets;
        bytes.forEach((byte) => {
            arr.push(byte);
        });
        return arr;
    }
}
// ## Immediate
//
// Immediate constant value that follows other instruction bytes.
/**
 * Immediate part of an instruction; wraps an `ImmediateValue` and emits
 * its octets.
 */
export class Immediate extends InstructionPart {
    /** Value whose `octets` are written out. */
    value: o.ImmediateValue;

    /**
     * @param value Immediate value to wrap.
     */
    constructor(value: o.ImmediateValue) {
        super();
        this.value = value;
    }

    /**
     * Appends every octet of the wrapped value to `arr`, in the order
     * `octets` yields them.
     *
     * @param arr Array of byte values to append to; a new array is used when omitted.
     * @returns The array that was written to.
     */
    write(arr: number[] = []): number[] {
        const bytes = this.value.octets;
        bytes.forEach((byte) => {
            arr.push(byte);
        });
        return arr;
    }
}