hashx/compiler/x86_64.rs

//! Dynamically emitted HashX assembly code for x86_64 targets

use crate::compiler::{util, Architecture, Executable};
use crate::program::{Instruction, InstructionArray, NUM_INSTRUCTIONS};
use crate::register::{RegisterFile, RegisterId};
use crate::CompilerError;
use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi};
use std::mem;

impl Architecture for Executable {
    fn compile(program: &InstructionArray) -> Result<Self, CompilerError> {
        let mut asm = Assembler::new();
        {
            emit_save_regs(&mut asm);
            emit_load_input(&mut asm);
            emit_init_locals(&mut asm);
            debug_assert_eq!(asm.len(), PROLOGUE_SIZE);
        }
        for inst in program {
            let prev_len = asm.len();
            emit_instruction(&mut asm, inst);
            debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT);
        }
        {
            let prev_len = asm.len();
            emit_store_output(&mut asm);
            emit_restore_regs(&mut asm);
            emit_return(&mut asm);
            debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE);
        }
        asm.finalize()
    }

    fn invoke(&self, regs: &mut RegisterFile) {
        // Choose the System V ABI for x86_64. (Rust now lets us do this even on
        // targets that use a different default C ABI.) We aren't using the
        // stack red zone, and we only need one register-sized parameter.
        //
        // Parameters: rdi rsi rdx rcx r8 r9
        // Callee save: rbx rsp rbp r12 r13 r14 r15
        // Scratch: rax rdi rsi rdx rcx r8 r9 r10 r11

        let entry = self.buffer.ptr(Assembler::entry());
        let entry: extern "sysv64" fn(*mut RegisterFile) -> () = unsafe { mem::transmute(entry) };
        entry(regs);
    }
}

/// Architecture-specific fixed prologue size
const PROLOGUE_SIZE: usize = 0x68;

/// Architecture-specific fixed epilogue size
const EPILOGUE_SIZE: usize = 0x60;

/// Architecture-specific maximum size for one instruction
const INSTRUCTION_SIZE_LIMIT: usize = 0x11;
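
// The three size constants above are not derived automatically; the
// debug_assert_eq!/debug_assert! calls in compile() check them against the
// code dynasm actually emits, so they need to be revisited whenever the
// prologue, epilogue, or per-instruction sequences change.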

/// Capacity for the temporary output buffer, before code is copied into
/// a long-lived allocation that can be made executable.
const BUFFER_CAPACITY: usize =
    PROLOGUE_SIZE + EPILOGUE_SIZE + NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT;

/// Architecture-specific specialization of the Assembler
type Assembler = util::Assembler<x64::X64Relocation, BUFFER_CAPACITY>;

/// Map RegisterId in our abstract program to concrete registers and addresses.
trait RegisterMapper {
    /// Map RegisterId(0) to R8, and so on
    fn rq(&self) -> u8;
    /// Byte offset in a raw RegisterFile
    fn offset(&self) -> i32;
}

impl RegisterMapper for RegisterId {
    #[inline(always)]
    fn rq(&self) -> u8 {
        8 + (self.as_usize() as u8)
    }

    #[inline(always)]
    fn offset(&self) -> i32 {
        (self.as_usize() * mem::size_of::<u64>()) as i32
    }
}
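
// Worked example of the mapping above: assuming the usual eight-entry HashX
// register file, RegisterId(0) becomes Rq encoding 8 (R8) at byte offset 0,
// and RegisterId(7) becomes 15 (R15) at offset 56. The hash program therefore
// lives entirely in R8-R15, leaving rax, rcx, rdx, rsi, and rdi free for the
// locals aliased in the `dynasm!` wrapper below.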

/// Wrapper for `dynasm!`, sets the architecture and defines register aliases
macro_rules! dynasm {
    ($asm:ident $($t:tt)*) => {
        dynasmrt::dynasm!($asm
            ; .arch x64
            ; .alias mulh_in, rax
            ; .alias mulh_result64, rdx
            ; .alias mulh_result32, edx
            ; .alias branch_prohibit_flag, esi
            ; .alias const_ones, ecx
            ; .alias register_file_ptr, rdi
            $($t)*
        )
    }
}

/// Emit code to initialize our local variables to default values.
#[inline(always)]
fn emit_init_locals<A: DynasmApi>(asm: &mut A) {
    dynasm!(asm
        ; xor mulh_result64, mulh_result64
        ; xor branch_prohibit_flag, branch_prohibit_flag
        ; lea const_ones, [branch_prohibit_flag - 1]
    );
}
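
// Note on emit_init_locals: `branch_prohibit_flag` (esi) has just been
// zeroed, so `lea const_ones, [branch_prohibit_flag - 1]` computes 0 - 1 and
// leaves an all-ones value in const_ones (ecx) without a separate immediate
// load and without disturbing the flags.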

/// List of registers to save on the stack, in address order
const REGS_TO_SAVE: [Rq; 4] = [Rq::R12, Rq::R13, Rq::R14, Rq::R15];
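
// Only the upper four hash registers need saving: R8-R15 hold the program
// state, but under the System V convention listed in invoke(), R8-R11 are
// caller-saved scratch registers, so only callee-saved R12-R15 must be
// preserved.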

/// Calculate the amount of stack space to reserve, in bytes.
///
/// This is enough to hold REGS_TO_SAVE while keeping the platform's
/// 16-byte stack alignment.
const fn stack_size() -> i32 {
    let size = REGS_TO_SAVE.len() * mem::size_of::<u64>();
    let alignment = 0x10;
    let offset = size % alignment;
    let size = if offset == 0 {
        size
    } else {
        size + alignment - offset
    };
    size as i32
}
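
// Quick sanity check of the arithmetic above (an added sketch, not upstream
// code): four 8-byte registers take 32 bytes, already a multiple of 16, so
// no padding is required.
const _: () = assert!(stack_size() == 32);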

/// Emit code to allocate stack space and store REGS_TO_SAVE.
#[inline(always)]
fn emit_save_regs<A: DynasmApi>(asm: &mut A) {
    dynasm!(asm; sub rsp, stack_size());
    for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
        let offset = (i * mem::size_of::<u64>()) as i32;
        dynasm!(asm; mov [rsp + offset], Rq(*reg as u8));
    }
}

/// Emit code to restore REGS_TO_SAVE and deallocate stack space.
#[inline(always)]
fn emit_restore_regs<A: DynasmApi>(asm: &mut A) {
    for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
        let offset = (i * mem::size_of::<u64>()) as i32;
        dynasm!(asm; mov Rq(*reg as u8), [rsp + offset]);
    }
    dynasm!(asm; add rsp, stack_size());
}

/// Emit code to move all input values from the RegisterFile into their
/// actual hardware registers.
#[inline(always)]
fn emit_load_input<A: DynasmApi>(asm: &mut A) {
    for reg in RegisterId::all() {
        dynasm!(asm; mov Rq(reg.rq()), [register_file_ptr + reg.offset()]);
    }
}

/// Emit code to move all output values from machine registers back into
/// their RegisterFile slots.
#[inline(always)]
fn emit_store_output<A: DynasmApi>(asm: &mut A) {
    for reg in RegisterId::all() {
        dynasm!(asm; mov [register_file_ptr + reg.offset()], Rq(reg.rq()));
    }
}

/// Emit a return instruction.
#[inline(always)]
fn emit_return<A: DynasmApi>(asm: &mut A) {
    dynasm!(asm; ret);
}

/// Emit code for a single [`Instruction`] in the hash program.
#[inline(always)]
fn emit_instruction(asm: &mut Assembler, inst: &Instruction) {
    /// Common implementation for binary operations on registers
    macro_rules! reg_op {
        ($op:tt, $dst:ident, $src:ident) => {
            dynasm!(asm; $op Rq($dst.rq()), Rq($src.rq()))
        }
    }

    /// Common implementation for binary operations with a const operand
    macro_rules! const_op {
        ($op:tt, $dst:ident, $src:expr) => {
            dynasm!(asm; $op Rq($dst.rq()), $src)
        }
    }

    /// Common implementation for wide multiply operations.
    /// These use the one-argument form of `mul` (one register plus RDX:RAX)
    macro_rules! mulh_op {
        ($op:tt, $dst:ident, $src:ident) => {
            dynasm!(asm
                ; mov mulh_in, Rq($dst.rq())
                ; $op Rq($src.rq())
                ; mov Rq($dst.rq()), mulh_result64
            )
        }
    }

    /// Common implementation for scaled add using `lea`.
    /// Currently dynasm can only parse literal scale parameters.
    macro_rules! add_scaled_op {
        ($scale:tt, $dst:ident, $src:ident) => {
            dynasm!(asm
                ; lea Rq($dst.rq()), [ Rq($dst.rq()) + $scale * Rq($src.rq()) ]
            )
        }
    }

    match inst {
        Instruction::Target => {
            dynasm!(asm; target: );
        }
        Instruction::Branch { mask } => {
            // Only one branch may ever be taken: once it is, `cmovz` sets
            // `branch_prohibit_flag` to all ones, which keeps every later
            // branch test from passing. We get mul result tracking for free
            // because `mulh_result32` is aliased to the output register that
            // the x86 mul instruction already writes its high half into.
            dynasm!(asm
                ; or mulh_result32, branch_prohibit_flag
                ; test mulh_result32, *mask as i32
                ; cmovz branch_prohibit_flag, const_ones
                ; jz <target
            );
        }
        Instruction::AddShift {
            dst,
            src,
            left_shift,
        } => match left_shift {
            0 => add_scaled_op!(1, dst, src),
            1 => add_scaled_op!(2, dst, src),
            2 => add_scaled_op!(4, dst, src),
            3 => add_scaled_op!(8, dst, src),
            _ => unreachable!(),
        },
        Instruction::UMulH { dst, src } => {
            mulh_op!(mul, dst, src);
        }
        Instruction::SMulH { dst, src } => {
            mulh_op!(imul, dst, src);
        }
        Instruction::Mul { dst, src } => {
            reg_op!(imul, dst, src);
        }
        Instruction::Xor { dst, src } => {
            reg_op!(xor, dst, src);
        }
        Instruction::Sub { dst, src } => {
            reg_op!(sub, dst, src);
        }
        Instruction::AddConst { dst, src } => {
            const_op!(add, dst, *src);
        }
        Instruction::XorConst { dst, src } => {
            const_op!(xor, dst, *src);
        }
        Instruction::Rotate { dst, right_rotate } => {
            const_op!(ror, dst, *right_rotate as i8);
        }
    }
}
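
// A minimal test sketch, added here as an illustration rather than taken from
// the upstream crate. It assumes the `Assembler::new()` / `len()` API used in
// `compile()` above is also callable from unit tests, and re-checks the fixed
// sizes and register mapping that are otherwise only verified by the
// debug_assert! calls and doc comments.
#[cfg(test)]
mod sketch_tests {
    use super::*;

    #[test]
    fn prologue_and_epilogue_sizes_match_constants() {
        let mut asm = Assembler::new();
        emit_save_regs(&mut asm);
        emit_load_input(&mut asm);
        emit_init_locals(&mut asm);
        assert_eq!(asm.len(), PROLOGUE_SIZE);

        let prev_len = asm.len();
        emit_store_output(&mut asm);
        emit_restore_regs(&mut asm);
        emit_return(&mut asm);
        assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE);
    }

    #[test]
    fn register_mapping_is_contiguous() {
        // RegisterId(n) should land in hardware register R8+n and at byte
        // offset 8*n within the RegisterFile.
        let mut i = 0usize;
        for reg in RegisterId::all() {
            assert_eq!(usize::from(reg.rq()), 8 + i);
            assert_eq!(reg.offset(), (i * mem::size_of::<u64>()) as i32);
            i += 1;
        }
    }
}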