1
//! Dynamically emitted HashX assembly code for x86_64 targets
2

            
3
use crate::compiler::{util, Architecture, Executable};
4
use crate::program::{Instruction, InstructionArray, NUM_INSTRUCTIONS};
5
use crate::register::{RegisterFile, RegisterId};
6
use crate::CompilerError;
7
use dynasmrt::{x64, x64::Rq, DynasmApi, DynasmLabelApi};
8
use std::mem;
9

            
10
impl Architecture for Executable {
11
7808
    fn compile(program: &InstructionArray) -> Result<Self, CompilerError> {
12
7808
        let mut asm = Assembler::new();
13
7808
        {
14
7808
            emit_save_regs(&mut asm);
15
7808
            emit_load_input(&mut asm);
16
7808
            emit_init_locals(&mut asm);
17
7808
            debug_assert_eq!(asm.len(), PROLOGUE_SIZE);
18
        }
19
4005504
        for inst in program {
20
3997696
            let prev_len = asm.len();
21
3997696
            emit_instruction(&mut asm, inst);
22
3997696
            debug_assert!(asm.len() - prev_len <= INSTRUCTION_SIZE_LIMIT);
23
        }
24
        {
25
7808
            let prev_len = asm.len();
26
7808
            emit_store_output(&mut asm);
27
7808
            emit_restore_regs(&mut asm);
28
7808
            emit_return(&mut asm);
29
7808
            debug_assert_eq!(asm.len() - prev_len, EPILOGUE_SIZE);
30
        }
31
7808
        asm.finalize()
32
7808
    }
33

            
34
314647424
    fn invoke(&self, regs: &mut RegisterFile) {
35
314647424
        // Choose the System V ABI for x86_64. (Rust now lets us do this even on
36
314647424
        // targets that use a different default C ABI.) We aren't using the
37
314647424
        // stack red zone, and we only need one register-sized parameter.
38
314647424
        //
39
314647424
        // Parameters: rdi rsi rdx rcx r8 r9
40
314647424
        // Callee save: rbx rsp rbp r12 r13 r14 r15
41
314647424
        // Scratch: rax rdi rsi rdx rcx r8 r9 r10 r11
42
314647424

            
43
314647424
        let entry = self.buffer.ptr(Assembler::entry());
44
314647424
        let entry: extern "sysv64" fn(*mut RegisterFile) -> () = unsafe { mem::transmute(entry) };
45
314647424
        entry(regs);
46
314647424
    }
47
}
48

            
49
/// Architecture-specific fixed prologue size
50
const PROLOGUE_SIZE: usize = 0x68;
51

            
52
/// Architecture-specific fixed epilogue size
53
const EPILOGUE_SIZE: usize = 0x60;
54

            
55
/// Architecture-specific maximum size for one instruction
56
const INSTRUCTION_SIZE_LIMIT: usize = 0x11;
57

            
58
/// Capacity for the temporary output buffer, before code is copied into
59
/// a long-lived allocation that can be made executable.
60
const BUFFER_CAPACITY: usize =
61
    PROLOGUE_SIZE + EPILOGUE_SIZE + NUM_INSTRUCTIONS * INSTRUCTION_SIZE_LIMIT;
62

            
63
/// Architecture-specific specialization of the Assembler
64
type Assembler = util::Assembler<x64::X64Relocation, BUFFER_CAPACITY>;
65

            
66
/// Map RegisterId in our abstract program to concrete registers and addresses.
67
trait RegisterMapper {
68
    /// Map RegisterId(0) to R8, and so on
69
    fn rq(&self) -> u8;
70
    /// Byte offset in a raw RegisterFile
71
    fn offset(&self) -> i32;
72
}
73

            
74
impl RegisterMapper for RegisterId {
75
    #[inline(always)]
76
13485312
    fn rq(&self) -> u8 {
77
13485312
        8 + (self.as_usize() as u8)
78
13485312
    }
79

            
80
    #[inline(always)]
81
124928
    fn offset(&self) -> i32 {
82
124928
        (self.as_usize() * mem::size_of::<u64>()) as i32
83
124928
    }
84
}
85

            
86
/// Local front end for `dynasmrt::dynasm!`.
///
/// Every invocation selects the x64 architecture and declares the register
/// aliases used throughout this file, then passes the remaining tokens
/// through unchanged:
///
/// - `mulh_in` is rax
/// - `mulh_result64` / `mulh_result32` are rdx / edx
/// - `branch_prohibit_flag` is esi
/// - `const_ones` is ecx
/// - `register_file_ptr` is rdi
macro_rules! dynasm {
    ($asm:ident $($directives:tt)*) => {
        dynasmrt::dynasm!($asm
            ; .arch x64
            ; .alias mulh_in, rax
            ; .alias mulh_result64, rdx
            ; .alias mulh_result32, edx
            ; .alias branch_prohibit_flag, esi
            ; .alias const_ones, ecx
            ; .alias register_file_ptr, rdi
            $($directives)*
        )
    }
}
101

            
102
/// Emit code to initialize our local variables to default values.
103
#[inline(always)]
104
7808
fn emit_init_locals<A: DynasmApi>(asm: &mut A) {
105
7808
    dynasm!(asm
106
7808
    ; xor mulh_result64, mulh_result64
107
7808
    ; xor branch_prohibit_flag, branch_prohibit_flag
108
7808
    ; lea const_ones, [branch_prohibit_flag - 1]
109
7808
    );
110
7808
}
111

            
112
/// List of registers to save on the stack, in address order
113
const REGS_TO_SAVE: [Rq; 4] = [Rq::R12, Rq::R13, Rq::R14, Rq::R15];
114

            
115
/// Calculate the amount of stack space to reserve, in bytes.
116
///
117
/// This is enough to hold REGS_TO_SAVE, and to keep the platform's
118
/// 16-byte stack alignment.
119
15616
const fn stack_size() -> i32 {
120
15616
    let size = REGS_TO_SAVE.len() * mem::size_of::<u64>();
121
15616
    let alignment = 0x10;
122
15616
    let offset = size % alignment;
123
15616
    let size = if offset == 0 {
124
15616
        size
125
    } else {
126
        size + alignment - offset
127
    };
128
15616
    size as i32
129
15616
}
130

            
131
/// Emit code to allocate stack space and store REGS_TO_SAVE.
132
#[inline(always)]
133
7808
fn emit_save_regs<A: DynasmApi>(asm: &mut A) {
134
7808
    dynasm!(asm; sub rsp, stack_size());
135
31232
    for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
136
31232
        let offset = (i * mem::size_of::<u64>()) as i32;
137
31232
        dynasm!(asm; mov [rsp + offset], Rq(*reg as u8));
138
31232
    }
139
7808
}
140

            
141
/// Emit code to restore REGS_TO_SAVE and deallocate stack space.
142
#[inline(always)]
143
7808
fn emit_restore_regs<A: DynasmApi>(asm: &mut A) {
144
31232
    for (i, reg) in REGS_TO_SAVE.as_ref().iter().enumerate() {
145
31232
        let offset = (i * mem::size_of::<u64>()) as i32;
146
31232
        dynasm!(asm; mov Rq(*reg as u8), [rsp + offset]);
147
31232
    }
148
7808
    dynasm!(asm; add rsp, stack_size());
149
7808
}
150

            
151
/// Emit code to move all input values from the RegisterFile into their
152
/// actual hardware registers.
153
#[inline(always)]
154
7808
fn emit_load_input<A: DynasmApi>(asm: &mut A) {
155
70272
    for reg in RegisterId::all() {
156
62464
        dynasm!(asm; mov Rq(reg.rq()), [register_file_ptr + reg.offset()]);
157
62464
    }
158
7808
}
159

            
160
/// Emit code to move all output values from machine registers back into
161
/// their RegisterFile slots.
162
#[inline(always)]
163
7808
fn emit_store_output<A: DynasmApi>(asm: &mut A) {
164
70272
    for reg in RegisterId::all() {
165
62464
        dynasm!(asm; mov [register_file_ptr + reg.offset()], Rq(reg.rq()));
166
62464
    }
167
7808
}
168

            
169
/// Emit a return instruction.
170
#[inline(always)]
171
7808
fn emit_return<A: DynasmApi>(asm: &mut A) {
172
7808
    dynasm!(asm; ret);
173
7808
}
174

            
175
/// Emit code for a single [`Instruction`] in the hash program.
176
#[inline(always)]
177
3997696
fn emit_instruction(asm: &mut Assembler, inst: &Instruction) {
178
    /// Common implementation for binary operations on registers
179
    macro_rules! reg_op {
180
        ($op:tt, $dst:ident, $src:ident) => {
181
            dynasm!(asm; $op Rq($dst.rq()), Rq($src.rq()))
182
        }
183
    }
184

            
185
    /// Common implementation for binary operations with a const operand
186
    macro_rules! const_op {
187
        ($op:tt, $dst:ident, $src:expr) => {
188
            dynasm!(asm; $op Rq($dst.rq()), $src)
189
        }
190
    }
191

            
192
    /// Common implementation for wide multiply operations.
193
    /// These use the one-argument form of `mul` (one register plus RDX:RAX)
194
    macro_rules! mulh_op {
195
        ($op:tt, $dst:ident, $src:ident) => {
196
            dynasm!(asm
197
                ; mov mulh_in, Rq($dst.rq())
198
                ; $op Rq($src.rq())
199
                ; mov Rq($dst.rq()), mulh_result64
200
            )
201
        }
202
    }
203

            
204
    /// Common implementation for scaled add using `lea`.
205
    /// Currently dynasm can only parse literal scale parameters.
206
    macro_rules! add_scaled_op {
207
        ($scale:tt, $dst:ident, $src:ident) => {
208
            dynasm!(asm
209
                ; lea Rq($dst.rq()), [ Rq($dst.rq()) + $scale * Rq($src.rq()) ]
210
            )
211
        }
212
    }
213

            
214
3997696
    match inst {
215
124928
        Instruction::Target => {
216
124928
            dynasm!(asm; target: );
217
124928
        }
218
124928
        Instruction::Branch { mask } => {
219
124928
            // Only one branch is allowed, `branch_prohibit_flag` keeps the test
220
124928
            // from passing. We get mul result tracking for free by assigning
221
124928
            // mulh_result32 to the corresponding output register used by the
222
124928
            // x86 mul instruction.
223
124928
            dynasm!(asm
224
124928
                ; or mulh_result32, branch_prohibit_flag
225
124928
                ; test mulh_result32, *mask as i32
226
124928
                ; cmovz branch_prohibit_flag, const_ones
227
124928
                ; jz <target
228
124928
            );
229
124928
        }
230
        Instruction::AddShift {
231
269504
            dst,
232
269504
            src,
233
269504
            left_shift,
234
269504
        } => match left_shift {
235
65984
            0 => add_scaled_op!(1, dst, src),
236
68736
            1 => add_scaled_op!(2, dst, src),
237
66624
            2 => add_scaled_op!(4, dst, src),
238
68160
            3 => add_scaled_op!(8, dst, src),
239
            _ => unreachable!(),
240
        },
241
126848
        Instruction::UMulH { dst, src } => {
242
126848
            mulh_op!(mul, dst, src);
243
126848
        }
244
123008
        Instruction::SMulH { dst, src } => {
245
123008
            mulh_op!(imul, dst, src);
246
123008
        }
247
1249280
        Instruction::Mul { dst, src } => {
248
1249280
            reg_op!(imul, dst, src);
249
1249280
        }
250
304704
        Instruction::Xor { dst, src } => {
251
304704
            reg_op!(xor, dst, src);
252
304704
        }
253
277184
        Instruction::Sub { dst, src } => {
254
277184
            reg_op!(sub, dst, src);
255
277184
        }
256
541824
        Instruction::AddConst { dst, src } => {
257
541824
            const_op!(add, dst, *src);
258
541824
        }
259
550208
        Instruction::XorConst { dst, src } => {
260
550208
            const_op!(xor, dst, *src);
261
550208
        }
262
305280
        Instruction::Rotate { dst, right_rotate } => {
263
305280
            const_op!(ror, dst, *right_rotate as i8);
264
305280
        }
265
    }
266
3997696
}