hashx/
generator.rs

1//! Pseudorandom generator for hash programs and parts thereof
2
3use crate::constraints::{self, Pass, RegisterWriter, Validator};
4use crate::program::{Instruction, InstructionVec, Opcode};
5use crate::rand::RngBuffer;
6use crate::register::{RegisterId, RegisterSet};
7use crate::scheduler::{InstructionPlan, Scheduler};
8use crate::Error;
9use rand_core::RngCore;
10
11/// The `model` attempts to document HashX program generation choices,
12/// separate from the main body of the program generator.
13mod model {
14    use crate::constraints::Pass;
15    use crate::generator::OpcodeSelector;
16    use crate::program::Opcode;
17    use crate::scheduler::SubCycle;
18
19    /// Choose the next [`OpcodeSelector`].
20    ///
21    /// HashX uses a repeating pattern of opcode selectors, based on the
22    /// current sub_cycle timestamp simulated for instruction fetch/decode.
23    /// This gives a roughly constant program layout since most instructions do
24    /// only take one sub-cycle to decode, but we do skip a sub-cycle every time
25    /// there's a 2-op instruction (wide mul, target, branch). The repeating
26    /// selector pattern is designed to ensure these skipped selectors are
27    /// always the Normal type, so the overall number of multiply and branch
28    /// instructions stays constant.
29    ///
30    /// The basic pattern is `(Mul, Normal, Normal)` but `Branch`, `Target`,
31    /// and `WideMul` operations are mixed in at fixed locations in a 36-cycle
32    /// repetition.
33    ///
34    /// Normal cycles are all replaced by `ImmediateSrc` if this is the retry
35    /// pass, so that retries won't need to attempt source register selection
36    /// in this case.
37    #[inline(always)]
38    pub(super) fn choose_opcode_selector(pass: Pass, sub_cycle: SubCycle) -> OpcodeSelector {
39        let n = sub_cycle.as_usize() % 36;
40        if n == 1 {
41            OpcodeSelector::Target
42        } else if n == 19 {
43            OpcodeSelector::Branch
44        } else if n == 12 || n == 24 {
45            OpcodeSelector::WideMul
46        } else if (n % 3) == 0 {
47            OpcodeSelector::Mul
48        } else {
49            match pass {
50                Pass::Original => OpcodeSelector::Normal,
51                Pass::Retry => OpcodeSelector::ImmediateSrc,
52            }
53        }
54    }
55
56    /// Opcode choices for [`OpcodeSelector::WideMul`]
57    pub(super) const WIDE_MUL_OPS_TABLE: [Opcode; 2] = [Opcode::SMulH, Opcode::UMulH];
58
59    /// Opcode choices for [`OpcodeSelector::ImmediateSrc`]
60    pub(super) const IMMEDIATE_SRC_OPS_TABLE: [Opcode; 4] = [
61        Opcode::Rotate,
62        Opcode::XorConst,
63        Opcode::AddConst,
64        Opcode::AddConst,
65    ];
66
67    /// Opcode choices for [`OpcodeSelector::Normal`]
68    pub(super) const NORMAL_OPS_TABLE: [Opcode; 8] = [
69        Opcode::Rotate,
70        Opcode::XorConst,
71        Opcode::AddConst,
72        Opcode::AddConst,
73        Opcode::Sub,
74        Opcode::Xor,
75        Opcode::XorConst,
76        Opcode::AddShift,
77    ];
78
79    /// Masks for [`super::Instruction::Branch`] always have a constant
80    /// number of bits set.
81    ///
82    /// The probability of taking a branch is approximately
83    /// `1.0 / (1 << BRANCH_MASK_BIT_WEIGHT)`
84    pub(super) const BRANCH_MASK_BIT_WEIGHT: usize = 4;
85}
86
87/// Program generator
88pub(crate) struct Generator<'r, R: RngCore> {
89    /// The program generator wraps a random number generator, via [`RngBuffer`].
90    rng: RngBuffer<'r, R>,
91
92    /// Keep track of when execution units and registers will be ready,
93    /// and ultimately generate a list of candidate available registers
94    /// for any particular cycle.
95    scheduler: Scheduler,
96
97    /// Additional constraints on the entire program and pieces thereof
98    /// are implemented in this separate Validator module.
99    validator: Validator,
100
101    /// Last [`Opcode`] chosen by an instruction selector
102    ///
103    /// Some of the instruction selectors have the notion of avoiding
104    /// duplicates, but `HashX` designs this check based on the sequence of
105    /// selector results rather than the sequence of committed instructions.
106    last_selector_result_op: Option<Opcode>,
107}
108
109impl<'r, R: RngCore> Generator<'r, R> {
110    /// Create a fresh program generator from a random number generator state.
111    #[inline(always)]
112    pub(crate) fn new(rng: &'r mut R) -> Self {
113        Generator {
114            rng: RngBuffer::new(rng),
115            scheduler: Scheduler::new(),
116            validator: Validator::new(),
117            last_selector_result_op: None,
118        }
119    }
120
121    /// Pick a pseudorandom register from a RegisterSet.
122    ///
123    /// Returns `Err(())` if the set is empty. Consumes one `u32` from the `Rng`
124    /// only if the set contains more than one item.
125    ///
126    /// The choice is perfectly uniform only if the register set is a power of
127    /// two length. Uniformity is not critical here.
128    #[inline(always)]
129    fn select_register(&mut self, reg_options: &RegisterSet) -> Result<RegisterId, ()> {
130        match reg_options.len() {
131            0 => Err(()),
132            1 => Ok(reg_options.index(0)),
133            num_options => {
134                let num_options: u32 = num_options
135                    .try_into()
136                    .expect("register set length always fits in u32");
137                let index = self.rng.next_u32() % num_options;
138                Ok(reg_options.index(
139                    index
140                        .try_into()
141                        .expect("register set length always fits in usize"),
142                ))
143            }
144        }
145    }
146
147    /// Pick a pseudorandom operation from a list of options.
148    ///
149    /// The `options` slice must be between 2 and 255 options in length.
150    /// For uniformity and efficiency it's best if the length is also a power
151    /// of two. All actual operation lists used by HashX have power-of-two
152    /// lengths.
153    #[inline(always)]
154    fn select_op<'a, T, const SIZE: usize>(&mut self, options: &'a [T; SIZE]) -> &'a T {
155        &options[(self.rng.next_u8() as usize) % options.len()]
156    }
157
158    /// Generate a random u32 bit mask, with a constant number of bits set.
159    ///
160    /// This uses an iterative algorithm that selects one bit at a time
161    /// using a u8 from the Rng for each, discarding duplicates.
162    #[inline(always)]
163    fn select_constant_weight_bit_mask(&mut self, num_ones: usize) -> u32 {
164        let mut result = 0_u32;
165        let mut count = 0;
166        while count < num_ones {
167            let bit = 1 << ((self.rng.next_u8() as usize) % 32);
168            if (result & bit) == 0 {
169                result |= bit;
170                count += 1;
171            }
172        }
173        result
174    }
175
176    /// Generate random nonzero values.
177    ///
178    /// Iteratively picks a random u32, masking it, and discarding results that
179    /// would be all zero.
180    #[inline(always)]
181    fn select_nonzero_u32(&mut self, mask: u32) -> u32 {
182        loop {
183            let result = self.rng.next_u32() & mask;
184            if result != 0 {
185                return result;
186            }
187        }
188    }
189
190    /// Generate an entire program.
191    ///
192    /// Generates instructions into a provided [`Vec`] until the generator
193    /// state can't be advanced any further. Runs the whole-program validator.
194    /// Returns with [`Error::ProgramConstraints`] if the program fails these
195    /// checks. This happens in normal use on a small fraction of seed values.
196    #[inline(always)]
197    pub(crate) fn generate_program(&mut self, output: &mut InstructionVec) -> Result<(), Error> {
198        assert!(output.is_empty());
199        while !output.is_full() {
200            match self.generate_instruction() {
201                Err(()) => break,
202                Ok((inst, regw)) => {
203                    let state_advance = self.commit_instruction_state(&inst, regw);
204                    output.push(inst);
205                    if let Err(()) = state_advance {
206                        break;
207                    }
208                }
209            }
210        }
211        self.validator
212            .check_whole_program(&self.scheduler, output)
213            .map_err(|()| Error::ProgramConstraints)
214    }
215
216    /// Generate the next instruction.
217    ///
218    /// This is a multi-pass approach, starting with a [`Pass::Original`] that
219    /// normally succeeds, followed by a [`Pass::Retry`] with simplifications
220    /// to improve success rate, followed by a timing stall that advances the
221    /// simulated cycle count forward and tries again with the benefit of newly
222    /// available registers in the schedule.
223    ///
224    /// This only returns `Err(())` if we've hit a stopping condition for the
225    /// program.
226    #[inline(always)]
227    fn generate_instruction(&mut self) -> Result<(Instruction, RegisterWriter), ()> {
228        loop {
229            if let Ok(result) = self.instruction_gen_attempt(Pass::Original) {
230                return Ok(result);
231            }
232            if let Ok(result) = self.instruction_gen_attempt(Pass::Retry) {
233                return Ok(result);
234            }
235            self.scheduler.stall()?;
236        }
237    }
238
239    /// Choose an opcode using the current [`OpcodeSelector`], subject to
240    /// stateful constraints on adjacent opcode choices.
241    #[inline(always)]
242    fn choose_opcode(&mut self, pass: Pass) -> Opcode {
243        let op = loop {
244            let sub_cycle = self.scheduler.instruction_stream_sub_cycle();
245            let op = model::choose_opcode_selector(pass, sub_cycle).apply(self);
246            if let Ok(()) = constraints::opcode_pair_allowed(self.last_selector_result_op, op) {
247                break op;
248            }
249        };
250        self.last_selector_result_op = Some(op);
251        op
252    }
253
254    /// Make one attempt at instruction generation.
255    ///
256    /// This picks an [`OpcodeSelector`], chooses an opcode, then finishes
257    /// choosing the opcode-specific parts of the instruction. Each of these
258    /// choices affects the Rng state, and may fail if conditions are not met.
259    #[inline(always)]
260    fn instruction_gen_attempt(&mut self, pass: Pass) -> Result<(Instruction, RegisterWriter), ()> {
261        let op = self.choose_opcode(pass);
262        let plan = self.scheduler.instruction_plan(op)?;
263        let (inst, regw) = self.choose_instruction_with_opcode_plan(op, pass, &plan)?;
264        debug_assert_eq!(inst.opcode(), op);
265        self.scheduler.commit_instruction_plan(&plan, &inst);
266        Ok((inst, regw))
267    }
268
269    /// Choose only a source register, depending on the opcode and timing plan
270    #[inline(never)]
271    fn choose_src_reg(
272        &mut self,
273        op: Opcode,
274        timing_plan: &InstructionPlan,
275    ) -> Result<RegisterId, ()> {
276        let src_set = RegisterSet::from_filter(|src| {
277            self.scheduler
278                .register_available(src, timing_plan.cycle_issued())
279        });
280        let src_set = constraints::src_registers_allowed(src_set, op);
281        self.select_register(&src_set)
282    }
283
284    /// Choose both a source and destination register using a normal
285    /// [`RegisterWriter`] for two-operand instructions.
286    #[inline(always)]
287    fn choose_src_dst_regs(
288        &mut self,
289        op: Opcode,
290        pass: Pass,
291        writer_info_fn: fn(RegisterId) -> RegisterWriter,
292        timing_plan: &InstructionPlan,
293    ) -> Result<(RegisterId, RegisterId, RegisterWriter), ()> {
294        let src = self.choose_src_reg(op, timing_plan)?;
295        let writer_info = writer_info_fn(src);
296        let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?;
297        Ok((src, dst, writer_info))
298    }
299
300    /// Choose both a source and destination register, with a custom
301    /// [`RegisterWriter`] constraint that doesn't depend on source
302    /// register choice.
303    #[inline(always)]
304    fn choose_src_dst_regs_with_writer_info(
305        &mut self,
306        op: Opcode,
307        pass: Pass,
308        writer_info: RegisterWriter,
309        timing_plan: &InstructionPlan,
310    ) -> Result<(RegisterId, RegisterId), ()> {
311        let src = self.choose_src_reg(op, timing_plan)?;
312        let dst = self.choose_dst_reg(op, pass, writer_info, Some(src), timing_plan)?;
313        Ok((src, dst))
314    }
315
316    /// Choose a destination register only, using source and writer info
317    /// as well as the current state of the validator.
318    #[inline(never)]
319    fn choose_dst_reg(
320        &mut self,
321        op: Opcode,
322        pass: Pass,
323        writer_info: RegisterWriter,
324        src: Option<RegisterId>,
325        timing_plan: &InstructionPlan,
326    ) -> Result<RegisterId, ()> {
327        let validator = self
328            .validator
329            .dst_registers_allowed(op, pass, writer_info, src);
330        let dst_set = RegisterSet::from_filter(|dst| {
331            self.scheduler
332                .register_available(dst, timing_plan.cycle_issued())
333                && validator.check(dst)
334        });
335        self.select_register(&dst_set)
336    }
337
338    /// With an [`Opcode`] and an execution unit timing plan already in mind,
339    /// generate the other pieces necessary to fully describe an
340    /// [`Instruction`].
341    ///
342    /// This can fail if register selection fails.
343    #[inline(always)]
344    fn choose_instruction_with_opcode_plan(
345        &mut self,
346        op: Opcode,
347        pass: Pass,
348        plan: &InstructionPlan,
349    ) -> Result<(Instruction, RegisterWriter), ()> {
350        Ok(match op {
351            Opcode::Target => (Instruction::Target, RegisterWriter::None),
352
353            Opcode::Branch => (
354                Instruction::Branch {
355                    mask: self.select_constant_weight_bit_mask(model::BRANCH_MASK_BIT_WEIGHT),
356                },
357                RegisterWriter::None,
358            ),
359
360            Opcode::UMulH => {
361                let regw = RegisterWriter::UMulH(self.rng.next_u32());
362                let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?;
363                (Instruction::UMulH { src, dst }, regw)
364            }
365
366            Opcode::SMulH => {
367                let regw = RegisterWriter::SMulH(self.rng.next_u32());
368                let (src, dst) = self.choose_src_dst_regs_with_writer_info(op, pass, regw, plan)?;
369                (Instruction::SMulH { src, dst }, regw)
370            }
371
372            Opcode::Mul => {
373                let regw = RegisterWriter::Mul;
374                let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
375                (Instruction::Mul { src, dst }, regw)
376            }
377
378            Opcode::Sub => {
379                let regw = RegisterWriter::AddSub;
380                let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
381                (Instruction::Sub { src, dst }, regw)
382            }
383
384            Opcode::Xor => {
385                let regw = RegisterWriter::Xor;
386                let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
387                (Instruction::Xor { src, dst }, regw)
388            }
389
390            Opcode::AddShift => {
391                let regw = RegisterWriter::AddSub;
392                let left_shift = (self.rng.next_u32() & 3) as u8;
393                let (src, dst, regw) = self.choose_src_dst_regs(op, pass, regw, plan)?;
394                (
395                    Instruction::AddShift {
396                        src,
397                        dst,
398                        left_shift,
399                    },
400                    regw,
401                )
402            }
403
404            Opcode::AddConst => {
405                let regw = RegisterWriter::AddConst;
406                let src = self.select_nonzero_u32(u32::MAX) as i32;
407                let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
408                (Instruction::AddConst { src, dst }, regw)
409            }
410
411            Opcode::XorConst => {
412                let regw = RegisterWriter::XorConst;
413                let src = self.select_nonzero_u32(u32::MAX) as i32;
414                let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
415                (Instruction::XorConst { src, dst }, regw)
416            }
417
418            Opcode::Rotate => {
419                let regw = RegisterWriter::Rotate;
420                let right_rotate: u8 = self.select_nonzero_u32(63) as u8;
421                let dst = self.choose_dst_reg(op, pass, regw, None, plan)?;
422                (Instruction::Rotate { dst, right_rotate }, regw)
423            }
424        })
425    }
426
427    /// Commit all state modifications associated with a chosen instruction
428    /// that's certainly being written to the final program.
429    ///
430    /// Returns `Ok(())` on success or `Err(())` if the new state would no
431    /// longer be valid for program generation and we're done writing code.
432    #[inline(always)]
433    fn commit_instruction_state(
434        &mut self,
435        inst: &Instruction,
436        regw: RegisterWriter,
437    ) -> Result<(), ()> {
438        self.validator.commit_instruction(inst, regw);
439        self.scheduler.advance_instruction_stream(inst.opcode())
440    }
441}
442
443/// HashX uses a limited number of different instruction selection strategies,
444/// chosen based on the sub-cycle timing of our position in the
445/// instruction stream.
446#[derive(Debug, Clone, Copy, Eq, PartialEq)]
447enum OpcodeSelector {
448    /// Main ALU instruction chooser, picks from Add/Sub/Xor/Rotate
449    Normal,
450    /// Retry pass if Normal fails, instructions with immediate source only
451    ImmediateSrc,
452    /// Only multiply instructions, no additional register selection work.
453    Mul,
454    /// Wide multiply instructions, randomly choosing signedness
455    WideMul,
456    /// Only branch targets
457    Target,
458    /// Only branch instructions
459    Branch,
460}
461
462impl OpcodeSelector {
463    /// Apply the selector, advancing the Rng state and returning an Opcode.
464    #[inline(always)]
465    fn apply<R: RngCore>(&self, gen: &mut Generator<'_, R>) -> Opcode {
466        match self {
467            OpcodeSelector::Target => Opcode::Target,
468            OpcodeSelector::Branch => Opcode::Branch,
469            OpcodeSelector::Mul => Opcode::Mul,
470            OpcodeSelector::Normal => *gen.select_op(&model::NORMAL_OPS_TABLE),
471            OpcodeSelector::ImmediateSrc => *gen.select_op(&model::IMMEDIATE_SRC_OPS_TABLE),
472            OpcodeSelector::WideMul => *gen.select_op(&model::WIDE_MUL_OPS_TABLE),
473        }
474    }
475}