Tor 0.4.9.2-alpha-dev
All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
compiler.h
1/* Copyright (c) 2020 tevador <tevador@gmail.com> */
2/* See LICENSE for licensing information */
3
4#ifndef COMPILER_H
5#define COMPILER_H
6
7#include <stdint.h>
8#include <stdbool.h>
9#include <hashx.h>
10#include "virtual_memory.h"
11#include "program.h"
12
/* x86-64 backend entry point. hashx_compile() expands to this function when
 * _M_X64 or __x86_64__ is defined. It takes the program to translate and an
 * output buffer, and returns a bool.
 * NOTE(review): presumably `code` must provide COMP_CODE_SIZE bytes and a
 * false return means compilation failed (the unsupported-platform fallback
 * of hashx_compile() also yields false) -- confirm against the
 * implementation. */
13HASHX_PRIVATE bool hashx_compile_x86(const hashx_program* program, uint8_t* code);
14
/* AArch64 backend entry point. hashx_compile() expands to this function when
 * __aarch64__ is defined. It takes the program to translate and an output
 * buffer, and returns a bool.
 * NOTE(review): presumably `code` must provide COMP_CODE_SIZE bytes and a
 * false return means compilation failed (the unsupported-platform fallback
 * of hashx_compile() also yields false) -- confirm against the
 * implementation. */
15HASHX_PRIVATE bool hashx_compile_a64(const hashx_program* program, uint8_t* code);
16
/* Select the native backend at preprocessing time:
 *  - x86-64 (_M_X64 or __x86_64__): defines HASHX_COMPILER_X86 and maps
 *    hashx_compile() to hashx_compile_x86().
 *  - AArch64 (__aarch64__): defines HASHX_COMPILER_A64 and maps
 *    hashx_compile() to hashx_compile_a64().
 *  - Anything else: neither marker is defined and hashx_compile() expands to
 *    the constant (false); its arguments are not evaluated in that case. */
17#if defined(_M_X64) || defined(__x86_64__)
18#define HASHX_COMPILER_X86
19#define hashx_compile(p,c) hashx_compile_x86(p,c)
20#elif defined(__aarch64__)
21#define HASHX_COMPILER_A64
22#define hashx_compile(p,c) hashx_compile_a64(p,c)
23#else
24#define hashx_compile(p,c) (false)
25#endif
26
/* Compiler set-up hook for a hashx context; returns nothing.
 * NOTE(review): presumably prepares the context's compiled-code buffer of
 * COMP_CODE_SIZE bytes -- confirm against the implementation. */
27HASHX_PRIVATE void hashx_compiler_init(hashx_ctx* compiler);
/* Compiler tear-down hook for a hashx context; returns nothing.
 * NOTE(review): presumably releases whatever hashx_compiler_init() set up
 * for this context -- confirm against the implementation. */
28HASHX_PRIVATE void hashx_compiler_destroy(hashx_ctx* compiler);
29
30/* Compiled code sizes in bytes:
31 *
32 *      Prologue   Epilogue   MulH   Reg-Reg   Reg-Imm32   Branch+Tgt   MaxInst
33 * X86  69         64         9      3..4      7           15           10 (br)
34 * A64  40         36         4      4         12          24           24 (br)
35 *
36 * Maximum code sizes, assuming an arbitrary instruction mix including unlimited
37 * branch instructions. (Branch size * 512 + prologue + epilogue)
38 *
39 *      Max possible code size (any instructions)
40 * X86  5253
41 * A64  12364
42 *
43 * Actual code sizes tend to be much smaller due to the instruction mix chosen
44 * by the program generator. To get a quick overview of the statistics, we
45 * measure the sample mean and sample standard deviation for 1 million random
46 * hash programs:
47 *
48 *      Mean     Std Deviation   4096 bytes at
49 * X86  2786.4   26.259          49.9 standard deviations
50 * A64  3507.7   58.526          10.1 standard deviations
51 *
52 * If we search for PRNG sequences that maximize generated code size, it's easy
53 * to find aarch64 code that needs in the range of 4100-4300 bytes. On x86, this
54 * search still doesn't turn up programs anywhere close to a full page.
55 *
56 * Anyway, this is all to say that a one-page buffer is fine except in
57 * extremely rare cases on aarch64, and a two-page buffer is enough for any
58 * behavior we can expect from the program generator under arbitrary input,
59 * but only a 4-page buffer is enough for fully arbitrary instruction streams
60 * on any architecture.
61 *
62 * Let's use a 2-page buffer on aarch64, or 1-page elsewhere.
63 *
64 * Note that the buffer allocation is done by platform-independent code,
65 * so COMP_CODE_SIZE must always have a valid size even on platforms where
66 * it is not actually supported or used.
67 *
68 * If this buffer fills up, compilation will fail with a runtime error.
69 */
70
/* Size in bytes of the compiled-code buffer: 2 * 4096 when the AArch64
 * backend is selected (HASHX_COMPILER_A64 defined), 1 * 4096 otherwise.
 * The fallback branch also covers targets with no backend, so the macro
 * always has a value. */
71#ifdef HASHX_COMPILER_A64
72#define COMP_CODE_SIZE (4096 * 2)
73#else
74#define COMP_CODE_SIZE (4096 * 1)
75#endif
76
77#endif