/* Source path: libs/luajit-cmake/luajit/src/lj_target.h
** (blob 19716928411f7746ea9e7b938a1a651a14eb9583)
*/
/*
** Definitions for target CPU.
** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_TARGET_H
#define _LJ_TARGET_H

#include "lj_def.h"
#include "lj_arch.h"

/* -- Registers and spill slots ------------------------------------------- */

/* Register type (uint8_t in ir->r). */
typedef uint32_t Reg;

/* The hi-bit is NOT set for an allocated register. This means the value
** can be directly used without masking. The hi-bit is set for a register
** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
*/
#define RID_NONE		0x80
#define RID_MASK		0x7f
#define RID_INIT		(RID_NONE|RID_MASK)
#define RID_SINK		(RID_INIT-1)
#define RID_SUNK		(RID_INIT-2)

/* ra_noreg() yields RID_NONE (non-zero) if the hi-bit is set, else 0.
** ra_hasreg() is its logical negation and yields 0 or 1.
*/
#define ra_noreg(r)		((r) & RID_NONE)
#define ra_hasreg(r)		(!((r) & RID_NONE))

/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
#define ra_hashint(r)		((r) < RID_SUNK)
/* Strip the hi-bit and return the remaining low 7 bits as a Reg. */
#define ra_gethint(r)		((Reg)((r) & RID_MASK))
/* Store r with the hi-bit set into the lvalue rr, truncated to uint8_t.
** The expansion is fully parenthesized, so the assignment stays intact
** when the macro is used as part of a larger expression.
*/
#define ra_sethint(rr, r)	((rr) = (uint8_t)((r)|RID_NONE))
/* True if r1 and r2 agree in their low 7 bits (the hi-bit is ignored). */
#define ra_samehint(r1, r2)	(ra_gethint((r1)^(r2)) == 0)

/* Spill slot number 0 is reserved to mean "no spill slot allocated". */
#define SPS_NONE		0

#define ra_hasspill(s)		(SPS_NONE != (s))

/* Register and spill slot packed into one value (uint16_t in ir->prev):
** the register occupies the low 8 bits, the spill slot the bits above.
*/
typedef uint32_t RegSP;

#define REGSP(r, s)		(((s) << 8) + (r))
#define REGSP_HINT(r)		(RID_NONE|(r))
#define REGSP_INIT		REGSP(RID_INIT, 0)

#define regsp_reg(rs)		((rs) & 0xff)
#define regsp_spill(rs)		((rs) >> 8)
/* False only if the register part has the hi-bit set and the spill slot
** is zero; the low 7 bits of the register part are ignored.
*/
#define regsp_used(rs) \
  (REGSP(RID_NONE, 0) != ((rs) & ~REGSP(RID_MASK, 0)))

/* -- Register sets ------------------------------------------------------- */

/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
#endif

/* Set containing only register r. */
#define RID2RSET(r)		(((RegSet)1) << (r))
#define RSET_EMPTY		((RegSet)0)
/* Set with bits lo..hi-1. (hi)-(lo) must be smaller than the bit width
** of RegSet, since the inner shift is undefined otherwise.
*/
#define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

/* rset_test() yields 1 if bit r is set in rs, else 0.
** rset_set() and rset_clear() modify the lvalue rs in place.
** rset_exclude() yields rs without bit r and leaves rs untouched.
** The rs argument is parenthesized everywhere, so compound expressions
** such as (a | b) may be passed without extra parentheses.
*/
#define rset_test(rs, r)	((int)((rs) >> (r)) & 1)
#define rset_set(rs, r)		((rs) |= RID2RSET(r))
#define rset_clear(rs, r)	((rs) &= ~RID2RSET(r))
#define rset_exclude(rs, r)	((rs) & ~RID2RSET(r))
/* Pick the highest (top) or lowest (bot) register in a set. The 64 bit
** variants use compiler builtins, whose result is undefined for an empty
** set. NOTE(review): lj_fls()/lj_ffs() are defined elsewhere and are
** presumably the 32 bit equivalents -- confirm.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
#define rset_picktop(rs)	((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs)	((Reg)__builtin_ctzll(rs))
#else
#define rset_picktop(rs)	((Reg)lj_fls(rs))
#define rset_pickbot(rs)	((Reg)lj_ffs(rs))
#endif

/* -- Register allocation cost -------------------------------------------- */

/* The register allocation heuristic keeps track of the cost for allocating
** a specific register:
**
** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
**
** An already allocated register has the (non-zero) IR reference in the lowest
** bits and the result of a blended cost-model in the higher bits.
**
** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
** linear search for the minimum cost is used. The search doesn't need to
** keep track of the position of the minimum, which makes it very fast.
** The lowest bits of the minimum cost show the desired IR reference whose
** register is the one to evict.
**
** Without the cost-model this degenerates to the standard heuristics for
** (reverse) linear-scan register allocation. Since code generation is done
** in reverse, a live interval extends from the last use to the first def.
** For an SSA IR the IR reference is the first (and only) def and thus
** trivially marks the end of the interval. The LSRA heuristic says to pick
** the register whose live interval has the furthest extent, i.e. the lowest
** IR reference in our case.
**
** A cost-model should take into account other factors, like spill-cost and
** restore- or rematerialization-cost, which depend on the kind of instruction.
** E.g. constants have zero spill costs, variant instructions have higher
** costs than invariants and PHIs should preferably never be spilled.
**
** Here's a first cut at a simple but effective blended cost-model for R-LSRA:
** - Due to careful design of the IR, constants already have lower IR
**   references than invariants and invariants have lower IR references
**   than variants.
** - The cost in the upper 16 bits is the sum of the IR reference and a
**   weighted score. The score currently only takes into account whether
**   the IRT_ISPHI bit is set in the instruction type.
** - The PHI weight is the minimum distance (in IR instructions) a PHI
**   reference has to be further apart from a non-PHI reference to be spilled.
** - It should be a power of two (for speed) and must be between 2 and 32768.
**   Good values for the PHI weight seem to be between 40 and 150.
** - Further study is required.
*/
#define REGCOST_PHI_WEIGHT	64

/* Cost value attached to one specific register by the allocator. */
typedef uint32_t RegCost;

/* Pack a cost into the upper 16 bits and an IR reference into the lower
** 16 bits. Note: assumes 16 bit IRRef1.
*/
#define REGCOST(cost, ref)	(((RegCost)(cost) << 16) + (RegCost)(ref))
#define regcost_ref(rc)		((IRRef1)(rc))

/* Cost contribution of IR type t: REGCOST_PHI_WEIGHT in the upper 16 bits
** if the IRT_ISPHI bit is set in t, zero otherwise.
*/
#define REGCOST_T(t) \
  ((((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI) * (RegCost)((t)&IRT_ISPHI))
/* Full cost of a reference: ref in both halves plus the type-based cost. */
#define REGCOST_REF_T(ref, t)	(REGCOST_T(t) + REGCOST(ref, ref))

/* -- Target-specific definitions ----------------------------------------- */

#if LJ_TARGET_X86ORX64
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
#include "lj_target_arm.h"
#elif LJ_TARGET_ARM64
#include "lj_target_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
#include "lj_target_mips.h"
#else
#error "Missing include for target CPU"
#endif

#ifdef EXITSTUBS_PER_GROUP
/* Compute the address of the exit stub for exit number exitno.
** group[] holds one base pointer per EXITSTUBS_PER_GROUP exits; within a
** group, consecutive stubs are EXITSTUB_SPACING apart.
*/
static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
{
  char *stubs;
  lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL,
	     "exit stub group for exit %d uninitialized", exitno);
  stubs = group[exitno / EXITSTUBS_PER_GROUP];
  return stubs + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
}
/* Takes the group array from (J)->exitstubgroup and casts the result to
** MCode *. Written as a macro to avoid a dependence on lj_jit.h if only
** including lj_target.h.
*/
#define exitstub_addr(J, exitno) \
  ((MCode *)exitstub_addr_((char **)(J)->exitstubgroup, (exitno)))
#endif

#endif