diff options
Diffstat (limited to 'libs/luajit-cmake/luajit/src/vm_x64.dasc')
-rw-r--r-- | libs/luajit-cmake/luajit/src/vm_x64.dasc | 4935 |
1 files changed, 4935 insertions, 0 deletions
diff --git a/libs/luajit-cmake/luajit/src/vm_x64.dasc b/libs/luajit-cmake/luajit/src/vm_x64.dasc new file mode 100644 index 0000000..03d9655 --- /dev/null +++ b/libs/luajit-cmake/luajit/src/vm_x64.dasc @@ -0,0 +1,4935 @@ +|// Low-level VM code for x64 CPUs in LJ_GC64 mode. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +| +|.arch x64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|.if WIN +|.define X64WIN, 1 // Windows/x64 calling conventions. +|.endif +| +|// Fixed register assignments for the interpreter. +|// This is very fragile and has many dependencies. Caveat emptor. +|.define BASE, rdx // Not C callee-save, refetched anyway. +|.if X64WIN +|.define KBASE, rdi // Must be C callee-save. +|.define PC, rsi // Must be C callee-save. +|.define DISPATCH, rbx // Must be C callee-save. +|.define KBASEd, edi +|.define PCd, esi +|.define DISPATCHd, ebx +|.else +|.define KBASE, r15 // Must be C callee-save. +|.define PC, rbx // Must be C callee-save. +|.define DISPATCH, r14 // Must be C callee-save. +|.define KBASEd, r15d +|.define PCd, ebx +|.define DISPATCHd, r14d +|.endif +| +|.define RA, rcx +|.define RAd, ecx +|.define RAH, ch +|.define RAL, cl +|.define RB, rbp // Must be rbp (C callee-save). +|.define RBd, ebp +|.define RC, rax // Must be rax. +|.define RCd, eax +|.define RCW, ax +|.define RCH, ah +|.define RCL, al +|.define OP, RBd +|.define RD, RC +|.define RDd, RCd +|.define RDW, RCW +|.define RDL, RCL +|.define TMPR, r10 +|.define TMPRd, r10d +|.define ITYPE, r11 +|.define ITYPEd, r11d +| +|.if X64WIN +|.define CARG1, rcx // x64/WIN64 C call arguments. 
+|.define CARG2, rdx +|.define CARG3, r8 +|.define CARG4, r9 +|.define CARG1d, ecx +|.define CARG2d, edx +|.define CARG3d, r8d +|.define CARG4d, r9d +|.else +|.define CARG1, rdi // x64/POSIX C call arguments. +|.define CARG2, rsi +|.define CARG3, rdx +|.define CARG4, rcx +|.define CARG5, r8 +|.define CARG6, r9 +|.define CARG1d, edi +|.define CARG2d, esi +|.define CARG3d, edx +|.define CARG4d, ecx +|.define CARG5d, r8d +|.define CARG6d, r9d +|.endif +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|//----------------------------------------------------------------------- +|.if X64WIN // x64/Windows stack layout +| +|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). +|.macro saveregs_ +| push rdi; push rsi; push rbx +| sub rsp, CFRAME_SPACE +|.endmacro +|.macro saveregs +| push rbp; saveregs_ +|.endmacro +|.macro restoreregs +| add rsp, CFRAME_SPACE +| pop rbx; pop rsi; pop rdi; pop rbp +|.endmacro +| +|.define SAVE_CFRAME, aword [rsp+aword*13] +|.define SAVE_PC, aword [rsp+aword*12] +|.define SAVE_L, aword [rsp+aword*11] +|.define SAVE_ERRF, dword [rsp+dword*21] +|.define SAVE_NRES, dword [rsp+dword*20] +|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter +|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*8] +|.define SAVE_R3, aword [rsp+aword*7] +|.define SAVE_R2, aword [rsp+aword*6] +|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. 
+|.define ARG5, aword [rsp+aword*4] +|.define CSAVE_4, aword [rsp+aword*3] +|.define CSAVE_3, aword [rsp+aword*2] +|.define CSAVE_2, aword [rsp+aword*1] +|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. +|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee +| +|.define ARG5d, dword [rsp+dword*8] +|.define TMP1, ARG5 // TMP1 overlaps ARG5 +|.define TMP1d, ARG5d +|.define TMP1hi, dword [rsp+dword*9] +|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. +| +|//----------------------------------------------------------------------- +|.else // x64/POSIX stack layout +| +|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). +|.macro saveregs_ +| push rbx; push r15; push r14 +|.if NO_UNWIND +| push r13; push r12 +|.endif +| sub rsp, CFRAME_SPACE +|.endmacro +|.macro saveregs +| push rbp; saveregs_ +|.endmacro +|.macro restoreregs +| add rsp, CFRAME_SPACE +|.if NO_UNWIND +| pop r12; pop r13 +|.endif +| pop r14; pop r15; pop rbx; pop rbp +|.endmacro +| +|//----- 16 byte aligned, +|.if NO_UNWIND +|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*10] +|.define SAVE_R3, aword [rsp+aword*9] +|.define SAVE_R2, aword [rsp+aword*8] +|.define SAVE_R1, aword [rsp+aword*7] +|.define SAVE_RU2, aword [rsp+aword*6] +|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. +|.else +|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*8] +|.define SAVE_R3, aword [rsp+aword*7] +|.define SAVE_R2, aword [rsp+aword*6] +|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. +|.endif +|.define SAVE_CFRAME, aword [rsp+aword*4] +|.define SAVE_PC, aword [rsp+aword*3] +|.define SAVE_L, aword [rsp+aword*2] +|.define SAVE_ERRF, dword [rsp+dword*3] +|.define SAVE_NRES, dword [rsp+dword*2] +|.define TMP1, aword [rsp] //<-- rsp while in interpreter. 
+|//----- 16 byte aligned +| +|// Low and high dword views of the 8 byte slot at [rsp]. +|.define TMP1d, dword [rsp] +|.define TMP1hi, dword [rsp+dword*1] +|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. +| +|.endif +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. +|// Each ins_* macro finishes the operand decode for one instruction format. +|// ins_NEXT (below) has already set RA and OP and left the upper 16 bits of +|// the instruction word in RC (= RD). +|// ins_A/ins_AD/ins_AJ: nothing left to decode. +|// ins_ABC: RB = bits 8..15 of RC, then RC = bits 0..7 of RC. RCH must be +|// read before RCd is overwritten, so the order of the two movzx matters. +|// ins_AB_/ins_A_C: extract only RB or only RC in the same way. +|// ins_AND: bitwise complement of RD. +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; movzx RBd, RCH; .endmacro +|.macro ins_A_C; movzx RCd, RCL; .endmacro +|.macro ins_AND; not RD; .endmacro +| +|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). +|// Loads the 32 bit word at [PC], splits it into OP (bits 0..7), RA (bits +|// 8..15) and RC/RD (bits 16..31), advances PC by 4 and jumps through the +|// table of 8 byte entries at DISPATCH. Note that OP is an alias for RBd. +|.macro ins_NEXT +| mov RCd, [PC] // Whole instruction word. +| movzx RAd, RCH // RA = bits 8..15. +| movzx OP, RCL // OP = bits 0..7. +| add PC, 4 // PC now points past this instruction. +| shr RCd, 16 // RC = RD = bits 16..31. +| jmp aword [DISPATCH+OP*8] +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| // Around 10%-30% slower on Core2, a lot slower on P4. +| .macro ins_next +| jmp ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|// ins_callt: load PC from the function in RB, decode OP and RA of the +|// instruction at that PC, advance PC by 4 and dispatch. RC/RD is not +|// written, so it still holds nargs+1 when the target opcode runs. +|.macro ins_callt +| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC +| mov PC, LFUNC:RB->pc +| mov RAd, [PC] +| movzx OP, RAL // OP = bits 0..7. +| movzx RAd, RAH // RA = bits 8..15. +| add PC, 4 +| jmp aword [DISPATCH+OP*8] +|.endmacro +| +|// ins_call: store the current PC at [BASE-8], then continue as ins_callt. +|.macro ins_call +| // BASE = new base, RB = LFUNC, RD = nargs+1 +| mov [BASE-8], PC +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Macros to clear or set tags. 
+|// cleartp: zero the upper 17 bits of reg (shift left, then logical shift +|// right, by 17). The low 47 bits are kept. +|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro +|// settp reg, tp: OR (tp << 47) into reg. Overwrites ITYPE. +|.macro settp, reg, tp +| mov64 ITYPE, ((uint64_t)tp<<47) +| or reg, ITYPE +|.endmacro +|// settp dst, reg, tp: dst = (tp << 47) | reg. ITYPE is not touched, +|// dst is overwritten before reg is read, so dst must differ from reg. +|.macro settp, dst, reg, tp +| mov64 dst, ((uint64_t)tp<<47) +| or dst, reg +|.endmacro +|// setint: the two settp forms with tp fixed to LJ_TISNUM. +|.macro setint, reg +| settp reg, LJ_TISNUM +|.endmacro +|.macro setint, dst, reg +| settp dst, reg, LJ_TISNUM +|.endmacro +| +|// Macros to test operand types. +|// All of them copy the operand to ITYPE, shift it right arithmetically by +|// 47 and compare the low 32 bits (ITYPEd) against a tag. ITYPE is left +|// holding the shifted value, which later code may reuse. +|// checktp_nc: jump to target if the tag differs from tp; reg is unchanged. +|.macro checktp_nc, reg, tp, target +| mov ITYPE, reg +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|// checktp: like checktp_nc, but additionally applies cleartp to reg. +|// The tag is cleared before the compare, i.e. also when the jump is taken. +|.macro checktp, reg, tp, target +| mov ITYPE, reg +| cleartp reg +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|// checktptp: same body as checktp_nc; the parameter is named src. +|.macro checktptp, src, tp, target +| mov ITYPE, src +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|// checkstr/checktab/checkfunc: checktp with a fixed tag. +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +| +|// checknumx: compare the tag against LJ_TISNUM and branch to target with +|// the conditional jump passed as third argument. reg is unchanged. +|.macro checknumx, reg, target, jump +| mov ITYPE, reg +| sar ITYPE, 47 +| cmp ITYPEd, LJ_TISNUM +| jump target +|.endmacro +|// The wrappers only pick the condition (unsigned compare on ITYPEd): +|// checkint/checkinttp: branch if tag != LJ_TISNUM. +|// checknum/checknumtp: branch if tag >= LJ_TISNUM. +|// checknumber: branch if tag > LJ_TISNUM. +|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro +|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro +|.macro checknumber, src, target; checknumx src, target, ja; .endmacro +| +|// mov_false/mov_true: load the 64 bit constants ~(1<<47) and ~(2<<47). +|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro +|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro +| +|// These operands must be used with movzx. +|// They address fields of the instruction word just before PC: +|// byte 0 = OP, byte 1 = RA, byte 2 = RC, byte 3 = RB, bytes 2..3 = RD. +|.define PC_OP, byte [PC-4] +|.define PC_RA, byte [PC-3] +|.define PC_RB, byte [PC-1] +|.define PC_RC, byte [PC-2] +|.define PC_RD, word [PC-2] +| +|// branchPC: PC += (reg - BCBIAS_J) * 4. lea is used, so flags are kept. +|.macro branchPC, reg +| lea PC, [PC+reg*4-BCBIAS_J*4] +|.endmacro +| +|// Assumes DISPATCH is relative to GL. 
+/* Byte offsets of global_State/jit_State fields, biased by GG_DISP2G/GG_DISP2J for use as [DISPATCH+...] displacements. */ +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +/* Offset of a GCproto field minus sizeof(GCproto), i.e. a negative displacement. NOTE(review): presumably applied to a pointer to the bytecode that directly follows the GCproto -- confirm against lj_obj.h. */ +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|// Decrement hashed hotcount and trigger trace recorder on underflow. +|// The index is (PCd >> 1) & HOTCOUNT_PCMASK, used unscaled as a byte +|// offset from DISPATCH+GG_DISP2HOT. The 16 bit counter there is decreased +|// by HOTCOUNT_LOOP and the jump is taken when the subtraction borrows +|// (jb), not when the counter merely reaches zero. Overwrites reg. +|.macro hotloop, reg +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +| jb ->vm_hotloop +|.endmacro +| +|// Same as hotloop, but subtracts HOTCOUNT_CALL and jumps to vm_hotcall. +|.macro hotcall, reg +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +| jb ->vm_hotcall +|.endmacro +| +|// Set current VM state. +|// Stores the complement of LJ_VMST_<st> (name pasted with ..) into the +|// vmstate field of the global state. +|.macro set_vmstate, st +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st +|.endmacro +| +|// fpop1: store st0 into st1 and pop the x87 stack (drops the old st1). +|.macro fpop1; fstp st1; .endmacro +| +|// Synthesize SSE FP constants. +|// Each macro builds the 64 bit pattern in the integer register tmp and +|// moves it to the SSE register reg. Overwrites tmp. +|// NOTE(review): U64x(hi,lo) presumably forms a 64 bit constant from two +|// 32 bit hex halves -- it is defined outside this chunk. +|.macro sseconst_abs, reg, tmp // Synthesize abs mask. +| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp +|.endmacro +| +|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. +| mov64 tmp, U64x(val,00000000); movd reg, tmp +|.endmacro +| +|.macro sseconst_sign, reg, tmp // Synthesize sign mask. +| sseconst_hi reg, tmp, 80000000 +|.endmacro +|.macro sseconst_1, reg, tmp // Synthesize 1.0. +| sseconst_hi reg, tmp, 3ff00000 +|.endmacro +|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. +| sseconst_hi reg, tmp, 43300000 +|.endmacro +|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +| sseconst_hi reg, tmp, 43380000 +|.endmacro +| +|// Move table write barrier back. Overwrites reg. +|// Clears the LJ_GC_BLACK bit in tab->marked and pushes tab onto the front +|// of the gc.grayagain list: the old list head is saved in tab->gclist. +|.macro barrierback, tab, reg +| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) +| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] +| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab +| mov tab->gclist, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. 
*/ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | test PCd, FRAME_P + | jz ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | and PC, -8 + | sub BASE, PC // Restore caller base. + | lea RA, [RA+PC-8] // Rebase RA and prepend one result. + | mov PC, [BASE-8] // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | mov_true ITYPE + | mov aword [BASE+RA], ITYPE // Prepend true to results. + | + |->vm_returnc: + | add RDd, 1 // RD = nresults+1 + | jz ->vm_unwind_yield + | mov MULTRES, RDd + | test PC, FRAME_TYPE + | jz ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + | xor PC, FRAME_C + | test PCd, FRAME_TYPE + | jnz ->vm_returnp + | + | // Return to C. + | set_vmstate C + | and PC, -8 + | sub PC, BASE + | neg PC // Previous base = BASE - delta. + | + | sub RDd, 1 + | jz >2 + |1: // Move results down. + | mov RB, [BASE+RA] + | mov [BASE-16], RB + | add BASE, 8 + | sub RDd, 1 + | jnz <1 + |2: + | mov L:RB, SAVE_L + | mov L:RB->base, PC + |3: + | mov RDd, MULTRES + | mov RAd, SAVE_NRES // RA = wanted nresults+1 + |4: + | cmp RAd, RDd + | jne >6 // More/less results wanted? + |5: + | sub BASE, 16 + | mov L:RB->top, BASE + | + |->vm_leave_cp: + | mov RA, SAVE_CFRAME // Restore previous C frame. + | mov L:RB->cframe, RA + | xor eax, eax // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | restoreregs + | ret + | + |6: + | jb >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. 
+ | cmp BASE, L:RB->maxstack + | ja >8 + | mov aword [BASE-16], LJ_TNIL + | add BASE, 8 + | add RDd, 1 + | jmp <4 + | + |7: // Less results wanted. + | test RAd, RAd + | jz <5 // But check for LUA_MULTRET+1. + | sub RA, RD // Negative result! + | lea BASE, [BASE+RA*8] // Correct top. + | jmp <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | mov L:RB->top, BASE // Save current top held in BASE (yes). + | mov MULTRES, RDd // Need to fill only remainder with nil. + | mov CARG2d, RAd + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. + | jmp <3 + | + |->vm_unwind_yield: + | mov al, LUA_YIELD + | jmp ->vm_unwind_c_eh + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov eax, CARG2d // Error return status for vm_pcall. + | mov rsp, CARG1 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | mov L:RB, SAVE_L + | mov GL:RB, L:RB->glref + | mov dword GL:RB->vmstate, ~LJ_VMST_C + | jmp ->vm_leave_unw + | + |->vm_unwind_rethrow: + |.if not X64WIN + | mov CARG1, SAVE_L + | mov CARG2d, eax + | restoreregs + | jmp extern lj_err_throw // (lua_State *L, int errcode) + |.endif + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | and CARG1, CFRAME_RAWMASK + | mov rsp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | mov L:RB, SAVE_L + | mov RDd, 1+1 // Really 1+2 results, incr. later. + | mov BASE, L:RB->base + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov PC, [BASE-8] // Fetch PC of previous frame. + | mov_false RA + | mov RB, [BASE] + | mov [BASE-16], RA // Prepend false to error message. 
+ | mov [BASE-8], RB + | mov RA, -16 // Results start at BASE+RA = BASE-16. + | set_vmstate INTERP + | jmp ->vm_returnc // Increments RD/MULTRES and returns. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// All three entries end up calling lj_state_growstack(L, n) and then + |// retry the call via ins_callt. They differ only in how n and L->top + |// are derived before the call. + | + |->vm_growstack_c: // Grow stack for C function. + | mov CARG2d, LUA_MINSTACK + | jmp >2 + | + |->vm_growstack_v: // Grow stack for vararg Lua function. + | sub RD, 16 // LJ_FR2 + | jmp >1 + | + |->vm_growstack_f: // Grow stack for fixarg Lua function. + | // BASE = new base, RD = nargs+1, RB = L, PC = first PC + | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8 = new top. + |1: + | movzx RAd, byte [PC-4+PC2PROTO(framesize)] // RA = framesize byte of the prototype. + | add PC, 4 // Must point after first instruction. + | mov L:RB->base, BASE + | mov L:RB->top, RD + | mov SAVE_PC, PC + | mov CARG2, RA + |2: + | // RB = L, L->base = new base, L->top = top + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base // Reload base and top after the call. + | mov RD, L:RB->top + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | sub RD, BASE + | shr RDd, 3 // Byte delta -> number of 8 byte slots. + | add NARGS:RDd, 1 + | // BASE = new base, RB = LFUNC, RD = nargs+1 + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov RA, CARG2 + | mov PCd, FRAME_CP + | xor RDd, RDd + | // KBASE = rsp + CFRAME_RESUME; it is stored to L->cframe below. Use the + | // full 64 bit rsp as base, like every other stack reference in this file: + | // the unwinders load rsp back from this value. + | lea KBASE, [rsp+CFRAME_RESUME] + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov SAVE_PC, RD // Any value outside of bytecode is ok. 
+ | mov SAVE_CFRAME, RD + | mov SAVE_NRES, RDd + | mov SAVE_ERRF, RDd + | mov L:RB->cframe, KBASE + | cmp byte L:RB->status, RDL + | je >2 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | mov byte L:RB->status, RDL + | mov BASE, L:RB->base + | mov RD, L:RB->top + | sub RD, RA + | shr RDd, 3 + | add RDd, 1 // RD = nresults+1 + | sub RA, BASE // RA = resultofs + | mov PC, [BASE-8] + | mov MULTRES, RDd + | test PCd, FRAME_TYPE + | jz ->BC_RET_Z + | jmp ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | mov PCd, FRAME_CP + | mov SAVE_ERRF, CARG4d + | jmp >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | mov PCd, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | mov SAVE_NRES, CARG3d + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov RA, CARG2 + | + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. + | mov SAVE_CFRAME, KBASE + | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + | add DISPATCH, GG_G2DISP + | mov L:RB->cframe, rsp + | + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). + | add PC, RA + | sub PC, BASE // PC = frame delta + frame type + | + | mov RD, L:RB->top + | sub RD, RA + | shr NARGS:RDd, 3 + | add NARGS:RDd, 1 // RD = nargs+1 + | + |->vm_call_dispatch: + | mov LFUNC:RB, [RA-16] + | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. 
+ | + |->vm_call_dispatch_f: + | mov BASE, RA + | ins_call + | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + | + | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sub KBASE, L:RB->top + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | mov SAVE_ERRF, 0 // No error function. + | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. + | add DISPATCH, GG_G2DISP + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. + | mov SAVE_CFRAME, KBASE + | mov L:RB->cframe, rsp + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | + | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | // TValue * (new base) or NULL returned in eax (RC). + | test RC, RC + | jz ->vm_leave_cp // No base? Just remove C frame. + | mov RA, RC + | mov PCd, FRAME_CP + | jmp <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + | add RA, BASE + | and PC, -8 + | mov RB, BASE + | sub BASE, PC // Restore caller BASE. + | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg. + | mov RC, RA // ... in [RC] + | mov PC, [RB-24] // Restore PC from [cont|PC]. + | mov RA, qword [RB-32] // May be negative on WIN64 with debug. 
+ |.if FFI + | cmp RA, 1 + | jbe >1 + |.endif + | mov LFUNC:KBASE, [BASE-16] + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | // BASE = base, RC = result, RB = meta base + | jmp RA // Jump to continuation. + | + |.if FFI + |1: + | je ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: Tail call from C function. + | sub RB, BASE + | shr RBd, 3 + | lea RDd, [RBd-3] + | jmp ->vm_call_tail + |.endif + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | movzx RAd, PC_RB + | sub RB, 32 + | lea RA, [BASE+RA*8] + | sub RA, RB + | je ->cont_ra + | neg RA + | shr RAd, 3 + |.if X64WIN + | mov CARG3d, RAd + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | mov RC, [RC] + | mov [RB], RC + | mov CARG2, RB + |.else + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | mov CARG3d, RAd + | mov RA, [RC] + | mov [RB], RA + | mov CARG2, RB + |.endif + | jmp ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | mov TMP1, STR:RC + | lea RC, TMP1 + | cmp PC_OP, BC_GGET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RB], TAB:RA + | jmp >2 + | + |->vmeta_tgetb: + | movzx RCd, PC_RC + |.if DUALNUM + | setint RC + | mov TMP1, RC + |.else + | cvtsi2sd xmm0, RCd + | movsd TMP1, xmm0 + |.endif + | lea RC, TMP1 + | jmp >1 + | + |->vmeta_tgetv: + | movzx RCd, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RBd, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2, RB + | mov CARG3, RC + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). 
+ | mov BASE, L:RB->base + | test RC, RC + | jz >3 + |->cont_ra: // BASE = base, RC = result + | movzx RAd, PC_RA + | mov RB, [RC] + | mov [BASE+RA*8], RB + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | mov RA, L:RB->top + | mov [RA-24], PC // [cont|PC] + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + | mov NARGS:RDd, 2+1 // 2 args for func(t, k). + | cleartp LFUNC:RB + | jmp ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | mov CARG1, TAB:RB + | mov RB, BASE // Save BASE. + | mov CARG2d, RCd // Caveat: CARG2 == BASE + | call extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in eax (RC). + | movzx RAd, PC_RA + | mov BASE, RB // Restore BASE. + | test RC, RC + | jnz ->BC_TGETR_Z + | mov ITYPE, LJ_TNIL + | jmp ->BC_TGETR2_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | mov TMP1, STR:RC + | lea RC, TMP1 + | cmp PC_OP, BC_GSET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RB], TAB:RA + | jmp >2 + | + |->vmeta_tsetb: + | movzx RCd, PC_RC + |.if DUALNUM + | setint RC + | mov TMP1, RC + |.else + | cvtsi2sd xmm0, RCd + | movsd TMP1, xmm0 + |.endif + | lea RC, TMP1 + | jmp >1 + | + |->vmeta_tsetv: + | movzx RCd, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RBd, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2, RB + | mov CARG3, RC + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). 
+ | mov BASE, L:RB->base + | test RC, RC + | jz >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | movzx RAd, PC_RA + | mov RB, [BASE+RA*8] + | mov [RC], RB + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | mov RA, L:RB->top + | mov [RA-24], PC // [cont|PC] + | movzx RCd, PC_RA + | // Copy value to third argument. + | mov RB, [BASE+RC*8] + | mov [RA+16], RB + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v). + | cleartp LFUNC:RB + | jmp ->vm_call_dispatch_f + | + |->vmeta_tsetr: + |.if X64WIN + | mov L:CARG1, SAVE_L + | mov CARG3d, RCd + | mov L:CARG1->base, BASE + | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. + |.else + | mov L:CARG1, SAVE_L + | mov CARG2, TAB:RB + | mov L:CARG1->base, BASE + | mov RB, BASE // Save BASE. + | mov CARG3d, RCd // Caveat: CARG3 == BASE. + |.endif + | mov SAVE_PC, PC + | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in eax (RC). + | movzx RAd, PC_RA + | mov BASE, RB // Restore BASE. + | jmp ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | movzx RDd, PC_RD + | movzx RAd, PC_RA + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. + |.if X64WIN + | lea CARG3, [BASE+RD*8] + | lea CARG2, [BASE+RA*8] + |.else + | lea CARG2, [BASE+RA*8] + | lea CARG3, [BASE+RD*8] + |.endif + | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. + | movzx CARG4d, PC_OP + | mov SAVE_PC, PC + | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in eax (RC). 
+ |3: + | mov BASE, L:RB->base + | cmp RC, 1 + | ja ->vmeta_binop + |4: + | lea PC, [PC+4] + | jb >6 + |5: + | movzx RDd, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | add PC, 4 + | mov ITYPE, [RC] + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true. + | jb <5 + | jmp <6 + | + |->cont_condf: // BASE = base, RC = result + | mov ITYPE, [RC] + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. + | jmp <4 + | + |->vmeta_equal: + | cleartp TAB:RD + | sub PC, 4 + |.if X64WIN + | mov CARG3, RD + | mov CARG4d, RBd + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2 == BASE. + | mov CARG2, RA + | mov CARG1, L:RB // Caveat: CARG1 == RA. + |.else + | mov CARG2, RA + | mov CARG4d, RBd // Caveat: CARG4 == RA. + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG3 == BASE. + | mov CARG3, RD + | mov CARG1, L:RB + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + | jmp <3 + | + |->vmeta_equal_cd: + |.if FFI + | sub PC, 4 + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG1, L:RB + | mov CARG2d, dword [PC-4] + | mov SAVE_PC, PC + | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + | jmp <3 + |.endif + | + |->vmeta_istype: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. 
+ | mov CARG2d, RAd + | mov CARG3d, RDd + | mov L:CARG1, L:RB + | mov SAVE_PC, PC + | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | mov BASE, L:RB->base + | jmp <6 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vno: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_vn: + | lea RC, [KBASE+RC*8] + | jmp >1 + | + |->vmeta_arith_nvo: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_nv: + | lea TMPR, [KBASE+RC*8] + | lea RC, [BASE+RB*8] + | mov RB, TMPR + | jmp >2 + | + |->vmeta_unm: + | lea RC, [BASE+RD*8] + | mov RB, RC + | jmp >2 + | + |->vmeta_arith_vvo: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_vv: + | lea RC, [BASE+RC*8] + |1: + | lea RB, [BASE+RB*8] + |2: + | lea RA, [BASE+RA*8] + |.if X64WIN + | mov CARG3, RB + | mov CARG4, RC + | movzx RCd, PC_OP + | mov ARG5d, RCd + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2 == BASE. + | mov CARG2, RA + | mov CARG1, L:RB // Caveat: CARG1 == RA. + |.else + | movzx CARG5d, PC_OP + | mov CARG2, RA + | mov CARG4, RC // Caveat: CARG4 == RA. + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. + | mov CARG3, RB + | mov L:RB, L:CARG1 + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | mov RA, RC + | sub RC, BASE + | mov [RA-24], PC // [cont|PC] + | lea PC, [RC+FRAME_CONT] + | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). 
+ | jmp ->vm_call_dispatch + | + |->vmeta_len: + | movzx RDd, PC_RD + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB + | mov SAVE_PC, PC + | call extern lj_meta_len // (lua_State *L, TValue *o) + | // NULL (retry) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base +#if LJ_52 + | test RC, RC + | jne ->vmeta_binop // Binop call for compatibility. + | movzx RDd, PC_RD + | mov TAB:CARG1, [BASE+RD*8] + | cleartp TAB:CARG1 + | jmp ->BC_LEN_Z +#else + | jmp ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call_ra: + | lea RA, [BASE+RA*8+16] + |->vmeta_call: // Resolve and call __call metamethod. + | // BASE = old base, RA = new base, RC = nargs+1, PC = return + | mov TMP1d, NARGS:RDd // Save RA, RC for us. + | mov RB, RA + |.if X64WIN + | mov L:TMPR, SAVE_L + | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. + | lea CARG2, [RA-16] + | lea CARG3, [RA+NARGS:RD*8-8] + | mov CARG1, L:TMPR // Caveat: CARG1 is RA. + |.else + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. + | lea CARG2, [RA-16] + | lea CARG3, [RA+NARGS:RD*8-8] + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | mov RA, RB + | mov L:RB, SAVE_L + | mov BASE, L:RB->base + | mov NARGS:RDd, TMP1d + | mov LFUNC:RB, [RA-16] + | add NARGS:RDd, 1 + | // This is fragile. L->base must not move, KBASE must always be defined. + | cmp KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | cleartp LFUNC:RB + | mov BASE, RA + | ins_call // Otherwise call resolved metamethod. 
+ | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG2, RA // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB // Caveat: CARG1 == RA + | mov SAVE_PC, PC + | call extern lj_meta_for // (lua_State *L, TValue *base) + | mov BASE, L:RB->base + | mov RCd, [PC-4] + | movzx RAd, RCH + | movzx OP, RCL + | shr RCd, 16 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | cmp NARGS:RDd, 1+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | cmp NARGS:RDd, 2+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | checknumtp [BASE], ->fff_fallback + | op xmm0, qword [BASE] + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_n name, movsd + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | checknumtp [BASE], ->fff_fallback + | checknumtp [BASE+8], ->fff_fallback + | movsd xmm0, qword [BASE] + | movsd xmm1, qword [BASE+8] + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. 
+ |.macro ffgccheck  // Clobbers RB; calls ->fff_gcstep when gc.total >= gc.threshold.
+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | jb >1  // Below the threshold: skip the GC step.
+ | call ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | mov ITYPE, [BASE]
+ | mov RB, ITYPE
+ | sar ITYPE, 47  // Shift the type tag down from the top bits.
+ | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback  // Not a true value (presumably nil/false): use the fallback.
+ | mov PC, [BASE-8]
+ | mov MULTRES, RDd  // Remember nargs+1; RD is the loop counter below.
+ | mov RB, [BASE]
+ | mov [BASE-16], RB  // First result = first argument.
+ | sub RDd, 2
+ | jz >2  // Only one argument: nothing more to move.
+ | mov RA, BASE
+ |1:  // Move each remaining argument down by two slots.
+ | add RA, 8
+ | mov RB, [RA]
+ | mov [RA-16], RB
+ | sub RDd, 1
+ | jnz <1
+ |2:
+ | mov RDd, MULTRES  // Return all arguments: nresults+1 = nargs+1.
+ | jmp ->fff_res_
+ |
+ |.ffunc_1 type
+ | mov RC, [BASE]
+ | sar RC, 47
+ | mov RBd, LJ_TISNUM
+ | cmp RCd, RBd
+ | cmovb RCd, RBd  // Clamp every tag below LJ_TISNUM (unsigned) to LJ_TISNUM.
+ | not RCd  // ~tag is the index into the upvalue array used below.
+ |2:
+ | mov CFUNC:RB, [BASE-16]
+ | cleartp CFUNC:RB
+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]  // Load upvalue[~tag] of this fast function.
+ | mov PC, [BASE-8]
+ | settp STR:RC, LJ_TSTR
+ | mov [BASE-16], STR:RC  // Result: that upvalue, tagged as a string.
+ | jmp ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | mov TAB:RB, [BASE]
+ | mov PC, [BASE-8]
+ | checktab TAB:RB, >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | mov TAB:RB, TAB:RB->metatable
+ |2:
+ | test TAB:RB, TAB:RB
+ | mov aword [BASE-16], LJ_TNIL  // mov leaves the flags of the test intact.
+ | jz ->fff_res1  // No metatable: return nil.
+ | settp TAB:RC, TAB:RB, LJ_TTAB
+ | mov [BASE-16], TAB:RC // Store metatable as default result.
+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
+ | mov RAd, TAB:RB->hmask
+ | and RAd, STR:RC->sid  // Hash slot index = hmask & key->sid.
+ | settp STR:RC, LJ_TSTR
+ | imul RAd, #NODE
+ | add NODE:RA, TAB:RB->node  // RA = first node of the hash chain.
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | cmp NODE:RA->key, STR:RC
+ | je >5
+ |4:
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <3
+ | jmp ->fff_res1 // Not found, keep default result.
+ |5:
+ | mov RB, NODE:RA->val
+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
+ | mov [BASE-16], RB // Return value of mt.__metatable.
+ | jmp ->fff_res1
+ |
+ |6:  // Not a table: userdata uses the shared metatable field, other types a per-type base metatable.
+ | cmp ITYPEd, LJ_TUDATA; je <1
+ | cmp ITYPEd, LJ_TISNUM; ja >7
+ | mov ITYPEd, LJ_TISNUM  // Clamp number tags to LJ_TISNUM.
+ |7:
+ | not ITYPEd  // ~tag indexes the base metatable slots.
+ | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+ | jmp <2
+ |
+ |.ffunc_2 setmetatable
+ | mov TAB:RB, [BASE]
+ | mov TAB:TMPR, TAB:RB  // Keep a copy of the original slot value for the result.
+ | checktab TAB:RB, ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+ | mov TAB:RA, [BASE+8]
+ | checktab TAB:RA, ->fff_fallback
+ | mov TAB:RB->metatable, TAB:RA
+ | mov PC, [BASE-8]
+ | mov [BASE-16], TAB:TMPR // Return original table.
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jz >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+ |1:
+ | jmp ->fff_res1
+ |
+ |.ffunc_2 rawget
+ |.if X64WIN
+ | mov TAB:RA, [BASE]
+ | checktab TAB:RA, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ | lea CARG3, [BASE+8]
+ | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
+ | mov CARG1, SAVE_L
+ |.else
+ | mov TAB:CARG2, [BASE]
+ | checktab TAB:CARG2, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
+ | mov CARG1, SAVE_L
+ |.endif
+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in rax (RD).
+ | mov BASE, RB // Restore BASE.
+ | // Copy table slot.
+ | mov RB, [RD]
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
+ | mov RB, [BASE]
+ | checknumber RB, ->fff_fallback
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB  // Result: the number itself, unchanged.
+ | jmp ->fff_res1
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | mov PC, [BASE-8]
+ | mov STR:RB, [BASE]
+ | checktp_nc STR:RB, LJ_TSTR, >3
+ | // A __tostring method in the string base metatable is ignored.
+ |2:
+ | mov [BASE-16], STR:RB  // Result: the string in RB.
+ | jmp ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
+ | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Add frame since C call can throw.
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ |.if not X64WIN
+ | mov CARG2, BASE // Otherwise: CARG2 == BASE
+ |.endif
+ | mov L:CARG1, L:RB
+ |.if DUALNUM
+ | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ |.else
+ | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
+ |.endif
+ | // GCstr returned in rax (RD).
+ | mov BASE, L:RB->base
+ | settp STR:RB, RD, LJ_TSTR  // Tag the returned string; label 2 stores it as the result.
+ | jmp <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?  (Flags are still those of the .ffunc_1 argument-count compare.)
+ |1:
+ | mov CARG1, [BASE]
+ | mov PC, [BASE-8]
+ | checktab CARG1, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ |.if X64WIN
+ | lea CARG3, [BASE-16]
+ | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
+ |.else
+ | lea CARG2, [BASE+8]
+ | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
+ |.endif
+ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // 1=found, 0=end, -1=error returned in eax (RD).
+ | mov BASE, RB // Restore BASE.
+ | test RDd, RDd; jg ->fff_res2 // Found key/value.
+ | js ->fff_fallback_2 // Invalid key.
+ | // End of traversal: return nil.
+ | mov aword [BASE-16], LJ_TNIL
+ | jmp ->fff_res1
+ |2: // Set missing 2nd arg to nil.
+ | mov aword [BASE+8], LJ_TNIL
+ | jmp <1
+ |
+ |.ffunc_1 pairs
+ | mov TAB:RB, [BASE]
+ | mov TMPR, TAB:RB  // Keep the original slot value for the second result.
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0]  // The function kept in upvalue[0] of this fast function.
+ | settp CFUNC:RD, LJ_TFUNC
+ | mov PC, [BASE-8]  // Must be read before [BASE-8] is overwritten below.
+ | mov [BASE-16], CFUNC:RD  // First result: that function.
+ | mov [BASE-8], TMPR  // Second result: the table.
+ | mov aword [BASE], LJ_TNIL  // Third result: nil.
+ | mov RDd, 1+3  // nresults+1 = 1 + 3.
+ | jmp ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | mov TAB:RB, [BASE]
+ | checktab TAB:RB, ->fff_fallback
+ |.if DUALNUM
+ | mov RA, [BASE+8]
+ | checkint RA, ->fff_fallback
+ |.else
+ | checknumtp [BASE+8], ->fff_fallback
+ | movsd xmm0, qword [BASE+8]
+ |.endif
+ | mov PC, [BASE-8]
+ |.if DUALNUM
+ | add RAd, 1  // Next index.
+ | setint ITYPE, RA
+ | mov [BASE-16], ITYPE  // First result: the incremented index.
+ |.else
+ | sseconst_1 xmm1, TMPR
+ | addsd xmm0, xmm1  // Next index.
+ | cvttsd2si RAd, xmm0  // RA = next index as an integer.
+ | movsd qword [BASE-16], xmm0  // First result: the incremented index.
+ |.endif
+ | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
+ | mov RD, TAB:RB->array
+ | lea RD, [RD+RA*8]  // RD = address of array slot RA.
+ |1:
+ | cmp aword [RD], LJ_TNIL; je ->fff_res0  // nil value: return no results.
+ | // Copy array slot.
+ | mov RB, [RD]
+ | mov [BASE-8], RB  // Second result: the value.
+ |->fff_res2:  // Shared exit: return the two results at BASE-16 and BASE-8.
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+ |.if X64WIN
+ | mov TMPR, BASE
+ | mov CARG2d, RAd
+ | mov CARG1, TAB:RB
+ | mov RB, TMPR
+ |.else
+ | mov CARG1, TAB:RB
+ | mov RB, BASE // Save BASE.
+ | mov CARG2d, RAd // Caveat: CARG2 == BASE
+ |.endif
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in rax (RD).
+ | mov BASE, RB  // Restore BASE.
+ | test RD, RD
+ | jnz <1  // Non-NULL: check and copy the returned slot.
+ |->fff_res0:  // Shared exit: return no results.
+ | mov RDd, 1+0
+ | jmp ->fff_res
+ |
+ |.ffunc_1 ipairs
+ | mov TAB:RB, [BASE]
+ | mov TMPR, TAB:RB  // Keep the original slot value for the second result.
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0]  // The function kept in upvalue[0] of this fast function.
+ | settp CFUNC:RD, LJ_TFUNC
+ | mov PC, [BASE-8]  // Must be read before [BASE-8] is overwritten below.
+ | mov [BASE-16], CFUNC:RD  // First result: that function.
+ | mov [BASE-8], TMPR  // Second result: the table.
+ |.if DUALNUM
+ | mov64 RD, ((uint64_t)LJ_TISNUM<<47)  // Third result: LJ_TISNUM tag with a zero payload.
+ | mov [BASE], RD
+ |.else
+ | mov qword [BASE], 0  // Third result: all-zero bits, i.e. the double 0.0.
+ |.endif
+ | mov RDd, 1+3  // nresults+1 = 1 + 3.
+ | jmp ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+ | lea RA, [BASE+16]  // RA = BASE+16; presumably the new base expected by vm_call_dispatch.
+ | sub NARGS:RDd, 1  // Do not count the called function as an argument.
+ | mov PCd, 16+FRAME_PCALL  // PC = 16 + FRAME_PCALL.
+ |1:  // Shared tail, also entered from xpcall.
+ | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | shr RB, HOOK_ACTIVE_SHIFT
+ | and RB, 1  // RB = 1 if the HOOK_ACTIVE bit is set, else 0.
+ | add PC, RB // Remember active hook before pcall.
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
+ | mov KBASE, RD  // KBASE = slot counter for the copy loop.
+ |2:  // Move the slots up by one, starting from the highest.
+ | mov RB, [RA+KBASE*8-24]
+ | mov [RA+KBASE*8-16], RB
+ | sub KBASE, 1
+ | ja <2
+ | jmp ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+ | mov LFUNC:RA, [BASE+8]
+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback  // Second argument must be a function.
+ | mov LFUNC:RB, [BASE] // Swap function and traceback.
+ | mov [BASE], LFUNC:RA
+ | mov [BASE+8], LFUNC:RB
+ | lea RA, [BASE+24]  // Same role as in pcall, one slot further up.
+ | sub NARGS:RDd, 2  // Do not count the function and the traceback as arguments.
+ | mov PCd, 24+FRAME_PCALL
+ | jmp <1  // Continue in the shared pcall tail.
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume  // resume=1: coroutine.resume, resume=0: coroutine.wrap helper.
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | mov L:RB, [BASE]  // Coroutine = first argument (type is checked further down).
+ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | mov CFUNC:RB, [BASE-16]
+ | cleartp CFUNC:RB
+ | mov L:RB, CFUNC:RB->upvalue[0].gcr  // Coroutine = upvalue[0] of this function.
+ | cleartp L:RB
+ |.endif
+ | mov PC, [BASE-8]
+ | mov SAVE_PC, PC
+ | mov TMP1, L:RB  // Keep the coroutine pointer across the calls below.
+ |.if resume
+ | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
+ |.endif
+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback
+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
+ | mov RA, L:RB->top  // mov leaves the flags of the status compare intact.
+ | je >1 // Taken if status == LUA_YIELD; falls through if status is below it (i.e. 0).
+ | cmp RA, L:RB->base // Check for presence of initial func.
+ | je ->fff_fallback
+ | mov PC, [RA-8] // Move initial function up.
+ | mov [RA], PC
+ | add RA, 8
+ |1:
+ |.if resume
+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
+ |.else
+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
+ |.endif
+ | cmp PC, L:RB->maxstack; ja ->fff_fallback
+ | mov L:RB->top, PC  // New top of the coroutine stack.
+ |
+ | mov L:RB, SAVE_L  // From here on RB is the calling state again.
+ | mov L:RB->base, BASE
+ |.if resume
+ | add BASE, 8 // Keep resumed thread in stack for GC.
+ |.endif
+ | mov L:RB->top, BASE
+ |.if resume
+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
+ |.else
+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
+ |.endif
+ | sub RB, PC // Relative to PC.
+ |
+ | cmp PC, RA
+ | je >3  // Nothing to move.
+ |2: // Move args to coroutine.
+ | mov RC, [PC+RB]
+ | mov [PC-8], RC
+ | sub PC, 8  // Copy downwards until PC reaches RA.
+ | cmp PC, RA
+ | jne <2
+ |3:
+ | mov CARG2, RA
+ | mov CARG1, TMP1
+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |
+ | mov L:RB, SAVE_L
+ | mov L:PC, TMP1  // PC temporarily holds the coroutine pointer.
+ | mov BASE, L:RB->base
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ |
+ | cmp eax, LUA_YIELD
+ | ja >8  // Status above LUA_YIELD: error.
+ |4:
+ | mov RA, L:PC->base
+ | mov KBASE, L:PC->top
+ | mov L:PC->top, RA // Clear coroutine stack.
+ | mov PC, KBASE
+ | sub PC, RA  // PC = size of the results in bytes.
+ | je >6 // No results?
+ | lea RD, [BASE+PC]
+ | shr PCd, 3  // Bytes -> number of result slots.
+ | cmp RD, L:RB->maxstack
+ | ja >9 // Need to grow stack?
+ |
+ | mov RB, BASE
+ | sub RB, RA  // RB = distance from the coroutine slots to the slots at BASE.
+ |5: // Move results from coroutine.
+ | mov RD, [RA]
+ | mov [RA+RB], RD
+ | add RA, 8
+ | cmp RA, KBASE
+ | jne <5
+ |6:
+ |.if resume
+ | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
+ | mov_true ITYPE // Prepend true to results.
+ | mov [BASE-8], ITYPE
+ |.else
+ | lea RDd, [PCd+1] // nresults+1 = 1 + results.
+ |.endif
+ |7:
+ | mov PC, SAVE_PC
+ | mov MULTRES, RDd
+ |.if resume
+ | mov RA, -8  // Results start at BASE-8 (the prepended true/false).
+ |.else
+ | xor RAd, RAd  // Results start at BASE.
+ |.endif
+ | test PCd, FRAME_TYPE
+ | jz ->BC_RET_Z
+ | jmp ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | mov_false ITYPE // Prepend false to results.
+ | mov [BASE-8], ITYPE
+ | mov RA, L:PC->top
+ | sub RA, 8
+ | mov L:PC->top, RA // Clear error from coroutine stack.
+ | // Copy error message.
+ | mov RD, [RA]
+ | mov [BASE], RD
+ | mov RDd, 1+2 // nresults+1 = 1 + false + error.
+ | jmp <7
+ |.else
+ | mov CARG2, L:PC
+ | mov CARG1, L:RB
+ | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Error function does not return.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov L:RA, TMP1
+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
+ | mov CARG2, PC  // n = number of result slots (PC was shifted right by 3 above).
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov L:PC, TMP1
+ | mov BASE, L:RB->base
+ | jmp <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | mov L:RB, SAVE_L
+ | test aword L:RB->cframe, CFRAME_RESUME
+ | jz ->fff_fallback  // CFRAME_RESUME bit not set in L->cframe: use the fallback.
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov L:RB->top, RD  // top = end of the arguments.
+ | xor RDd, RDd
+ | mov aword L:RB->cframe, RD  // L->cframe = 0.
+ | mov al, LUA_YIELD
+ | mov byte L:RB->status, al  // L->status = LUA_YIELD.
+ | jmp ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ | .ffunc_1 math_abs
+ | mov RB, [BASE]
+ |.if DUALNUM
+ | checkint RB, >3
+ | cmp RBd, 0; jns ->fff_resi  // Non-negative integer: return it unchanged.
+ | neg RBd; js >2  // Still negative after neg: overflow, handled at label 2.
+ |->fff_resbit:
+ |->fff_resi:  // Shared exit: return the integer in RBd.
+ | setint RB
+ |->fff_resRB:  // Shared exit: return the TValue in RB.
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |2:
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
+ | jmp ->fff_resRB
+ |3:
+ | ja ->fff_fallback
+ |.else
+ | checknum RB, ->fff_fallback
+ |.endif
+ | shl RB, 1  // Shift the sign bit (bit 63) out ...
+ | shr RB, 1  // ... and shift a zero back in.
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |
+ |.ffunc_n math_sqrt, sqrtsd
+ |->fff_resxmm0:  // Shared exit: return the double in xmm0.
+ | mov PC, [BASE-8]
+ | movsd qword [BASE-16], xmm0
+ | // fallthrough
+ |
+ |->fff_res1:  // Shared exit: one result at BASE-16.
+ | mov RDd, 1+1
+ |->fff_res:  // Shared exit: RD = nresults+1, results start at BASE-16.
+ | mov MULTRES, RDd
+ |->fff_res_:
+ | test PCd, FRAME_TYPE
+ | jnz >7
+ |5:
+ | cmp PC_RB, RDL // More results expected?
+ | ja >6
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | movzx RAd, PC_RA
+ | neg RA
+ | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | mov aword [BASE+RD*8-24], LJ_TNIL
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | mov RA, -16 // Results start at BASE+RA = BASE-16.
+ | jmp ->vm_return
+ |
+ |.macro math_round, func  // Defines ff_math_<func>, which calls ->vm_<func>_sse.
+ | .ffunc math_ .. func
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checknumx RB, ->fff_resRB, je
+ | ja ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ |.endif
+ | movsd xmm0, qword [BASE]
+ | call ->vm_ .. func .. _sse
+ |.if DUALNUM
+ | cvttsd2si RBd, xmm0
+ | cmp RBd, 0x80000000  // 0x80000000 is what cvttsd2si yields when the value does not fit.
+ | jne ->fff_resi  // Anything else fits in an int32.
+ | cvtsi2sd xmm1, RBd
+ | ucomisd xmm0, xmm1
+ | jp ->fff_resxmm0  // Unordered (NaN): return the number.
+ | je ->fff_resi  // The value really is -2^31: return it as an integer.
+ |.endif
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc math_log
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
+ | checknumtp [BASE], ->fff_fallback
+ | movsd xmm0, qword [BASE]
+ | mov RB, BASE  // BASE is not C callee-save: keep it in RB across the call.
+ | call extern log
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |
+ |.macro math_extern, func  // ff_math_<func>: one number argument, calls the C function 'func'.
+ | .ffunc_n math_ .. func
+ | mov RB, BASE  // Keep BASE in callee-save RB across the call.
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ |.macro math_extern2, func  // ff_math_<func>: two number arguments, calls the C function 'func'.
+ | .ffunc_nn math_ .. func
+ | mov RB, BASE  // Keep BASE in callee-save RB across the call.
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | checknumtp [BASE], ->fff_fallback
+ | checknumtp [BASE+8], ->fff_fallback
+ | fld qword [BASE+8]  // st0 = second argument (exponent).
+ | fld qword [BASE]  // st0 = first argument, st1 = exponent.
+ | fscale  // st0 = st0 * 2^trunc(st1).
+ | fpop1  // Presumably drops st1 and keeps the result in st0 (see the fpop1 macro).
+ | mov PC, [BASE-8]
+ | fstp qword [BASE-16]  // Store the result and pop the x87 stack.
+ | jmp ->fff_res1
+ |
+ |.ffunc_n math_frexp
+ | mov RB, BASE  // Keep BASE in callee-save RB across the call.
+ |.if X64WIN
+ | lea CARG2, TMP1 // Caveat: CARG2 == BASE
+ |.else
+ | lea CARG1, TMP1  // Pointer argument of frexp: the TMP1 stack slot.
+ |.endif
+ | call extern frexp
+ | mov BASE, RB
+ | mov RBd, TMP1d  // Exponent written by frexp.
+ | mov PC, [BASE-8]  // Must be read before [BASE-8] is overwritten below.
+ | movsd qword [BASE-16], xmm0  // First result: the value returned by frexp.
+ |.if DUALNUM
+ | setint RB
+ | mov [BASE-8], RB  // Second result: the exponent as an integer.
+ |.else
+ | cvtsi2sd xmm1, RBd
+ | movsd qword [BASE-8], xmm1  // Second result: the exponent as a double.
+ |.endif
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |
+ |.ffunc_n math_modf
+ | mov RB, BASE  // Keep BASE in callee-save RB across the call.
+ |.if X64WIN
+ | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
+ |.else
+ | lea CARG1, [BASE-16]  // modf stores the integral part through this pointer: first result slot.
+ |.endif
+ | call extern modf
+ | mov BASE, RB
+ | mov PC, [BASE-8]  // Must be read before [BASE-8] is overwritten below.
+ | movsd qword [BASE-8], xmm0  // Second result: the value returned by modf.
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |
+ |.macro math_minmax, name, cmovop, sseop  // cmovop selects on the integer path, sseop on the number path.
+ | .ffunc_1 name
+ | mov RAd, 2  // RA = 1-based number of the next argument to compare.
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, >4
+ |1: // Handle integers.
+ | cmp RAd, RDd; jae ->fff_resRB  // All arguments consumed (RD = nargs+1).
+ | mov TMPR, [BASE+RA*8-8]
+ | checkint TMPR, >3
+ | cmp RBd, TMPRd
+ | cmovop RB, TMPR  // Replace the running result when cmovop's condition holds.
+ | add RAd, 1
+ | jmp <1
+ |3:
+ | ja ->fff_fallback
+ | // Convert intermediate result to number and continue below.
+ | cvtsi2sd xmm0, RBd
+ | jmp >6
+ |4:
+ | ja ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ |.endif
+ |
+ | movsd xmm0, qword [BASE]
+ |5: // Handle numbers or integers.
+ | cmp RAd, RDd; jae ->fff_resxmm0  // All arguments consumed.
+ |.if DUALNUM
+ | mov RB, [BASE+RA*8-8]
+ | checknumx RB, >6, jb
+ | ja ->fff_fallback
+ | cvtsi2sd xmm1, RBd  // Integer argument: convert it to a double.
+ | jmp >7
+ |.else
+ | checknumtp [BASE+RA*8-8], ->fff_fallback
+ |.endif
+ |6:
+ | movsd xmm1, qword [BASE+RA*8-8]
+ |7:
+ | sseop xmm0, xmm1
+ | add RAd, 1
+ | jmp <5
+ |.endmacro
+ |
+ | math_minmax math_min, cmovg, minsd
+ | math_minmax math_max, cmovl, maxsd
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback
+ | mov STR:RB, [BASE]
+ | checkstr STR:RB, ->fff_fallback
+ | mov PC, [BASE-8]
+ | cmp dword STR:RB->len, 1
+ | jb ->fff_res0 // Return no results for empty string.
+ | movzx RBd, byte STR:RB[1]  // Load the first byte located after the GCstr header.
+ |.if DUALNUM
+ | jmp ->fff_resi
+ |.else
+ | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
+ |.endif
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ | cvttsd2si RBd, qword [BASE]
+ |.endif
+ | cmp RBd, 255; ja ->fff_fallback  // Unsigned compare, so negative values are rejected too.
+ | mov TMP1d, RBd
+ | mov TMPRd, 1  // Length 1.
+ | lea RD, TMP1 // Points to stack. Little-endian.
+ |->fff_newstr:  // Shared exit: create a string from RD (pointer) and TMPRd (length).
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG3d, TMPRd // Zero-extended to size_t.
+ | mov CARG2, RD
+ | mov CARG1, L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:  // Shared exit: return the GCstr in RD.
+ | // GCstr * returned in rax (RD).
+ | mov BASE, L:RB->base
+ | mov PC, [BASE-8]
+ | settp STR:RD, LJ_TSTR
+ | mov [BASE-16], STR:RD
+ | jmp ->fff_res1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | mov TMPRd, -1  // Default end = -1.
+ | cmp NARGS:RDd, 1+2; jb ->fff_fallback
+ | jna >1  // Exactly two arguments: keep the default end.
+ |.if DUALNUM
+ | mov TMPR, [BASE+16]
+ | checkint TMPR, ->fff_fallback
+ |.else
+ | checknumtp [BASE+16], ->fff_fallback
+ | cvttsd2si TMPRd, qword [BASE+16]
+ |.endif
+ |1:
+ | mov STR:RB, [BASE]
+ | checkstr STR:RB, ->fff_fallback
+ |.if DUALNUM
+ | mov ITYPE, [BASE+8]
+ | mov RAd, ITYPEd // Must clear hiword for lea below.
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISNUM
+ | jne ->fff_fallback
+ |.else
+ | checknumtp [BASE+8], ->fff_fallback
+ | cvttsd2si RAd, qword [BASE+8]
+ |.endif
+ | mov RCd, STR:RB->len
+ | cmp RCd, TMPRd // len < end? (unsigned compare)
+ | jb >5
+ |2:
+ | test RAd, RAd // start <= 0?
+ | jle >7
+ |3:
+ | sub TMPRd, RAd // start > end?
+ | jl ->fff_emptystr
+ | lea RD, [STR:RB+RAd+#STR-1]  // RD = address of character 'start' (1-based) in the string data.
+ | add TMPRd, 1  // Length = end - start + 1.
+ |4:
+ | jmp ->fff_newstr
+ |
+ |5: // Negative end or overflow.
+ | jl >6  // Still the flags of 'cmp RCd, TMPRd': signed len < end means overflow.
+ | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
+ | jmp <2
+ |6: // Overflow.
+ | mov TMPRd, RCd // end = len
+ | jmp <2
+ |
+ |7: // Negative start or underflow.
+ | je >8  // start == 0.
+ | add RAd, RCd // start = start+(len+1)
+ | add RAd, 1
+ | jg <3 // start > 0?
+ |8: // Underflow.
+ | mov RAd, 1 // start = 1
+ | jmp <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
+ | jmp <4
+ |
+ |.macro ffstring_op, name  // ff_string_<name>: calls lj_buf_putstr_<name>, then lj_buf_tostr.
+ | .ffunc_1 string_ .. name
+ | ffgccheck
+ |.if X64WIN
+ | mov STR:TMPR, [BASE]
+ | checkstr STR:TMPR, ->fff_fallback
+ |.else
+ | mov STR:CARG2, [BASE]
+ | checkstr STR:CARG2, ->fff_fallback
+ |.endif
+ | mov L:RB, SAVE_L
+ | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]  // First C argument: the global tmpbuf.
+ | mov L:RB->base, BASE
+ |.if X64WIN
+ | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
+ |.endif
+ | mov RC, SBUF:CARG1->b
+ | mov SBUF:CARG1->L, L:RB
+ | mov SBUF:CARG1->w, RC  // w = b; presumably this resets the buffer to empty (confirm in lj_buf.h).
+ | mov SAVE_PC, PC
+ | call extern lj_buf_putstr_ .. name
+ | mov CARG1, rax  // Pass the returned value on to lj_buf_tostr.
+ | call extern lj_buf_tostr
+ | jmp ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |.macro .ffunc_bit, name, kind, fdef  // Converts the first argument to an int32 in RBd. fdef = entry macro to use.
+ | fdef name
+ |.if kind == 2
+ | sseconst_tobit xmm1, RB  // kind 2: load the tobit constant into xmm1 up front (the callers reuse it).
+ |.endif
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, >1
+ |.if kind > 0
+ | jmp >2  // Integer argument: continue after the macro.
+ |.else
+ | jmp ->fff_resbit  // kind 0: an integer argument is already the result.
+ |.endif
+ |1:
+ | ja ->fff_fallback
+ | movd xmm0, RB
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ | movsd xmm0, qword [BASE]
+ |.endif
+ |.if kind < 2
+ | sseconst_tobit xmm1, RB
+ |.endif
+ | addsd xmm0, xmm1  // Add the tobit constant ...
+ | movd RBd, xmm0  // ... and take the low 32 bits of the sum as the integer.
+ |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind  // Two-parameter form: fdef defaults to .ffunc_1.
+ | .ffunc_bit name, kind, .ffunc_1
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
+ | jmp ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins  // Folds all arguments into RBd with 'ins', last argument first.
+ | .ffunc_bit name, 2
+ | mov TMPRd, NARGS:RDd // Save for fallback.
+ | lea RD, [BASE+NARGS:RD*8-16]  // RD = address of the last argument.
+ |1:
+ | cmp RD, BASE
+ | jbe ->fff_resbit  // Back at the first argument: done.
+ |.if DUALNUM
+ | mov RA, [RD]
+ | checkint RA, >2
+ | ins RBd, RAd
+ | sub RD, 8
+ | jmp <1
+ |2:
+ | ja ->fff_fallback_bit_op
+ | movd xmm0, RA
+ |.else
+ | checknumtp [RD], ->fff_fallback_bit_op
+ | movsd xmm0, qword [RD]
+ |.endif
+ | addsd xmm0, xmm1  // xmm1 still holds the tobit constant loaded by .ffunc_bit (kind 2).
+ | movd RAd, xmm0
+ | ins RBd, RAd
+ | sub RD, 8
+ | jmp <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, and
+ |.ffunc_bit_op bit_bor, or
+ |.ffunc_bit_op bit_bxor, xor
+ |
+ |.ffunc_bit bit_bswap, 1
+ | bswap RBd
+ | jmp ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+ | not RBd
+ |.if DUALNUM
+ | jmp ->fff_resbit
+ |.else
+ |->fff_resbit:  // Non-DUALNUM: return the int32 in RBd as a double.
+ | cvtsi2sd xmm0, RBd
+ | jmp ->fff_resxmm0
+ |.endif
+ |
+ |->fff_fallback_bit_op:
+ | mov NARGS:RDd, TMPRd // Restore for fallback
+ | jmp ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins  // Shift/rotate: first argument in RBd, count in cl.
+ |.if DUALNUM
+ | .ffunc_bit name, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | mov RA, [BASE+8]
+ | checkint RA, ->fff_fallback
+ |.else
+ | .ffunc_nn name
+ | sseconst_tobit xmm2, RB
+ | addsd xmm0, xmm2
+ | addsd xmm1, xmm2
+ | movd RBd, xmm0  // RBd = value to shift.
+ | movd RAd, xmm1  // RAd = shift count.
+ |.endif
+ | ins RBd, cl // Assumes RA is ecx.
+ | jmp ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, shl
+ |.ffunc_bit_sh bit_rshift, shr
+ |.ffunc_bit_sh bit_arshift, sar
+ |.ffunc_bit_sh bit_rol, rol
+ |.ffunc_bit_sh bit_ror, ror
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+ | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
+ | jmp ->fff_fallback
+ |->fff_fallback_1:
+ | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RD = nargs+1
+ | mov L:RB, SAVE_L
+ | mov PC, [BASE-8] // Fallback may overwrite PC.
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]  // RD = end of the arguments.
+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
+ | mov L:RB->top, RD
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | cmp RA, L:RB->maxstack
+ | ja >5 // Need to grow stack.
+ | mov CARG1, L:RB
+ | call aword CFUNC:RD->f // (lua_State *L)
+ | mov BASE, L:RB->base
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | test RDd, RDd; jg ->fff_res // Returned nresults+1?
+ |1:
+ | mov RA, L:RB->top
+ | sub RA, BASE
+ | shr RAd, 3  // RA = number of slots between base and top.
+ | test RDd, RDd
+ | lea NARGS:RDd, [RAd+1]  // nargs+1. lea and the mov below leave the flags of the test intact.
+ | mov LFUNC:RB, [BASE-16]
+ | jne ->vm_call_tail // Returned -1?
+ | cleartp LFUNC:RB
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | mov RA, BASE
+ | test PCd, FRAME_TYPE
+ | jnz >3
+ | movzx RBd, PC_RA
+ | neg RB
+ | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
+ |3:
+ | mov RB, PC
+ | and RB, -8  // Clear the low three bits of PC, leaving the frame delta.
+ | sub BASE, RB
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | mov CARG2d, LUA_MINSTACK
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | xor RDd, RDd // Simulate a return 0.
+ | jmp <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+ | pop RB // Must keep stack at same level.
+ | mov TMP1, RB // Save return address
+ | mov L:RB, SAVE_L
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov CARG1, L:RB
+ | mov L:RB->top, RD
+ | call extern lj_gc_step // (lua_State *L)
+ | mov BASE, L:RB->base
+ | mov RD, L:RB->top
+ | sub RD, BASE
+ | shr RDd, 3
+ | add NARGS:RDd, 1  // Recompute nargs+1 from L->top and L->base.
+ | mov RB, TMP1
+ | push RB // Restore return address.
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
+ | jnz >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | test RDL, HOOK_ACTIVE
+ | jnz >1
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >1
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jmp >1
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_ACTIVE // Hook already active?
+ | jnz >5
+ | jmp >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_ACTIVE // Hook already active?
+ | jnz >5
+ |
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >5  // Neither a line nor a count hook is set: just re-dispatch.
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jz >1  // Hook count reached zero: call the dispatcher.
+ | test RDL, LUA_MASKLINE
+ | jz >5  // No line hook: just re-dispatch.
+ |1:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC // Caveat: CARG2 == BASE
+ | mov CARG1, L:RB
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | mov BASE, L:RB->base
+ |4:
+ | movzx RAd, PC_RA
+ |5:  // Decode the current instruction again and run its static handler.
+ | movzx OP, PC_OP
+ | movzx RDd, PC_RD
+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
+ |
+ |->cont_hook: // Continue from hook yield.
+ | add PC, 4
+ | mov RA, [RB-40]
+ | mov MULTRES, RAd // Restore MULTRES for *M ins.
+ | jmp <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
+ | cleartp LFUNC:RB
+ | mov RB, LFUNC:RB->pc
+ | movzx RDd, byte [RB+PC2PROTO(framesize)]
+ | lea RD, [BASE+RD*8]  // top = base + framesize slots.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RD
+ | mov CARG2, PC
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | jmp <3  // Reload BASE and re-dispatch via the shared tail of vm_inshook.
+ |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | mov SAVE_PC, PC
+ |.if JIT
+ | jmp >1  // Skip the hot-call marker below.
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | mov SAVE_PC, PC
+ | or PC, 1 // Marker for hot call.
+ |1:
+ |.endif
+ | lea RD, [BASE+NARGS:RD*8-8]  // RD = end of the arguments.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RD
+ | mov CARG2, PC
+ | mov CARG1, L:RB
+ | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // ASMFunction returned in eax/rax (RD).
+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
+ |.if JIT
+ | and PC, -2  // Clear the hot-call marker bit again.
+ |.endif
+ | mov BASE, L:RB->base
+ | mov RA, RD  // Keep the returned ASMFunction while RD is recomputed.
+ | mov RD, L:RB->top
+ | sub RD, BASE
+ | mov RB, RA
+ | movzx RAd, PC_RA
+ | shr RDd, 3
+ | add NARGS:RDd, 1  // RD = nargs+1 again, from L->top and L->base.
+ | jmp RB  // Continue at the returned ASMFunction.
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // BASE = base, RC = result, RB = mbase
+ | mov TRACE:ITYPE, [RB-40] // Save previous trace.
+ | cleartp TRACE:ITYPE
+ | mov TMPRd, MULTRES
+ | movzx RAd, PC_RA
+ | lea RA, [BASE+RA*8] // Call base.
+ | sub TMPRd, 1
+ | jz >2  // No results to move.
+ |1: // Move results down.
+ | mov RB, [RC]
+ | mov [RA], RB
+ | add RC, 8
+ | add RA, 8
+ | sub TMPRd, 1
+ | jnz <1
+ |2:
+ | movzx RCd, PC_RA
+ | movzx RBd, PC_RB
+ | add RC, RB
+ | lea RC, [BASE+RC*8-8]  // RC = BASE + (PC_RA + PC_RB - 1)*8: end of the wanted results.
+ |3:
+ | cmp RC, RA
+ | ja >9 // More results wanted?
+ |
+ | test TRACE:ITYPE, TRACE:ITYPE
+ | jz ->cont_nop  // No previous trace.
+ | movzx RBd, word TRACE:ITYPE->traceno
+ | movzx RDd, word TRACE:ITYPE->link
+ | cmp RDd, RBd
+ | je ->cont_nop // Blacklisted.
+ | test RDd, RDd
+ | jne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | mov [DISPATCH+DISPATCH_J(exitno)], RB  // RB holds the previous trace's traceno here.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | jmp ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | mov aword [RA], LJ_TNIL
+ | add RA, 8
+ | jmp <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC // Caveat: CARG2 == BASE
+ | mov CARG1, L:RB
+ | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | sub PC, 4
+ | jmp ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
+ |.if JIT
+ | push r13; push r12
+ | push r11; push r10; push r9; push r8
+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp  // rbp = rsp + 9 pushes (72) + 16: just above the two exit-number slots.
+ | push rbx; push rdx; push rcx; push rax
+ | movzx RCd, byte [rbp-8] // Reconstruct exit number.
+ | mov RCH, byte [rbp-16]  // Second pushed byte becomes bits 8..15.
+ | mov [rbp-8], r15; mov [rbp-16], r14  // Reuse the two exit-number slots to save r15 and r14.
+ | // DISPATCH is preserved on-trace in LJ_GC64 mode.
+ | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
+ | set_vmstate EXIT
+ | mov [DISPATCH+DISPATCH_J(exitno)], RCd
+ | mov [DISPATCH+DISPATCH_J(parent)], RAd
+ |.if X64WIN
+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
+ |.else
+ | sub rsp, 16*8 // Room for SSE regs.
+ |.endif
+ | add rbp, -128  // rbp now points just below the 14 pushed GPR slots (112 bytes) and the two reused exit-number slots.
+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0  // xmm0 ends up at the lowest address of the save area.
+ | // Caveat: RB is rbp.
+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov L:RB->base, BASE
+ |.if X64WIN
+ | lea CARG2, [rsp+4*8]  // Skip the 4*8 byte save area reserved above.
+ |.else
+ | mov CARG2, rsp  // ExitState * = start of the saved register block.
+ |.endif
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
+ | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // MULTRES or negated error code returned in eax (RD).
+ | mov RA, L:RB->cframe
+ | and RA, CFRAME_RAWMASK
+ | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
+ | mov BASE, L:RB->base
+ | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
+ | jmp >1  // Join vm_exit_interp with RA already pointing at the C frame.
+ |.endif
+ |->vm_exit_interp:
+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+ |.if JIT
+ | // Restore additional callee-save registers only used in compiled code.
+ |.if X64WIN
+ | lea RA, [rsp+10*16+4*8]
+ |1:
+ | movdqa xmm15, [RA-10*16]
+ | movdqa xmm14, [RA-9*16]
+ | movdqa xmm13, [RA-8*16]
+ | movdqa xmm12, [RA-7*16]
+ | movdqa xmm11, [RA-6*16]
+ | movdqa xmm10, [RA-5*16]
+ | movdqa xmm9, [RA-4*16]
+ | movdqa xmm8, [RA-3*16]
+ | movdqa xmm7, [RA-2*16]
+ | mov rsp, RA // Reposition stack to C frame.
+ | movdqa xmm6, [RA-1*16]
+ | mov r15, CSAVE_1
+ | mov r14, CSAVE_2
+ | mov r13, CSAVE_3
+ | mov r12, CSAVE_4
+ |.else
+ | lea RA, [rsp+16]
+ |1:
+ | mov r13, [RA-8]
+ | mov r12, [RA]
+ | mov rsp, RA // Reposition stack to C frame.
+ |.endif
+ | test RDd, RDd; js >9 // Check for error from exit.
+ | mov L:RB, SAVE_L
+ | mov MULTRES, RDd
+ | mov LFUNC:KBASE, [BASE-16]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]  // KBASE = constants pointer taken from the function's prototype.
+ | mov L:RB->base, BASE
+ | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
+ | set_vmstate INTERP
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | mov RCd, [PC]  // Fetch the 32-bit instruction word.
+ | movzx RAd, RCH  // RA = bits 8..15 of the instruction word.
+ | movzx OP, RCL  // OP = low byte of the instruction word.
+ | add PC, 4
+ | shr RCd, 16  // RC = upper 16 bits of the instruction word.
+ | cmp OP, BC_FUNCF // Function header?
+ | jb >3
+ | cmp OP, BC_FUNCC+2 // Fast function?
+ | jae >4
+ |2:
+ | mov RCd, MULTRES // RC/RD holds nres+1.
+ |3:
+ | jmp aword [DISPATCH+OP*8]  // Indirect jump through the dispatch table entry for OP.
+ |
+ |4: // Check frame below fast function.
+ | mov RC, [BASE-8]
+ | test RCd, FRAME_TYPE
+ | jnz <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | movzx RCd, byte [RC-3]
+ | neg RC
+ | mov LFUNC:KBASE, [BASE+RC*8-32]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | jmp <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | mov CARG2d, RDd
+ | mov CARG1, L:RB
+ | neg CARG2d  // RD held the negated error code; pass it un-negated.
+ | call extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called by math.floor/math.ceil fast functions
+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+ |.macro vm_round, name, mode, cond
+ |->name:
+ |->name .. _sse:
+ | sseconst_abs xmm2, RD
+ | sseconst_2p52 xmm3, RD
+ | movaps xmm1, xmm0
+ | andpd xmm1, xmm2 // |x|
+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
+ | jbe >1  // Also taken for unordered (NaN) input: x is returned unchanged.
+ | andnpd xmm2, xmm0 // Isolate sign bit.
+ |.if mode == 2 // trunc(x)?
+ | movaps xmm0, xmm1
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+ | subsd xmm1, xmm3
+ | sseconst_1 xmm3, RD
+ | cmpsd xmm0, xmm1, 1 // |x| < result?
+ | andpd xmm0, xmm3  // Compare mask AND constant: the constant if true, else 0.
+ | subsd xmm1, xmm0 // If yes, subtract 1.
+ | orpd xmm1, xmm2 // Merge sign bit back in.
+ |.else
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+ | subsd xmm1, xmm3
+ | orpd xmm1, xmm2 // Merge sign bit back in.
+ | sseconst_1 xmm3, RD
+ | .if mode == 1 // ceil(x)?
+ | cmpsd xmm0, xmm1, 6 // x > result?
+ | andpd xmm0, xmm3  // Compare mask AND constant: the constant if true, else 0.
+ | addsd xmm1, xmm0 // If yes, add 1.
+ | orpd xmm1, xmm2 // Merge sign bit back in (again).
+ | .else // floor(x)?
+ | cmpsd xmm0, xmm1, 1 // x < result?
+ | andpd xmm0, xmm3  // Compare mask AND constant: the constant if true, else 0.
+ | subsd xmm1, xmm0 // If yes, subtract 1.
+ | .endif
+ |.endif
+ | movaps xmm0, xmm1  // Return the rounded value in xmm0.
+ |1:
+ | ret
+ |.endmacro
+ |
+ | vm_round vm_floor, 0, 1  // mode 0 = floor.
+ | vm_round vm_ceil, 1, JIT  // mode 1 = ceil.
+ | vm_round vm_trunc, 2, JIT  // mode 2 = trunc.
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod:
+ |// Args in xmm0/xmm1, return value in xmm0.
+ |// Caveat: xmm0-xmm5 and RC (eax) modified!
+ | movaps xmm5, xmm0  // Keep the original x in xmm5.
+ | divsd xmm0, xmm1  // xmm0 = x/y.
+ | sseconst_abs xmm2, RD
+ | sseconst_2p52 xmm3, RD
+ | movaps xmm4, xmm0
+ | andpd xmm4, xmm2 // |x/y|
+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
+ | jbe >1
+ | andnpd xmm2, xmm0 // Isolate sign bit.
+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
+ | subsd xmm4, xmm3
+ | orpd xmm4, xmm2 // Merge sign bit back in.
+ | sseconst_1 xmm2, RD
+ | cmpsd xmm0, xmm4, 1 // x/y < result?
+ | andpd xmm0, xmm2
+ | subsd xmm4, xmm0 // If yes, subtract 1.0.
+ | movaps xmm0, xmm5  // xmm0 = original x.
+ | mulsd xmm1, xmm4  // xmm1 = y * rounded-down quotient.
+ | subsd xmm0, xmm1  // Result: x - y*quotient.
+ | ret
+ |1:
+ | mulsd xmm1, xmm0  // Quotient used unrounded on this path: xmm1 = y * (x/y).
+ | movaps xmm0, xmm5
+ | subsd xmm0, xmm1
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+ |->vm_cpuid:
+ | mov eax, CARG1d  // eax = f, the cpuid function number.
+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif  // Windows: save rsi and copy the res pointer into it.
+ | push rbx  // cpuid writes ebx, so rbx is saved and restored around it.
+ | xor ecx, ecx  // ecx = 0 as input to cpuid.
+ | cpuid
+ | mov [rsi], eax  // res[0..3] = eax, ebx, ecx, edx.
+ | mov [rsi+4], ebx
+ | mov [rsi+8], ecx
+ | mov [rsi+12], edx
+ | pop rbx
+ | .if X64WIN; pop rsi; .endif
+ | ret
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_IDX, CARG2d
+ |.define NEXT_IDXa, CARG2
+ |.define NEXT_PTR, RC
+ |.define NEXT_PTRd, RCd
+ |.define NEXT_TMP, CARG3
+ |.define NEXT_ASIZE, CARG4d
+ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.if X64WIN
+ |.define NEXT_RES_PTR, [rsp+aword*5]
+ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
+ |.else
+ |.define NEXT_RES_PTR, [rsp+aword*1]
+ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.endif
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in edx.
+ |->vm_next:
+ |.if JIT
+ | mov NEXT_ASIZE, NEXT_TAB->asize
+ |1: // Traverse array part.
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5
+ | mov NEXT_TMP, NEXT_TAB->array
+ | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]  // Load array slot idx.
+ | cmp NEXT_TMP, LJ_TNIL; je >2
+ | lea NEXT_PTR, NEXT_RES_PTR  // Returned pointer: result slots on the stack.
+ | mov qword [NEXT_PTR], NEXT_TMP  // First result slot = the slot's contents.
+ |.if DUALNUM
+ | setint NEXT_TMP, NEXT_IDXa
+ | mov qword [NEXT_PTR+qword*1], NEXT_TMP  // Second result slot = idx as integer.
+ |.else
+ | cvtsi2sd xmm0, NEXT_IDX
+ | movsd qword [NEXT_PTR+qword*1], xmm0  // Second result slot = idx as double.
+ |.endif
+ | NEXT_RES_IDX 1  // Next idx = idx + 1.
+ | ret
+ |2: // Skip holes in array part.
+ | add NEXT_IDX, 1
+ | jmp <1
+ |
+ |5: // Traverse hash part.
+ | sub NEXT_IDX, NEXT_ASIZE  // Rebase idx so 0 is the first hash node.
+ |6:
+ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
+ | imul NEXT_PTRd, NEXT_IDX, #NODE  // Byte offset = idx * sizeof(Node).
+ | add NODE:NEXT_PTR, NEXT_TAB->node  // Returned pointer: the node itself.
+ | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
+ | NEXT_RES_IDXL NEXT_ASIZE+1  // Next idx = hash idx + asize + 1.
+ | ret
+ |7: // Skip holes in hash part.
+ | add NEXT_IDX, 1
+ | jmp <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | NEXT_RES_IDX NEXT_ASIZE
+ | lea NEXT_PTR, NEXT_RES_PTR
+ | mov qword [NEXT_PTR+qword*1], LJ_TNIL
+ | ret
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
+#ifdef LUA_USE_ASSERT
+ | int3
+#endif
+ | int3  // Emitted unconditionally, outside the #ifdef.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in ah/al.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
+ | lea DISPATCH, [ebp+GG_G2DISP]  // NOTE(review): base is spelled ebp (32-bit) in a GC64 file; confirm the assembler emits a 64-bit base.
+ | mov CTSTATE, GL:ebp->ctype_state
+ | movzx eax, ax  // Keep only the 16-bit slot number from ah/al.
+ | mov CTSTATE->cb.slot, eax
+ | mov CTSTATE->cb.gpr[0], CARG1  // Save incoming integer argument registers.
+ | mov CTSTATE->cb.gpr[1], CARG2
+ | mov CTSTATE->cb.gpr[2], CARG3
+ | mov CTSTATE->cb.gpr[3], CARG4
+ | movsd qword CTSTATE->cb.fpr[0], xmm0  // Save incoming FP argument registers.
+ | movsd qword CTSTATE->cb.fpr[1], xmm1
+ | movsd qword CTSTATE->cb.fpr[2], xmm2
+ | movsd qword CTSTATE->cb.fpr[3], xmm3
+ |.if X64WIN
+ | lea rax, [rsp+CFRAME_SIZE+4*8]
+ |.else
+ | lea rax, [rsp+CFRAME_SIZE]
+ | mov CTSTATE->cb.gpr[4], CARG5
+ | mov CTSTATE->cb.gpr[5], CARG6
+ | movsd qword CTSTATE->cb.fpr[4], xmm4
+ | movsd qword CTSTATE->cb.fpr[5], xmm5
+ | movsd qword CTSTATE->cb.fpr[6], xmm6
+ | movsd qword CTSTATE->cb.fpr[7], xmm7
+ |.endif
+ | mov CTSTATE->cb.stack, rax  // Stack address computed by the lea above.
+ | mov CARG2, rsp
+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
+ | mov CARG1, CTSTATE
+ | call extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // lua_State * returned in eax (RD).
+ | set_vmstate INTERP
+ | mov BASE, L:RD->base
+ | mov RD, L:RD->top
+ | sub RD, BASE
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | shr RD, 3
+ | add RD, 1  // RD = (top - base)/8 + 1.
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | mov L:RA, SAVE_L
+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
+ | mov aword CTSTATE->L, L:RA
+ | mov L:RA->base, BASE
+ | mov L:RA->top, RB
+ | mov CARG1, CTSTATE
+ | mov CARG2, RC
+ | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | mov rax, CTSTATE->cb.gpr[0]  // Load return registers from the callback state.
+ | movsd xmm0, qword CTSTATE->cb.fpr[0]
+ | jmp ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, rbx
+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
+ |
+ | // Readjust stack.
+ | mov eax, CCSTATE->spadj
+ | sub rsp, rax
+ |
+ | // Copy stack slots.
+ | movzx ecx, byte CCSTATE->nsp
+ | sub ecx, 1
+ | js >2  // nsp == 0: nothing to copy.
+ |1:
+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+ | sub ecx, 1  // Copy from the last slot down to slot 0.
+ | jns <1
+ |2:
+ |
+ | movzx eax, byte CCSTATE->nfpr  // eax = nfpr; not modified again before the call.
+ | mov CARG1, CCSTATE->gpr[0]
+ | mov CARG2, CCSTATE->gpr[1]
+ | mov CARG3, CCSTATE->gpr[2]
+ | mov CARG4, CCSTATE->gpr[3]
+ |.if not X64WIN
+ | mov CARG5, CCSTATE->gpr[4]
+ | mov CARG6, CCSTATE->gpr[5]
+ |.endif
+ | test eax, eax; jz >5  // Skip all FP register loads when nfpr == 0.
+ | movaps xmm0, CCSTATE->fpr[0]
+ | movaps xmm1, CCSTATE->fpr[1]
+ | movaps xmm2, CCSTATE->fpr[2]
+ | movaps xmm3, CCSTATE->fpr[3]
+ |.if not X64WIN
+ | cmp eax, 4; jbe >5  // Load xmm4-xmm7 only when nfpr > 4.
+ | movaps xmm4, CCSTATE->fpr[4]
+ | movaps xmm5, CCSTATE->fpr[5]
+ | movaps xmm6, CCSTATE->fpr[6]
+ | movaps xmm7, CCSTATE->fpr[7]
+ |.endif
+ |5:
+ |
+ | call aword CCSTATE->func
+ |
+ | mov CCSTATE->gpr[0], rax  // Store return registers back into the call state.
+ | movaps CCSTATE->fpr[0], xmm0
+ |.if not X64WIN
+ | mov CCSTATE->gpr[1], rdx
+ | movaps CCSTATE->fpr[1], xmm1
+ |.endif
+ |
+ | mov rbx, [rbp-8]; leave; ret  // Restore rbx from its slot below the saved rbp.
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ |.macro jmp_comp, lt, ge, le, gt, target
+ ||switch (op) {
+ ||case BC_ISLT:
+ | lt target
+ ||break;
+ ||case BC_ISGE:
+ | ge target
+ ||break;
+ ||case BC_ISLE:
+ | le target
+ ||break;
+ ||case BC_ISGT:
+ | gt target
+ ||break;
+ ||default: break; /* Shut up GCC. */
+ ||}
+ |.endmacro
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | mov ITYPE, [BASE+RA*8]
+ | mov RB, [BASE+RD*8]
+ | mov RA, ITYPE
+ | mov RD, RB
+ | sar ITYPE, 47  // Shift down so the type tag can be compared.
+ | sar RB, 47
+ |.if DUALNUM
+ | cmp ITYPEd, LJ_TISNUM; jne >7
+ | cmp RBd, LJ_TISNUM; jne >8
+ | add PC, 4
+ | cmp RAd, RDd
+ | jmp_comp jge, jl, jg, jle, >9  // Emits the inverse jump: go to 9 when the comparison is false.
+ |6:
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | ja ->vmeta_comp
+ | // RA is a number.
+ | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
+ | // RA is a number, RD is an integer.
+ | cvtsi2sd xmm0, RDd
+ | jmp >2
+ |
+ |8: // RA is an integer, RD is not an integer.
+ | ja ->vmeta_comp
+ | // RA is an integer, RD is a number.
+ | cvtsi2sd xmm1, RAd
+ | movd xmm0, RD
+ | jmp >3
+ |.else
+ | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
+ | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
+ |.endif
+ |1:
+ | movd xmm0, RD  // xmm0 = src2.
+ |2:
+ | movd xmm1, RA  // xmm1 = src1.
+ |3:
+ | add PC, 4
+ | ucomisd xmm0, xmm1  // Compares src2 against src1.
+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ |.if DUALNUM
+ | jmp_comp jbe, ja, jb, jae, <9
+ | jmp <6
+ |.else
+ | jmp_comp jbe, ja, jb, jae, >1
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1:
+ | ins_next
+ |.endif
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | mov RB, [BASE+RD*8]
+ | mov ITYPE, [BASE+RA*8]
+ | add PC, 4
+ | mov RD, RB
+ | mov RA, ITYPE
+ | sar RB, 47
+ | sar ITYPE, 47
+ |.if DUALNUM
+ | cmp RBd, LJ_TISNUM; jne >7
+ | cmp ITYPEd, LJ_TISNUM; jne >8
+ | cmp RDd, RAd
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RD is not an integer.
+ | ja >5
+ | // RD is a number.
+ | movd xmm1, RD
+ | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
+ | // RD is a number, RA is an integer.
+ | cvtsi2sd xmm0, RAd
+ | jmp >2
+ |
+ |8: // RD is an integer, RA is not an integer.
+ | ja >5
+ | // RD is an integer, RA is a number.
+ | cvtsi2sd xmm1, RDd
+ | jmp >1
+ |
+ |.else
+ | cmp RBd, LJ_TISNUM; jae >5
+ | cmp ITYPEd, LJ_TISNUM; jae >5
+ | movd xmm1, RD
+ |.endif
+ |1:
+ | movd xmm0, RA
+ |2:
+ | ucomisd xmm0, xmm1
+ |4:
+ iseqne_fp: /* Shared FP tail; BC_ISEQN/BC_ISNEN jump here via goto. */
+ if (vk) {
+ | jp >2 // Unordered means not equal.
+ | jne >2
+ } else {
+ | jp >2 // Unordered means not equal.
+ | je >1
+ }
+ iseqne_end: /* Shared branch/fallthrough tail; BC_ISEQS/BC_ISNES jump here via goto. */
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ |.if not FFI
+ |3:
+ |.endif
+ } else {
+ |.if not FFI
+ |3:
+ |.endif
+ |2: // NE: Branch to the target.
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+ op == BC_ISEQN || op == BC_ISNEN)) {
+ | jmp <9  // Reuse the ins_next already emitted at label 9.
+ } else {
+ | ins_next
+ }
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ |.if FFI
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+ | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
+ |.endif
+ | cmp RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cmp RBd, ITYPEd
+ | jne <2 // Not the same type?
+ | cmp RBd, LJ_TISTABUD
+ | ja <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | cleartp TAB:RA
+ | mov TAB:RB, TAB:RA->metatable
+ | test TAB:RB, TAB:RB
+ | jz <2 // No metatable?
+ | test byte TAB:RB->nomm, 1<<MM_eq
+ | jnz <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | xor RBd, RBd // ne = 0
+ } else {
+ | mov RBd, 1 // ne = 1
+ }
+ | jmp ->vmeta_equal // Handle __eq metamethod.
+ } else {
+ |.if FFI
+ |3:
+ | cmp ITYPEd, LJ_TCDATA
+ if (LJ_DUALNUM && vk) {
+ | jne <9
+ } else {
+ | jne <2
+ }
+ | jmp ->vmeta_equal_cd
+ |.endif
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | add PC, 4
+ | checkstr RB, >3
+ | cmp RB, [KBASE+RD*8]  // Compare against the string constant slot.
+ iseqne_test: /* Also entered from BC_ISEQP/BC_ISNEP when !LJ_HASFFI. */
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | add PC, 4
+ |.if DUALNUM
+ | checkint RB, >7
+ | mov RD, [KBASE+RD*8]
+ | checkint RD, >8
+ | cmp RBd, RDd  // Both are integers: compare the 32-bit payloads.
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | ja >3
+ | // RA is a number.
+ | mov RD, [KBASE+RD*8]
+ | checkint RD, >1
+ | // RA is a number, RD is an integer.
+ | cvtsi2sd xmm0, RDd
+ | jmp >2
+ |
+ |8: // RA is an integer, RD is a number.
+ | cvtsi2sd xmm0, RBd
+ | movd xmm1, RD
+ | ucomisd xmm0, xmm1
+ | jmp >4
+ |1:
+ | movd xmm0, RD
+ |.else
+ | checknum RB, >3
+ |1:
+ | movsd xmm0, qword [KBASE+RD*8]
+ |.endif
+ |2:
+ | ucomisd xmm0, qword [BASE+RA*8]  // Compare against the stack slot in memory.
+ |4:
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | sar RB, 47
+ | add PC, 4
+ | cmp RBd, RDd  // Compare the shifted-down tag with the operand.
+ if (!LJ_HASFFI) goto iseqne_test;
+ if (vk) {
+ | jne >3
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ |3:
+ | cmp RBd, LJ_TCDATA; jne <2
+ | jmp ->vmeta_equal_cd
+ } else {
+ | je >2
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ }
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | mov ITYPE, [BASE+RD*8]
+ | add PC, 4
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov RB, ITYPE  // Keep the full value for the copy below.
+ }
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jae >1
+ } else {
+ | jb >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov [BASE+RA*8], RB  // Copy src to dst only when the branch is taken.
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+ | mov RB, [BASE+RA*8]
+ | sar RB, 47
+ | add RBd, RDd  // tag + (-type) is zero only when the type matches.
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+ | checknumtp [BASE+RA*8], ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ | mov [BASE+RA*8], RB
+ | ins_next_
+ break;
+ case BC_NOT:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ | sar RB, 47
+ | mov RCd, 2
+ | cmp RB, LJ_TISTRUECOND
+ | sbb RCd, 0  // RC = 2 - CF, where CF is set when the tag is below LJ_TISTRUECOND.
+ | shl RC, 47
+ | not RC  // Stored result is ~(RC << 47).
+ | mov [BASE+RA*8], RC
+ | ins_next
+ break;
+ case BC_UNM:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ |.if DUALNUM
+ | checkint RB, >5
+ | neg RBd
+ | jo >4  // Negation overflowed: store the FP constant at label 4 instead.
+ | setint RB
+ |9:
+ | mov [BASE+RA*8], RB
+ | ins_next
+ |4:
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
+ | jmp <9
+ |5:
+ | ja ->vmeta_unm
+ |.else
+ | checknum RB, ->vmeta_unm
+ |.endif
+ | mov64 RD, U64x(80000000,00000000)  // Mask with only bit 63 set.
+ | xor RB, RD  // Flip bit 63 (the sign bit) to negate the number.
+ |.if DUALNUM
+ | jmp <9
+ |.else
+ | mov [BASE+RA*8], RB
+ | ins_next
+ |.endif
+ break;
+ case BC_LEN:
+ | ins_AD // RA = dst, RD = src
+ | mov RD, [BASE+RD*8]
+ | checkstr RD, >2
+ |.if DUALNUM
+ | mov RDd, dword STR:RD->len
+ |1:
+ | setint RD
+ | mov [BASE+RA*8], RD
+ |.else
+ | xorps xmm0, xmm0
+ | cvtsi2sd xmm0, dword STR:RD->len
+ |1:
+ | movsd qword [BASE+RA*8], xmm0
+ |.endif
+ | ins_next
+ |2:
+ | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
+ | mov TAB:CARG1, TAB:RD
+#if LJ_52
+ | mov TAB:RB, TAB:RD->metatable
+ | cmp TAB:RB, 0
+ | jnz >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | mov RB, BASE // Save BASE.
+ | call extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in eax (RD).
+ |.if DUALNUM
+ | // Nothing to do.
+ |.else
+ | cvtsi2sd xmm0, RDd
+ |.endif
+ | mov BASE, RB // Restore BASE.
+ | movzx RAd, PC_RA  // Reload RA from the instruction after the C call.
+ | jmp <1
+#if LJ_52
+ |9: // Check for __len.
+ | test byte TAB:RB->nomm, 1<<MM_len
+ | jnz <3
+ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithpre, sseins, ssereg
+ | ins_ABC
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); /* 0 selects the VN case, 1 the NV case, anything else the VV case below. */
+ ||switch (vk) {
+ ||case 0:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vn
+ | .if DUALNUM
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_vn
+ | .endif
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [KBASE+RC*8]  // Second operand is the constant slot.
+ || break;
+ ||case 1:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_nv
+ | .if DUALNUM
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
+ | .endif
+ | movsd xmm0, qword [KBASE+RC*8]  // Operands swapped: the constant is loaded first.
+ | sseins ssereg, qword [BASE+RB*8]
+ || break;
+ ||default:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vv
+ | checknumtp [BASE+RC*8], ->vmeta_arith_vv
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [BASE+RC*8]
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins
+ | ins_ABC
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [KBASE+RC*8]
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | intins RBd, RCd; jo ->vmeta_arith_vno  // Integer overflow goes to the fallback.
+ || break;
+ ||case 1:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [KBASE+RC*8]
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | intins RCd, RBd; jo ->vmeta_arith_nvo  // Operands swapped: the result lands in RC.
+ || break;
+ ||default:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | intins RBd, RCd; jo ->vmeta_arith_vvo
+ || break;
+ ||}
+ ||if (vk == 1) {
+ | setint RC
+ | mov [BASE+RA*8], RC
+ ||} else {
+ | setint RB
+ | mov [BASE+RA*8], RB
+ ||}
+ | ins_next
+ |.endmacro
+ |
+ |.macro ins_arithpost
+ | movsd qword [BASE+RA*8], xmm0  // Store the FP result to the destination slot.
+ |.endmacro
+ |
+ |.macro ins_arith, sseins
+ | ins_arithpre sseins, xmm0
+ | ins_arithpost
+ | ins_next
+ |.endmacro
+ |
+ |.macro ins_arith, intins, sseins
+ |.if DUALNUM
+ | ins_arithdn intins
+ |.else
+ | ins_arith, sseins
+ |.endif
+ |.endmacro
+
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arith add, addsd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arith sub, subsd
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arith imul, mulsd
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arith divsd  // One-argument form: always the SSE path.
+ break;
+ case BC_MODVN:
+ | ins_arithpre movsd, xmm1  // Load operands into xmm0/xmm1 without computing.
+ |->BC_MODVN_Z:
+ | call ->vm_mod
+ | ins_arithpost
+ | ins_next
+ break;
+ case BC_MODNV: case BC_MODVV:
+ | ins_arithpre movsd, xmm1
+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+ break;
+ case BC_POW:
+ | ins_arithpre movsd, xmm1
+ | mov RB, BASE  // Keep BASE in RB across the C call.
+ | call extern pow
+ | movzx RAd, PC_RA  // Reload RA from the instruction after the C call.
+ | mov BASE, RB
+ | ins_arithpost
+ | ins_next
+ break;
+
+ case BC_CAT:
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | lea CARG2, [BASE+RC*8]  // top = address of the src_end slot.
+ | mov CARG3d, RCd
+ | sub CARG3d, RBd  // left = src_end - src_start.
+ |->BC_CAT_Z:
+ | mov L:RB, L:CARG1
+ | mov SAVE_PC, PC
+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jnz ->vmeta_binop
+ | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
+ | movzx RAd, PC_RA
+ | mov RC, [BASE+RB*8]
+ | mov [BASE+RA*8], RC
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov RD, [KBASE+RD*8]
+ | settp RD, LJ_TSTR  // Tag the constant as a string.
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | ins_AND // RA = dst, RD = cdata const (~)
+ | mov RD, [KBASE+RD*8]
+ | settp RD, LJ_TCDATA  // Tag the constant as cdata.
+ | mov [BASE+RA*8], RD
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | ins_AD // RA = dst, RD = signed int16 literal
+ |.if DUALNUM
+ | movsx RDd, RDW
+ | setint RD
+ | mov [BASE+RA*8], RD
+ |.else
+ | movsx RDd, RDW // Sign-extend literal.
+ | cvtsi2sd xmm0, RDd
+ | movsd qword [BASE+RA*8], xmm0
+ |.endif
+ | ins_next
+ break;
+ case BC_KNUM:
+ | ins_AD // RA = dst, RD = num const
+ | movsd xmm0, qword [KBASE+RD*8]
+ | movsd qword [BASE+RA*8], xmm0
+ | ins_next
+ break;
+ case BC_KPRI:
+ | ins_AD // RA = dst, RD = primitive type (~)
+ | shl RD, 47
+ | not RD  // Stored value is ~(RD << 47).
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_KNIL:
+ | ins_AD // RA = dst_start, RD = dst_end
+ | lea RA, [BASE+RA*8+8]  // RA points at the slot after dst_start.
+ | lea RD, [BASE+RD*8]  // RD points at dst_end.
+ | mov RB, LJ_TNIL
+ | mov [RA-8], RB // Sets minimum 2 slots.
+ |1:
+ | mov [RA], RB
+ | add RA, 8
+ | cmp RA, RD
+ | jbe <1  // Loop while RA <= dst_end (inclusive).
+ | ins_next
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
+ | mov RB, UPVAL:RB->v
+ | mov RD, [RB]  // Load the value the upvalue points to.
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+ | ins_AD // RA = upvalue #, RD = src
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | cmp byte UPVAL:RB->closed, 0  // Flags are consumed by the jz below.
+ | mov RB, UPVAL:RB->v
+ | mov RA, [BASE+RD*8]
+ | mov [RB], RA
+ | jz >1  // ZF from the 'closed' compare (mov leaves flags intact): not closed, skip barrier.
+ | // Check barrier for closed upvalue.
+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | mov RD, RA
+ | sar RD, 47
+ | sub RDd, LJ_TISGCV
+ | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jbe <1
+ | cleartp GCOBJ:RA
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ |.if not X64WIN
+ | mov CARG2, RB
+ | mov RB, BASE // Save BASE.
+ |.else
+ | xchg CARG2, RB // Save BASE (CARG2 == BASE).
+ |.endif
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
+ break;
+#undef TV2MARKOFS
+ case BC_USETS:
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | mov STR:RA, [KBASE+RD*8]
+ | mov RD, UPVAL:RB->v
+ | settp STR:ITYPE, STR:RA, LJ_TSTR  // ITYPE = tagged string; RA keeps the untagged pointer.
+ | mov [RD], STR:ITYPE
+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | jz <1
+ | cmp byte UPVAL:RB->closed, 0
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov RB, BASE // Save BASE (CARG2 == BASE).
+ | mov CARG2, RD
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
+ break;
+ case BC_USETN:
+ | ins_AD // RA = upvalue #, RD = num const
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | movsd xmm0, qword [KBASE+RD*8]
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | mov RA, UPVAL:RB->v
+ | movsd qword [RA], xmm0  // Store the number; no barrier code follows.
+ | ins_next
+ break;
+ case BC_USETP:
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | shl RD, 47
+ | not RD  // Stored value is ~(RD << 47).
+ | mov RA, UPVAL:RB->v
+ | mov [RA], RD
+ | ins_next
+ break;
+ case BC_UCLO:
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | mov L:RB, SAVE_L
+ | cmp aword L:RB->openupval, 0
+ | je >1  // openupval is zero: skip the call.
+ | mov L:RB->base, BASE
+ | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
+ | call extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | mov BASE, L:RB->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG3, [BASE-16]
+ | cleartp CARG3  // parent = current function with its tag cleared.
+ | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
+ | mov CARG1, L:RB
+ | mov SAVE_PC, PC
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | call extern lj_func_newL_gc
+ | // GCfuncL * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA  // Reload RA from the instruction after the C call.
+ | settp LFUNC:RC, LJ_TFUNC
+ | mov [BASE+RA*8], LFUNC:RC
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ | ins_AD // RA = dst, RD = hbits|asize
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov SAVE_PC, PC
+ | jae >5  // gc.total >= gc.threshold: run a GC step first (label 5).
+ |1:
+ | mov CARG3d, RDd
+ | and RDd, 0x7ff  // asize = low 11 bits.
+ | shr CARG3d, 11  // hbits = remaining upper bits.
+ | cmp RDd, 0x7ff
+ | je >3
+ |2:
+ | mov L:CARG1, L:RB
+ | mov CARG2d, RDd
+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA
+ | settp TAB:RC, LJ_TTAB
+ | mov [BASE+RA*8], TAB:RC
+ | ins_next
+ |3: // Turn 0x7ff into 0x801.
+ | mov RDd, 0x801
+ | jmp <2
+ |5:
+ | mov L:CARG1, L:RB
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | movzx RDd, PC_RD  // Reload RD from the instruction after the call.
+ | jmp <1
+ break;
+ case BC_TDUP:
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
+ | mov L:RB, SAVE_L
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | mov SAVE_PC, PC
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov L:RB->base, BASE
+ | jae >3  // gc.total >= gc.threshold: run a GC step first (label 3).
+ |2:
+ | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
+ | call extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA
+ | settp TAB:RC, LJ_TTAB
+ | mov [BASE+RA*8], TAB:RC
+ | ins_next
+ |3:
+ | mov L:CARG1, L:RB
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | movzx RDd, PC_RD // Need to reload RD.
+ | not RD  // Re-invert the reloaded operand; presumably mirrors what ins_AND did (confirm against its definition).
+ | jmp <2
+ break;
+
+ case BC_GGET:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov TAB:RB, LFUNC:RB->env  // Table = the current function's env.
+ | mov STR:RC, [KBASE+RD*8]
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_GSET:
+ | ins_AND // RA = src, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov TAB:RB, LFUNC:RB->env  // Table = the current function's env.
+ | mov STR:RC, [KBASE+RD*8]
+ | jmp ->BC_TSETS_Z
+ break;
+
+ case BC_TGETV:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tgetv
+ |
+ | // Integer key?
+ |.if DUALNUM
+ | checkint RC, >5
+ |.else
+ | // Convert number to int and back and compare.
+ | checknum RC, >5
+ | movd xmm0, RC
+ | cvttsd2si RCd, xmm0
+ | cvtsi2sd xmm1, RCd
+ | ucomisd xmm0, xmm1
+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
+ |.endif
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
+ | shl RCd, 3  // Index -> byte offset (8-byte slots).
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ | mov ITYPE, [RC]
+ | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1  // No metatable: store the nil.
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | jmp <1
+ |
+ |5: // String key?
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
+ | cleartp STR:RC
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
+ | mov TAB:RB, [BASE+RB*8]
+ | not RC  // Invert the operand to get the constant index.
+ | mov STR:RC, [KBASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | mov TMPRd, TAB:RB->hmask
+ | and TMPRd, STR:RC->sid  // Node index = hmask & sid.
+ | imul TMPRd, #NODE  // Node index -> byte offset.
+ | add NODE:TMPR, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR  // ITYPE = tagged key to compare against node keys.
+ |1:
+ | cmp NODE:TMPR->key, ITYPE
+ | jne >4
+ | // Get node value.
+ | mov ITYPE, NODE:TMPR->val
+ | cmp ITYPE, LJ_TNIL
+ | je >5 // Key found, but nil value?
+ |2:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | mov NODE:TMPR, NODE:TMPR->next
+ | test NODE:TMPR, NODE:TMPR
+ | jnz <1
+ | // End of hash chain: key not found, nil result.
+ | mov ITYPE, LJ_TNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <2 // No metatable: done.
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jnz <2 // 'no __index' flag set: done.
+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | mov TAB:RB, [BASE+RB*8]
+ | checktab TAB:RB, ->vmeta_tgetb
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tgetb  // Not in array part: use fallback.
+ | shl RCd, 3  // Index -> byte offset (8-byte slots).
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ | mov ITYPE, [RC]
+ | cmp ITYPE, LJ_TNIL
+ | je >2
+ |1:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1  // No metatable: store the nil.
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | jmp <1
+ break;
+ case BC_TGETR:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | cleartp TAB:RB  // No checktab here: the tag is cleared without a type check.
+ |.if DUALNUM
+ | mov RCd, dword [BASE+RC*8]
+ |.else
+ | cvttsd2si RCd, qword [BASE+RC*8]
+ |.endif
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ | mov ITYPE, [RC]
+ |->BC_TGETR2_Z:
+ | mov [BASE+RA*8], ITYPE  // No nil or metatable check before the store.
+ | ins_next
+ break;
+
+ case BC_TSETV:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tsetv
+ |
+ | // Integer key?
+ |.if DUALNUM
+ | checkint RC, >5
+ |.else
+ | // Convert number to int and back and compare.
+ | checknum RC, >5
+ | movd xmm0, RC
+ | cvttsd2si RCd, xmm0
+ | cvtsi2sd xmm1, RCd
+ | ucomisd xmm0, xmm1
+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
+ |.endif
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tsetv  // Not in array part: use fallback.
+ | shl RCd, 3  // Index -> byte offset (8-byte slots).
+ | add RC, TAB:RB->array
+ | cmp aword [RC], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2: // Set array slot.
+ | mov RB, [BASE+RA*8]
+ | mov [RC], RB
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | jmp <1
+ |
+ |5: // String key?
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
+ | cleartp STR:RC
+ | jmp ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2  // Back to the array slot store.
+    break;
+  case BC_TSETS:  /* Table set; the key is a string constant. */
+    |  ins_ABC  // RA = src, RB = table, RC = str const (~)
+    |  mov TAB:RB, [BASE+RB*8]
+    |  not RC  // Complement RC; it then indexes the constant relative to KBASE.
+    |  mov STR:RC, [KBASE+RC*8]
+    |  checktab TAB:RB, ->vmeta_tsets
+    |->BC_TSETS_Z:  // RB = GCtab *, RC = GCstr *
+    |  mov TMPRd, TAB:RB->hmask
+    |  and TMPRd, STR:RC->sid  // Node index = hmask & sid.
+    |  imul TMPRd, #NODE  // Scale the index by #NODE (DynASM: size of the NODE type).
+    |  mov byte TAB:RB->nomm, 0  // Clear metamethod cache.
+    |  add NODE:TMPR, TAB:RB->node  // TMPR = first node of the chain.
+    |  settp ITYPE, STR:RC, LJ_TSTR  // Presumably the LJ_TSTR-tagged key; it is compared with node->key below.
+    |1:
+    |  cmp NODE:TMPR->key, ITYPE
+    |  jne >5  // Different key: try the next node.
+    |  // Ok, key found. Assumes: offsetof(Node, val) == 0
+    |  cmp aword [TMPR], LJ_TNIL
+    |  je >4  // Previous value is nil?
+    |2:
+    |  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
+    |  jnz >7
+    |3:  // Set node value.
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov [TMPR], ITYPE  // TMPR points to the node value (offset 0, see the assumption above).
+    |  ins_next
+    |
+    |4:  // Check for __newindex if previous value is nil.
+    |  mov TAB:ITYPE, TAB:RB->metatable  // ITYPE serves as scratch here; TMPR still holds the node pointer.
+    |  test TAB:ITYPE, TAB:ITYPE
+    |  jz <2  // No metatable: continue with the store.
+    |  test byte TAB:ITYPE->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsets  // 'no __newindex' flag NOT set: check.
+    |  jmp <2  // Flag set: continue with the store.
+    |
+    |5:  // Follow hash chain.
+    |  mov NODE:TMPR, NODE:TMPR->next
+    |  test NODE:TMPR, NODE:TMPR
+    |  jnz <1
+    |  // End of hash chain: key not found, add a new one.
+    |
+    |  // But check for __newindex first.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz >6  // No metatable: continue.
+    |  test byte TAB:TMPR->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsets  // 'no __newindex' flag NOT set: check.
+    |6:
+    |  mov TMP1, ITYPE  // Spill the key value to TMP1; its address is passed as k.
+    |  mov L:CARG1, SAVE_L
+    |  mov L:CARG1->base, BASE  // Save BASE in L->base before the C call.
+    |  lea CARG3, TMP1
+    |  mov CARG2, TAB:RB
+    |  mov SAVE_PC, PC
+    |  call extern lj_tab_newkey  // (lua_State *L, GCtab *t, TValue *k)
+    |  // Handles write barrier for the new key. TValue * returned in rax (RC).
+    |  mov L:CARG1, SAVE_L
+    |  mov BASE, L:CARG1->base  // Reload BASE from L->base after the call.
+    |  mov TMPR, rax  // TMPR = returned slot, as expected by the store at 3.
+    |  movzx RAd, PC_RA  // Restore RA (rcx is not preserved across the C call).
+    |  jmp <2  // Must check write barrier for value.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, ITYPE
+    |  jmp <3  // Back to the node value store.
+    break;
+  case BC_TSETB:  /* Table set; the key is a byte literal used as array index. */
+    |  ins_ABC  // RA = src, RB = table, RC = byte literal
+    |  mov TAB:RB, [BASE+RB*8]
+    |  checktab TAB:RB, ->vmeta_tsetb
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tsetb  // Index not below asize: use the fallback.
+    |  shl RCd, 3  // Index * 8 = byte offset of the slot.
+    |  add RC, TAB:RB->array  // RC = address of the array slot.
+    |  cmp aword [RC], LJ_TNIL
+    |  je >3  // Previous value is nil?
+    |1:
+    |  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
+    |  jnz >7
+    |2:  // Set array slot.
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov [RC], ITYPE
+    |  ins_next
+    |
+    |3:  // Check for __newindex if previous value is nil.
+    |  mov TAB:TMPR, TAB:RB->metatable
+    |  test TAB:TMPR, TAB:TMPR
+    |  jz <1  // No metatable: continue with the store.
+    |  test byte TAB:TMPR->nomm, 1<<MM_newindex
+    |  jz ->vmeta_tsetb  // 'no __newindex' flag NOT set: check.
+    |  jmp <1  // Flag set: continue with the store.
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2
+    break;
+  case BC_TSETR:  /* Table set by numeric key; this handler does no metatable lookup. */
+    |  ins_ABC  // RA = src, RB = table, RC = key
+    |  mov TAB:RB, [BASE+RB*8]
+    |  cleartp TAB:RB
+    |.if DUALNUM
+    |  mov RC, [BASE+RC*8]  // Loads the whole slot; only RCd is used below and shl RCd zero-extends it.
+    |.else
+    |  cvttsd2si RCd, qword [BASE+RC*8]  // Truncate the double key to a 32 bit integer.
+    |.endif
+    |  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
+    |  jnz >7
+    |2:
+    |  cmp RCd, TAB:RB->asize
+    |  jae ->vmeta_tsetr  // Index not below asize: use the fallback.
+    |  shl RCd, 3  // Index * 8 = byte offset of the slot.
+    |  add RC, TAB:RB->array  // RC = address of the array slot.
+    |  // Set array slot.
+    |->BC_TSETR_Z:
+    |  mov ITYPE, [BASE+RA*8]
+    |  mov [RC], ITYPE
+    |  ins_next
+    |
+    |7:  // Possible table write barrier for the value. Skip valiswhite check.
+    |  barrierback TAB:RB, TMPR
+    |  jmp <2
+    break;
+
+  case BC_TSETM:  /* Store MULTRES-1 stack slots into consecutive array slots of a table. */
+    |  ins_AD  // RA = base (table at base-1), RD = num const (start index)
+    |1:
+    |  mov TMPRd, dword [KBASE+RD*8]  // Integer constant is in lo-word.
+    |  lea RA, [BASE+RA*8]  // RA = address of the first source slot.
+    |  mov TAB:RB, [RA-8]  // Guaranteed to be a table.
+    |  cleartp TAB:RB
+    |  test byte TAB:RB->marked, LJ_GC_BLACK  // isblack(table)
+    |  jnz >7
+    |2:
+    |  mov RDd, MULTRES
+    |  sub RDd, 1  // RD = number of slots to copy.
+    |  jz >4  // Nothing to copy?
+    |  add RDd, TMPRd  // Compute needed size.
+    |  cmp RDd, TAB:RB->asize
+    |  ja >5  // Doesn't fit into array part?
+    |  sub RDd, TMPRd  // Undo the add: RD = number of slots to copy again.
+    |  shl TMPRd, 3  // Start index * 8 = byte offset.
+    |  add TMPR, TAB:RB->array  // TMPR = address of the first destination slot.
+    |3:  // Copy result slots to table.
+    |  mov RB, [RA]
+    |  add RA, 8
+    |  mov [TMPR], RB
+    |  add TMPR, 8
+    |  sub RDd, 1
+    |  jnz <3
+    |4:
+    |  ins_next
+    |
+    |5:  // Need to resize array part.
+    |  mov L:CARG1, SAVE_L
+    |  mov L:CARG1->base, BASE  // Caveat: CARG2/CARG3 may be BASE.
+    |  mov CARG2, TAB:RB
+    |  mov CARG3d, RDd  // RD holds the needed size computed before the branch to 5.
+    |  mov L:RB, L:CARG1  // Keep L in RB (rbp, callee-save) across the call.
+    |  mov SAVE_PC, PC
+    |  call extern lj_tab_reasize  // (lua_State *L, GCtab *t, int nasize)
+    |  mov BASE, L:RB->base
+    |  movzx RAd, PC_RA  // Restore RA.
+    |  movzx RDd, PC_RD  // Restore RD.
+    |  jmp <1  // Retry.
+    |
+    |7:  // Possible table write barrier for any value. Skip valiswhite check.
+    |  barrierback TAB:RB, RD
+    |  jmp <2
+    break;
+
+  /* -- Calls and vararg handling ----------------------------------------- */
+
+  case BC_CALL: case BC_CALLM:  /* Call the function in slot RA; CALLM adds MULTRES to the argument count. */
+    |  ins_A_C  // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+    if (op == BC_CALLM) {
+      |  add NARGS:RDd, MULTRES
+    }
+    |  mov LFUNC:RB, [BASE+RA*8]
+    |  checkfunc LFUNC:RB, ->vmeta_call_ra
+    |  lea BASE, [BASE+RA*8+16]  // New BASE = two slots above the function slot.
+    |  ins_call
+    break;
+
+  case BC_CALLMT:  /* Tail call variant that adds MULTRES to the argument count. */
+    |  ins_AD  // RA = base, RD = extra_nargs
+    |  add NARGS:RDd, MULTRES
+    |  // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+    break;
+  case BC_CALLT:  /* Tail call: moves function and arguments down to the current frame. */
+    |  ins_AD  // RA = base, RD = nargs+1
+    |  lea RA, [BASE+RA*8+16]  // RA = address of the first argument.
+    |  mov KBASE, BASE  // Use KBASE for move + vmeta_call hint.
+    |  mov LFUNC:RB, [RA-16]
+    |  checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+    |->BC_CALLT_Z:
+    |  mov PC, [BASE-8]
+    |  test PCd, FRAME_TYPE
+    |  jnz >7  // Frame type bits set: handle at 7.
+    |1:
+    |  mov [BASE-16], LFUNC:RB  // Copy func+tag down, reloaded below.
+    |  mov MULTRES, NARGS:RDd  // Save nargs+1; RD is used as loop counter.
+    |  sub NARGS:RDd, 1
+    |  jz >3  // No arguments to move.
+    |2:  // Move args down.
+    |  mov RB, [RA]
+    |  add RA, 8
+    |  mov [KBASE], RB
+    |  add KBASE, 8
+    |  sub NARGS:RDd, 1
+    |  jnz <2
+    |
+    |  mov LFUNC:RB, [BASE-16]  // Reload the function; RB was used as copy scratch.
+    |3:
+    |  cleartp LFUNC:RB
+    |  mov NARGS:RDd, MULTRES  // Restore nargs+1.
+    |  cmp byte LFUNC:RB->ffid, 1  // (> FF_C) Calling a fast function?
+    |  ja >5
+    |4:
+    |  ins_callt
+    |
+    |5:  // Tailcall to a fast function.
+    |  test PCd, FRAME_TYPE  // Lua frame below?
+    |  jnz <4
+    |  movzx RAd, PC_RA
+    |  neg RA  // Negate RA for the downward address computation below.
+    |  mov LFUNC:KBASE, [BASE+RA*8-32]  // Need to prepare KBASE.
+    |  cleartp LFUNC:KBASE
+    |  mov KBASE, LFUNC:KBASE->pc
+    |  mov KBASE, [KBASE+PC2PROTO(k)]
+    |  jmp <4
+    |
+    |7:  // Tailcall from a vararg function.
+    |  sub PC, FRAME_VARG
+    |  test PCd, FRAME_TYPEP
+    |  jnz >8  // Vararg frame below?
+    |  sub BASE, PC  // Need to relocate BASE/KBASE down.
+    |  mov KBASE, BASE
+    |  mov PC, [BASE-8]  // Reload PC from the relocated frame.
+    |  jmp <1
+    |8:
+    |  add PCd, FRAME_VARG  // Undo the subtraction at 7.
+    |  jmp <1
+    break;
+
+  case BC_ITERC:  /* Copies callable, state and control var up, then calls with 2 arguments. */
+    |  ins_A  // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+    |  lea RA, [BASE+RA*8+16]  // fb = base+2
+    |  mov RB, [RA-32]  // Copy state. fb[0] = fb[-4].
+    |  mov RC, [RA-24]  // Copy control var. fb[1] = fb[-3].
+    |  mov [RA], RB
+    |  mov [RA+8], RC
+    |  mov LFUNC:RB, [RA-40]  // Copy callable. fb[-2] = fb[-5]
+    |  mov [RA-16], LFUNC:RB
+    |  mov NARGS:RDd, 2+1  // Handle like a regular 2-arg call.
+    |  checkfunc LFUNC:RB, ->vmeta_call
+    |  mov BASE, RA  // New BASE = fb.
+    |  ins_call
+    break;
+
+  case BC_ITERN:  /* Inline table traversal: array part first, then hash part. */
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    |->vm_IITERN:
+    |  ins_A  // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+    |  mov TAB:RB, [BASE+RA*8-16]
+    |  cleartp TAB:RB
+    |  mov RCd, [BASE+RA*8-8]  // Get index from control var.
+    |  mov TMPRd, TAB:RB->asize
+    |  add PC, 4  // Presumably steps PC to the ITERL whose target is read at 2.
+    |  mov ITYPE, TAB:RB->array
+    |1:  // Traverse array part.
+    |  cmp RCd, TMPRd; jae >5  // Index points after array part?
+    |  cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
+    |.if not DUALNUM
+    |  cvtsi2sd xmm0, RCd  // Index as a double, stored as the key below.
+    |.endif
+    |  // Copy array slot to returned value.
+    |  mov RB, [ITYPE+RC*8]
+    |  mov [BASE+RA*8+8], RB
+    |  // Return array index as a numeric key.
+    |.if DUALNUM
+    |  setint ITYPE, RC
+    |  mov [BASE+RA*8], ITYPE
+    |.else
+    |  movsd qword [BASE+RA*8], xmm0
+    |.endif
+    |  add RCd, 1
+    |  mov [BASE+RA*8-8], RCd  // Update control var.
+    |2:
+    |  movzx RDd, PC_RD  // Get target from ITERL.
+    |  branchPC RD
+    |3:
+    |  ins_next
+    |
+    |4:  // Skip holes in array part.
+    |  add RCd, 1
+    |  jmp <1
+    |
+    |5:  // Traverse hash part.
+    |  sub RCd, TMPRd  // RC = index relative to the hash part.
+    |6:
+    |  cmp RCd, TAB:RB->hmask; ja <3  // End of iteration? Branch to ITERL+1.
+    |  imul ITYPEd, RCd, #NODE  // Byte offset of node RC (#NODE: DynASM size of the NODE type).
+    |  add NODE:ITYPE, TAB:RB->node  // ITYPE = address of the node.
+    |  cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
+    |  lea TMPRd, [RCd+TMPRd+1]  // Add back the TMPRd subtracted at 5, plus one for the next index.
+    |  // Copy key and value from hash slot.
+    |  mov RB, NODE:ITYPE->key
+    |  mov RC, NODE:ITYPE->val
+    |  mov [BASE+RA*8], RB
+    |  mov [BASE+RA*8+8], RC
+    |  mov [BASE+RA*8-8], TMPRd  // Store the next index in the slot below the key.
+    |  jmp <2
+    |
+    |7:  // Skip holes in hash part.
+    |  add RCd, 1
+    |  jmp <6
+    break;
+
+  case BC_ISNEXT:  /* Checks the iterator slots; despecializes the bytecode if a check fails. */
+    |  ins_AD  // RA = base, RD = target (points to ITERN)
+    |  mov CFUNC:RB, [BASE+RA*8-24]
+    |  checkfunc CFUNC:RB, >5
+    |  checktptp [BASE+RA*8-16], LJ_TTAB, >5
+    |  cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
+    |  cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+    |  branchPC RD
+    |  mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)  // LJ_KEYINDEX in the high dword, zero in the low dword.
+    |  mov [BASE+RA*8-8], TMPR  // Initialize control var.
+    |1:
+    |  ins_next
+    |5:  // Despecialize bytecode if any of the checks fail.
+    |  mov PC_OP, BC_JMP
+    |  branchPC RD
+    |.if JIT
+    |  cmp byte [PC], BC_ITERN
+    |  jne >6  // Target opcode is not BC_ITERN: handle at 6.
+    |.endif
+    |  mov byte [PC], BC_ITERC  // Overwrite the opcode byte at the target.
+    |  jmp <1
+    |.if JIT
+    |6:  // Unpatch JLOOP.
+    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+    |  movzx RCd, word [PC+2]  // Read the 16 bit operand at PC+2; it indexes the trace array below.
+    |  mov TRACE:RA, [RA+RC*8]
+    |  mov eax, TRACE:RA->startins
+    |  mov al, BC_ITERC  // Replace the low byte of the loaded instruction.
+    |  mov dword [PC], eax
+    |  jmp <1
+    |.endif
+    break;
+
+  case BC_VARG:  /* Copies vararg slots to the destination slots starting at RA. */
+    |  ins_ABC  // RA = base, RB = nresults+1, RC = numparams
+    |  lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
+    |  lea RA, [BASE+RA*8]  // RA = address of the first destination slot.
+    |  sub TMPR, [BASE-8]
+    |  // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
+    |  test RB, RB
+    |  jz >5  // Copy all varargs?
+    |  lea RB, [RA+RB*8-8]  // RB = end of the destination slots.
+    |  cmp TMPR, BASE  // No vararg slots?
+    |  jnb >2
+    |1:  // Copy vararg slots to destination slots.
+    |  mov RC, [TMPR-16]
+    |  add TMPR, 8
+    |  mov [RA], RC
+    |  add RA, 8
+    |  cmp RA, RB  // All destination slots filled?
+    |  jnb >3
+    |  cmp TMPR, BASE  // No more vararg slots?
+    |  jb <1
+    |2:  // Fill up remainder with nil.
+    |  mov aword [RA], LJ_TNIL
+    |  add RA, 8
+    |  cmp RA, RB
+    |  jb <2
+    |3:
+    |  ins_next
+    |
+    |5:  // Copy all varargs.
+    |  mov MULTRES, 1  // MULTRES = 0+1
+    |  mov RC, BASE
+    |  sub RC, TMPR  // RC = BASE - TMPR, the byte count tested by the branch below.
+    |  jbe <3  // No vararg slots?
+    |  mov RBd, RCd
+    |  shr RBd, 3  // Bytes / 8 = number of slots.
+    |  add RBd, 1
+    |  mov MULTRES, RBd  // MULTRES = #varargs+1
+    |  mov L:RB, SAVE_L
+    |  add RC, RA  // RC = end of the destination area.
+    |  cmp RC, L:RB->maxstack
+    |  ja >7  // Need to grow stack?
+    |6:  // Copy all vararg slots.
+    |  mov RC, [TMPR-16]
+    |  add TMPR, 8
+    |  mov [RA], RC
+    |  add RA, 8
+    |  cmp TMPR, BASE  // No more vararg slots?
+    |  jb <6
+    |  jmp <3
+    |
+    |7:  // Grow stack for varargs.
+    |  mov L:RB->base, BASE
+    |  mov L:RB->top, RA
+    |  mov SAVE_PC, PC
+    |  sub TMPR, BASE  // Need delta, because BASE may change.
+    |  mov TMP1hi, TMPRd  // Save the low 32 bits of the delta across the call.
+    |  mov CARG2d, MULTRES
+    |  sub CARG2d, 1  // n = MULTRES-1.
+    |  mov CARG1, L:RB
+    |  call extern lj_state_growstack  // (lua_State *L, int n)
+    |  mov BASE, L:RB->base
+    |  movsxd TMPR, TMP1hi  // Sign-extend the saved 32 bit delta.
+    |  mov RA, L:RB->top
+    |  add TMPR, BASE  // Rebase the source pointer on the reloaded BASE.
+    |  jmp <6
+    break;
+
+  /* -- Returns ----------------------------------------------------------- */
+
+  case BC_RETM:  /* Return variant that adds MULTRES to the result count. */
+    |  ins_AD  // RA = results, RD = extra_nresults
+    |  add RDd, MULTRES  // MULTRES >=1, so RD >=1.
+    |  // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+    break;
+
+  case BC_RET: case BC_RET0: case BC_RET1:  /* One generator for three return ops; code differs per op below. */
+    |  ins_AD  // RA = results, RD = nresults+1
+    if (op != BC_RET0) {
+      |  shl RAd, 3  // RA * 8 = byte offset of the first result.
+    }
+    |1:
+    |  mov PC, [BASE-8]
+    |  mov MULTRES, RDd  // Save nresults+1.
+    |  test PCd, FRAME_TYPE  // Check frame type marker.
+    |  jnz >7  // Not returning to a fixarg Lua func?
+    switch (op) {
+    case BC_RET:
+      |->BC_RET_Z:
+      |  mov KBASE, BASE  // Use KBASE for result move.
+      |  sub RDd, 1
+      |  jz >3  // No results to move.
+      |2:  // Move results down.
+      |  mov RB, [KBASE+RA]
+      |  mov [KBASE-16], RB
+      |  add KBASE, 8
+      |  sub RDd, 1
+      |  jnz <2
+      |3:
+      |  mov RDd, MULTRES  // Note: MULTRES may be >255.
+      |  movzx RBd, PC_RB  // So cannot compare with RDL!
+      |5:
+      |  cmp RBd, RDd  // More results expected?
+      |  ja >6
+      break;
+    case BC_RET1:
+      |  mov RB, [BASE+RA]  // Load the single result.
+      |  mov [BASE-16], RB  // Store it two slots below BASE.
+      /* fallthrough */
+    case BC_RET0:
+      |5:
+      |  cmp PC_RB, RDL  // More results expected?
+      |  ja >6
+    default:
+      break;
+    }
+    |  movzx RAd, PC_RA
+    |  neg RA  // Negate RA for the downward address computation below.
+    |  lea BASE, [BASE+RA*8-16]  // base = base - (RA+2)*8
+    |  mov LFUNC:KBASE, [BASE-16]
+    |  cleartp LFUNC:KBASE
+    |  mov KBASE, LFUNC:KBASE->pc
+    |  mov KBASE, [KBASE+PC2PROTO(k)]  // KBASE = the k field reached via PC2PROTO from func->pc.
+    |  ins_next
+    |
+    |6:  // Fill up results with nil.
+    if (op == BC_RET) {
+      |  mov aword [KBASE-16], LJ_TNIL  // Note: relies on shifted base.
+      |  add KBASE, 8
+    } else {
+      |  mov aword [BASE+RD*8-24], LJ_TNIL
+    }
+    |  add RD, 1
+    |  jmp <5  // Re-check whether more results are expected.
+    |
+    |7:  // Non-standard return case.
+    |  lea RB, [PC-FRAME_VARG]
+    |  test RBd, FRAME_TYPEP
+    |  jnz ->vm_return
+    |  // Return from vararg function: relocate BASE down and RA up.
+    |  sub BASE, RB
+    if (op != BC_RET0) {
+      |  add RA, RB
+    }
+    |  jmp <1  // Retry with the relocated BASE.
+    break;
+
+  /* -- Loops and branches ------------------------------------------------ */
+
+  |.define FOR_IDX,  [RA]  // Loop control slots, addressed relative to RA.
+  |.define FOR_STOP, [RA+8]
+  |.define FOR_STEP, [RA+16]
+  |.define FOR_EXT,  [RA+24]
+
+  case BC_FORL:
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+    break;
+
+  case BC_JFORI:
+  case BC_JFORL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_FORI:
+  case BC_IFORL:
+    vk = (op == BC_IFORL || op == BC_JFORL);  /* vk is set for the *FORL ops and clear for the *FORI ops. */
+    |  ins_AJ  // RA = base, RD = target (after end of loop or start of loop)
+    |  lea RA, [BASE+RA*8]  // RA = address of the loop control slots (FOR_IDX etc.).
+    if (LJ_DUALNUM) {
+      |  mov RB, FOR_IDX
+      |  checkint RB, >9
+      |  mov TMPR, FOR_STOP
+      if (!vk) {
+        |  checkint TMPR, ->vmeta_for
+        |  mov ITYPE, FOR_STEP
+        |  test ITYPEd, ITYPEd; js >5  // Negative step (low dword): use the inverted check at 5.
+        |  sar ITYPE, 47;
+        |  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+      } else {
+#ifdef LUA_USE_ASSERT
+        |  checkinttp FOR_STOP, ->assert_bad_for_arg_type
+        |  checkinttp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+        |  mov ITYPE, FOR_STEP
+        |  test ITYPEd, ITYPEd; js >5  // Negative step (low dword): use the inverted check at 5.
+        |  add RBd, ITYPEd; jo >1  // idx += step; on signed overflow continue at 1.
+        |  setint RB
+        |  mov FOR_IDX, RB  // Write back the updated index.
+      }
+      |  cmp RBd, TMPRd  // Compare idx with stop (signed); mov below leaves the flags intact.
+      |  mov FOR_EXT, RB  // Copy the index to the FOR_EXT slot.
+      if (op == BC_FORI) {
+        |  jle >7
+        |1:
+        |6:
+        |  branchPC RD
+      } else if (op == BC_JFORI) {
+        |  branchPC RD
+        |  movzx RDd, PC_RD
+        |  jle =>BC_JLOOP
+        |1:
+        |6:
+      } else if (op == BC_IFORL) {
+        |  jg >7
+        |6:
+        |  branchPC RD
+        |1:
+      } else {
+        |  jle =>BC_JLOOP
+        |1:
+        |6:
+      }
+      |7:
+      |  ins_next
+      |
+      |5:  // Invert check for negative step.
+      if (!vk) {
+        |  sar ITYPE, 47;
+        |  cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+      } else {
+        |  add RBd, ITYPEd; jo <1  // idx += step; on signed overflow continue at 1.
+        |  setint RB
+        |  mov FOR_IDX, RB  // Write back the updated index.
+      }
+      |  cmp RBd, TMPRd  // Same compare as above; the branches below use the opposite conditions.
+      |  mov FOR_EXT, RB
+      if (op == BC_FORI) {
+        |  jge <7
+      } else if (op == BC_JFORI) {
+        |  branchPC RD
+        |  movzx RDd, PC_RD
+        |  jge =>BC_JLOOP
+      } else if (op == BC_IFORL) {
+        |  jl <7
+      } else {
+        |  jge =>BC_JLOOP
+      }
+      |  jmp <6
+      |9:  // Fallback to FP variant.
+      if (!vk) {
+        |  jae ->vmeta_for
+      }
+    } else if (!vk) {
+      |  checknumtp FOR_IDX, ->vmeta_for
+    }
+    if (!vk) {
+      |  checknumtp FOR_STOP, ->vmeta_for
+    } else {
+#ifdef LUA_USE_ASSERT
+      |  checknumtp FOR_STOP, ->assert_bad_for_arg_type
+      |  checknumtp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+    }
+    |  mov RB, FOR_STEP
+    if (!vk) {
+      |  checknum RB, ->vmeta_for
+    }
+    |  movsd xmm0, qword FOR_IDX  // xmm0 = idx.
+    |  movsd xmm1, qword FOR_STOP  // xmm1 = stop.
+    if (vk) {
+      |  addsd xmm0, qword FOR_STEP  // idx += step.
+      |  movsd qword FOR_IDX, xmm0  // Write back the updated index.
+      |  test RB, RB; js >3  // Sign bit of step set: use the inverted comparison at 3.
+    } else {
+      |  jl >3  // NOTE(review): the flags tested here are not set in the visible code; presumably left by checknum.
+    }
+    |  ucomisd xmm1, xmm0  // Compare stop with idx.
+    |1:
+    |  movsd qword FOR_EXT, xmm0  // movsd does not change the flags used by the branches below.
+    if (op == BC_FORI) {
+      |.if DUALNUM
+      |  jnb <7
+      |.else
+      |  jnb >2
+      |  branchPC RD
+      |.endif
+    } else if (op == BC_JFORI) {
+      |  branchPC RD
+      |  movzx RDd, PC_RD
+      |  jnb =>BC_JLOOP
+    } else if (op == BC_IFORL) {
+      |.if DUALNUM
+      |  jb <7
+      |.else
+      |  jb >2
+      |  branchPC RD
+      |.endif
+    } else {
+      |  jnb =>BC_JLOOP
+    }
+    |.if DUALNUM
+    |  jmp <6
+    |.else
+    |2:
+    |  ins_next
+    |.endif
+    |
+    |3:  // Invert comparison if step is negative.
+    |  ucomisd xmm0, xmm1
+    |  jmp <1
+    break;
+
+  case BC_ITERL:
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+    break;
+
+  case BC_JITERL:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IITERL:  /* Continues the loop unless slot RA holds nil. */
+    |  ins_AJ  // RA = base, RD = target
+    |  lea RA, [BASE+RA*8]
+    |  mov RB, [RA]
+    |  cmp RB, LJ_TNIL; je >1  // Stop if iterator returned nil.
+    if (op == BC_JITERL) {
+      |  mov [RA-8], RB  // Save control var.
+      |  jmp =>BC_JLOOP
+    } else {
+      |  branchPC RD  // Otherwise save control var + branch.
+      |  mov [RA-8], RB
+    }
+    |1:
+    |  ins_next
+    break;
+
+  case BC_LOOP:
+    |  ins_A  // RA = base, RD = target (loop extent)
+    |  // Note: RA/RD is only used by trace recorder to determine scope/extent
+    |  // This opcode does NOT jump, its only purpose is to detect a hot loop.
+    |.if JIT
+    |  hotloop RBd
+    |.endif
+    | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+    break;
+
+  case BC_ILOOP:  /* No work here: only dispatches the next instruction. */
+    |  ins_A  // RA = base, RD = target (loop extent)
+    |  ins_next
+    break;
+
+  case BC_JLOOP:  /* Jumps to the machine code of trace RD; emits nothing unless JIT is defined. */
+    |.if JIT
+    |  ins_AD  // RA = base (ignored), RD = traceno
+    |  mov RA, [DISPATCH+DISPATCH_J(trace)]
+    |  mov TRACE:RD, [RA+RD*8]  // Load entry RD of the trace array.
+    |  mov RD, TRACE:RD->mcode  // RD = machine code address of the trace.
+    |  mov L:RB, SAVE_L
+    |  mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
+    |  mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
+    |  // Save additional callee-save registers only used in compiled code.
+    |.if X64WIN
+    |  mov CSAVE_4, r12
+    |  mov CSAVE_3, r13
+    |  mov CSAVE_2, r14
+    |  mov CSAVE_1, r15
+    |  mov RA, rsp  // RA = old rsp; xmm6-xmm15 are stored below it.
+    |  sub rsp, 10*16+4*8  // 10 slots of 16 bytes for xmm6-xmm15 plus 4*8 more bytes.
+    |  movdqa [RA-1*16], xmm6
+    |  movdqa [RA-2*16], xmm7
+    |  movdqa [RA-3*16], xmm8
+    |  movdqa [RA-4*16], xmm9
+    |  movdqa [RA-5*16], xmm10
+    |  movdqa [RA-6*16], xmm11
+    |  movdqa [RA-7*16], xmm12
+    |  movdqa [RA-8*16], xmm13
+    |  movdqa [RA-9*16], xmm14
+    |  movdqa [RA-10*16], xmm15
+    |.else
+    |  sub rsp, 16
+    |  mov [rsp+16], r12
+    |  mov [rsp+8], r13
+    |.endif
+    |  jmp RD  // Enter the trace's machine code.
+    |.endif
+    break;
+
+  case BC_JMP:  /* Unconditional branch to the target in RD. */
+    |  ins_AJ  // RA = unused, RD = target
+    |  branchPC RD
+    |  ins_next
+    break;
+
+  /* -- Function headers -------------------------------------------------- */
+
+  /*
+  ** Reminder: A function may be called with func/args above L->maxstack,
+  ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+  ** too. This means all FUNC* ops (including fast functions) must check
+  ** for stack overflow _before_ adding more slots!
+  */
+
+  case BC_FUNCF:
+    |.if JIT
+    |  hotcall RBd
+    |.endif
+  case BC_FUNCV:  /* NYI: compiled vararg functions. */
+    | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+    break;
+
+  case BC_JFUNCF:
+#if !LJ_HASJIT
+    break;
+#endif
+  case BC_IFUNCF:  /* Fixarg function header: stack check, then nil-fill of missing parameters. */
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  mov KBASE, [PC-4+PC2PROTO(k)]
+    |  mov L:RB, SAVE_L
+    |  lea RA, [BASE+RA*8]  // Top of frame.
+    |  cmp RA, L:RB->maxstack
+    |  ja ->vm_growstack_f  // RA exceeds maxstack: grow the stack.
+    |  movzx RAd, byte [PC-4+PC2PROTO(numparams)]
+    |  cmp NARGS:RDd, RAd  // Check for missing parameters.
+    |  jbe >3
+    |2:
+    if (op == BC_JFUNCF) {
+      |  movzx RDd, PC_RD
+      |  jmp =>BC_JLOOP
+    } else {
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
+    |  add NARGS:RDd, 1
+    |  cmp NARGS:RDd, RAd
+    |  jbe <3  // Loop until RD exceeds numparams.
+    |  jmp <2
+    break;
+
+  case BC_JFUNCV:
+#if !LJ_HASJIT
+    break;
+#endif
+    | int3  // NYI: compiled vararg functions
+    break;  /* NYI: compiled vararg functions. */
+
+  case BC_IFUNCV:  /* Vararg function header: sets up a new frame above the arguments. */
+    |  ins_AD  // BASE = new base, RA = framesize, RD = nargs+1
+    |  lea RBd, [NARGS:RD*8+FRAME_VARG+8]  // RB = frame delta in bytes plus FRAME_VARG.
+    |  lea RD, [BASE+NARGS:RD*8+8]  // RD = new base, above the passed arguments.
+    |  mov LFUNC:KBASE, [BASE-16]
+    |  mov [RD-8], RB  // Store delta + FRAME_VARG.
+    |  mov [RD-16], LFUNC:KBASE  // Store copy of LFUNC.
+    |  mov L:RB, SAVE_L
+    |  lea RA, [RD+RA*8]  // Top of the new frame.
+    |  cmp RA, L:RB->maxstack
+    |  ja ->vm_growstack_v  // Need to grow stack.
+    |  mov RA, BASE  // RA = old base, used as source pointer for the fixargs.
+    |  mov BASE, RD
+    |  movzx RBd, byte [PC-4+PC2PROTO(numparams)]
+    |  test RBd, RBd
+    |  jz >2  // No fixed parameters to copy.
+    |  add RA, 8
+    |1:  // Copy fixarg slots up to new frame.
+    |  add RA, 8
+    |  cmp RA, BASE
+    |  jnb >3  // Less args than parameters?
+    |  mov KBASE, [RA-16]  // KBASE serves as copy scratch here.
+    |  mov [RD], KBASE
+    |  add RD, 8
+    |  mov aword [RA-16], LJ_TNIL  // Clear old fixarg slot (help the GC).
+    |  sub RBd, 1
+    |  jnz <1
+    |2:
+    if (op == BC_JFUNCV) {  /* Not reached while the BC_JFUNCV case above ends with break. */
+      |  movzx RDd, PC_RD
+      |  jmp =>BC_JLOOP
+    } else {
+      |  mov KBASE, [PC-4+PC2PROTO(k)]  // Load KBASE only now; it was used as scratch above.
+      |  ins_next
+    }
+    |
+    |3:  // Clear missing parameters.
+    |  mov aword [RD], LJ_TNIL
+    |  add RD, 8
+    |  sub RBd, 1
+    |  jnz <3  // Loop until the remaining parameter count reaches zero.
+    |  jmp <2
+    break;
+
+  case BC_FUNCC:
+  case BC_FUNCCW:  /* C function call: directly (FUNCC) or through the wrapf pointer (FUNCCW). */
+    |  ins_AD  // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
+    |  mov CFUNC:RB, [BASE-16]
+    |  cleartp CFUNC:RB
+    |  mov KBASE, CFUNC:RB->f  // KBASE = C function pointer.
+    |  mov L:RB, SAVE_L
+    |  lea RD, [BASE+NARGS:RD*8-8]  // RD = BASE + (nargs+1)*8 - 8, stored to L->top below.
+    |  mov L:RB->base, BASE
+    |  lea RA, [RD+8*LUA_MINSTACK]
+    |  cmp RA, L:RB->maxstack  // Flags are consumed by the ja below; the movs in between do not change them.
+    |  mov L:RB->top, RD
+    if (op == BC_FUNCC) {
+      |  mov CARG1, L:RB  // Caveat: CARG1 may be RA.
+    } else {
+      |  mov CARG2, KBASE
+      |  mov CARG1, L:RB  // Caveat: CARG1 may be RA.
+    }
+    |  ja ->vm_growstack_c  // Need to grow stack.
+    |  set_vmstate C
+    if (op == BC_FUNCC) {
+      |  call KBASE  // (lua_State *L)
+    } else {
+      |  // (lua_State *L, lua_CFunction f)
+      |  call aword [DISPATCH+DISPATCH_GL(wrapf)]
+    }
+    |  // nresults returned in eax (RD).
+    |  mov BASE, L:RB->base
+    |  mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+    |  set_vmstate INTERP
+    |  lea RA, [BASE+RD*8]
+    |  neg RA
+    |  add RA, L:RB->top  // RA = (L->top-(L->base+nresults))*8
+    |  mov PC, [BASE-8]  // Fetch PC of caller.
+    |  jmp ->vm_returnc
+    break;
+
+  /* ---------------------------------------------------------------------- */
+
+  default:  /* Unknown opcode: report it on stderr and exit with status 2. */
+    fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+    exit(2);
+    break;
+  }
+}
+
+static int build_backend(BuildCtx *ctx)  /* Emits the subroutines, then one handler per opcode; returns BC__MAX. */
+{
+  int op;
+  dasm_growpc(Dst, BC__MAX);  /* Called with BC__MAX before the opcodes are emitted; handlers above branch to =>BC_JLOOP. */
+  build_subroutines(ctx);
+  |.code_op
+  for (op = 0; op < BC__MAX; op++)
+    build_ins(ctx, (BCOp)op, op);
+  return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions.
*/
+static void emit_asm_debug(BuildCtx *ctx)
+{  /* Writes assembler text to ctx->fp, selected by ctx->mode:
+   ** - BUILD_elfasm: a .debug_frame section (one CIE, one FDE starting at
+   **   .Lbegin with length fcofs, and with LJ_HASFFI a second FDE for
+   **   lj_vm_ffi_call with length codesz - fcofs). Unless LJ_NO_UNWIND is
+   **   set, an .eh_frame section with the corresponding entries follows;
+   **   its first CIE references lj_err_unwind_dwarf.
+   ** - BUILD_machasm (compiled only if !LJ_NO_UNWIND): a __TEXT,__eh_frame
+   **   section with one FDE per symbol of non-zero size. With LJ_HASFFI,
+   **   _lj_vm_ffi_call gets its own CIE and FDE instead.
+   ** - Any other mode: nothing is written.
+   */
+  int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);  /* Byte offset of vm_ffi_call from the start of the code. */
+  switch (ctx->mode) {
+  case BUILD_elfasm:
+    fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+    fprintf(ctx->fp,
+	".Lframe0:\n"
+	"\t.long .LECIE0-.LSCIE0\n"  /* CIE length */
+	".LSCIE0:\n"
+	"\t.long 0xffffffff\n"  /* CIE id (.debug_frame) */
+	"\t.byte 0x1\n"  /* CIE version */
+	"\t.string \"\"\n"  /* augmentation string (empty) */
+	"\t.uleb128 0x1\n"  /* code alignment factor */
+	"\t.sleb128 -8\n"  /* data alignment factor */
+	"\t.byte 0x10\n"  /* return address register (16) */
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"  /* def_cfa reg 7 (rsp), offset 8 */
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"  /* offset of return address (reg 16) */
+	"\t.align 8\n"
+	".LECIE0:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE0:\n"
+	"\t.long .LEFDE0-.LASFDE0\n"  /* FDE length */
+	".LASFDE0:\n"
+	"\t.long .Lframe0\n"  /* CIE pointer */
+	"\t.quad .Lbegin\n"  /* initial location */
+	"\t.quad %d\n"  /* address range (fcofs) */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
+	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
+#if LJ_NO_UNWIND
+	"\t.byte 0x8d\n\t.uleb128 0x6\n"	/* offset r13 */
+	"\t.byte 0x8c\n\t.uleb128 0x7\n"	/* offset r12 */
+#endif
+	"\t.align 8\n"
+	".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".LSFDE1:\n"
+	"\t.long .LEFDE1-.LASFDE1\n"  /* FDE length */
+	".LASFDE1:\n"
+	"\t.long .Lframe0\n"  /* CIE pointer */
+	"\t.quad lj_vm_ffi_call\n"  /* initial location */
+	"\t.quad %d\n"  /* address range (codesz - fcofs) */
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.align 8\n"
+	".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+#if LJ_TARGET_SOLARIS
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+#else
+    fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+#endif
+    fprintf(ctx->fp,
+	".Lframe1:\n"
+	"\t.long .LECIE1-.LSCIE1\n"  /* CIE length */
+	".LSCIE1:\n"
+	"\t.long 0\n"  /* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"  /* CIE version */
+	"\t.string \"zPR\"\n"  /* augmentation string */
+	"\t.uleb128 0x1\n"  /* code alignment factor */
+	"\t.sleb128 -8\n"  /* data alignment factor */
+	"\t.byte 0x10\n"  /* return address register (16) */
+	"\t.uleb128 6\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.long lj_err_unwind_dwarf-.\n"  /* personality routine, pc-relative */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"  /* def_cfa reg 7 (rsp), offset 8 */
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"  /* offset of return address (reg 16) */
+	"\t.align 8\n"
+	".LECIE1:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE2:\n"
+	"\t.long .LEFDE2-.LASFDE2\n"  /* FDE length */
+	".LASFDE2:\n"
+	"\t.long .LASFDE2-.Lframe1\n"  /* CIE pointer, relative to this field */
+	"\t.long .Lbegin-.\n"  /* initial location, pc-relative */
+	"\t.long %d\n"  /* address range (fcofs) */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 %d\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.byte 0x8f\n\t.uleb128 0x4\n"	/* offset r15 */
+	"\t.byte 0x8e\n\t.uleb128 0x5\n"	/* offset r14 */
+	"\t.align 8\n"
+	".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+    fprintf(ctx->fp,
+	".Lframe2:\n"
+	"\t.long .LECIE2-.LSCIE2\n"  /* CIE length */
+	".LSCIE2:\n"
+	"\t.long 0\n"  /* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"  /* CIE version */
+	"\t.string \"zR\"\n"  /* augmentation string; no personality entry here */
+	"\t.uleb128 0x1\n"  /* code alignment factor */
+	"\t.sleb128 -8\n"  /* data alignment factor */
+	"\t.byte 0x10\n"  /* return address register (16) */
+	"\t.uleb128 1\n"			/* augmentation length */
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"  /* def_cfa reg 7 (rsp), offset 8 */
+	"\t.byte 0x80+0x10\n\t.uleb128 0x1\n"  /* offset of return address (reg 16) */
+	"\t.align 8\n"
+	".LECIE2:\n\n");
+    fprintf(ctx->fp,
+	".LSFDE3:\n"
+	"\t.long .LEFDE3-.LASFDE3\n"  /* FDE length */
+	".LASFDE3:\n"
+	"\t.long .LASFDE3-.Lframe2\n"  /* CIE pointer, relative to this field */
+	"\t.long lj_vm_ffi_call-.\n"  /* initial location, pc-relative */
+	"\t.long %d\n"  /* address range (codesz - fcofs) */
+	"\t.uleb128 0\n"			/* augmentation length */
+	"\t.byte 0xe\n\t.uleb128 16\n"		/* def_cfa_offset */
+	"\t.byte 0x86\n\t.uleb128 0x2\n"	/* offset rbp */
+	"\t.byte 0xd\n\t.uleb128 0x6\n"		/* def_cfa_register rbp */
+	"\t.byte 0x83\n\t.uleb128 0x3\n"	/* offset rbx */
+	"\t.align 8\n"
+	".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+    break;
+#if !LJ_NO_UNWIND
+  /* Mental note: never let Apple design an assembler.
+  ** Or a linker. Or a plastic case. But I digress.
+  */
+  case BUILD_machasm: {
+#if LJ_HASFFI
+    int fcsize = 0;  /* Size of _lj_vm_ffi_call; stays 0 if that symbol is not seen in the loop. */
+#endif
+    int i;
+    fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+    fprintf(ctx->fp,
+	"EH_frame1:\n"
+	"\t.set L$set$x,LECIEX-LSCIEX\n"
+	"\t.long L$set$x\n"  /* CIE length */
+	"LSCIEX:\n"
+	"\t.long 0\n"  /* CIE id (.eh_frame) */
+	"\t.byte 0x1\n"  /* CIE version */
+	"\t.ascii \"zPR\\0\"\n"  /* augmentation string */
+	"\t.byte 0x1\n"  /* code alignment factor */
+	"\t.byte 128-8\n"  /* data alignment factor: -8 as a one-byte SLEB128 */
+	"\t.byte 0x10\n"  /* return address register (16) */
+	"\t.byte 6\n"				/* augmentation length */
+	"\t.byte 0x9b\n"			/* indirect|pcrel|sdata4 */
+	"\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
+	"\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	"\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"  /* def_cfa reg 7 (rsp), offset 8 */
+	"\t.byte 0x80+0x10\n\t.byte 0x1\n"  /* offset of return address (reg 16) */
+	"\t.align 3\n"
+	"LECIEX:\n\n");
+    for (i = 0; i < ctx->nsym; i++) {
+      const char *name = ctx->sym[i].name;
+      int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;  /* NOTE(review): reads sym[nsym] on the last pass; presumably a sentinel entry exists -- verify against the symbol table setup. */
+      if (size == 0) continue;  /* Skip symbols that cover no code. */
+#if LJ_HASFFI
+      if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }  /* Gets its own CIE/FDE after the loop. */
+#endif
+      fprintf(ctx->fp,
+	  "%s.eh:\n"
+	  "LSFDE%d:\n"
+	  "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+	  "\t.long L$set$%d\n"  /* FDE length */
+	  "LASFDE%d:\n"
+	  "\t.long LASFDE%d-EH_frame1\n"  /* CIE pointer, relative to this field */
+	  "\t.long %s-.\n"  /* initial location, pc-relative */
+	  "\t.long %d\n"  /* address range (size) */
+	  "\t.byte 0\n"				/* augmentation length */
+	  "\t.byte 0xe\n\t.byte %d\n"		/* def_cfa_offset; NOTE(review): a single byte is a valid ULEB128 only while CFRAME_SIZE < 128 */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+	  "\t.byte 0x8f\n\t.byte 0x4\n"		/* offset r15 */
+	  "\t.byte 0x8e\n\t.byte 0x5\n"		/* offset r14 */
+	  "\t.align 3\n"
+	  "LEFDE%d:\n\n",
+	  name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
+    }
+#if LJ_HASFFI
+    if (fcsize) {
+      fprintf(ctx->fp,
+	  "EH_frame2:\n"
+	  "\t.set L$set$y,LECIEY-LSCIEY\n"
+	  "\t.long L$set$y\n"  /* CIE length */
+	  "LSCIEY:\n"
+	  "\t.long 0\n"  /* CIE id (.eh_frame) */
+	  "\t.byte 0x1\n"  /* CIE version */
+	  "\t.ascii \"zR\\0\"\n"  /* augmentation string; no personality entry here */
+	  "\t.byte 0x1\n"  /* code alignment factor */
+	  "\t.byte 128-8\n"  /* data alignment factor: -8 as a one-byte SLEB128 */
+	  "\t.byte 0x10\n"  /* return address register (16) */
+	  "\t.byte 1\n"				/* augmentation length */
+	  "\t.byte 0x1b\n"			/* pcrel|sdata4 */
+	  "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"  /* def_cfa reg 7 (rsp), offset 8 */
+	  "\t.byte 0x80+0x10\n\t.byte 0x1\n"  /* offset of return address (reg 16) */
+	  "\t.align 3\n"
+	  "LECIEY:\n\n");
+      fprintf(ctx->fp,
+	  "_lj_vm_ffi_call.eh:\n"
+	  "LSFDEY:\n"
+	  "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+	  "\t.long L$set$yy\n"  /* FDE length */
+	  "LASFDEY:\n"
+	  "\t.long LASFDEY-EH_frame2\n"  /* CIE pointer, relative to this field */
+	  "\t.long _lj_vm_ffi_call-.\n"  /* initial location, pc-relative */
+	  "\t.long %d\n"  /* address range (fcsize) */
+	  "\t.byte 0\n"				/* augmentation length */
+	  "\t.byte 0xe\n\t.byte 16\n"		/* def_cfa_offset */
+	  "\t.byte 0x86\n\t.byte 0x2\n"		/* offset rbp */
+	  "\t.byte 0xd\n\t.byte 0x6\n"		/* def_cfa_register rbp */
+	  "\t.byte 0x83\n\t.byte 0x3\n"		/* offset rbx */
+	  "\t.align 3\n"
+	  "LEFDEY:\n\n", fcsize);
+    }
+#endif
+    fprintf(ctx->fp, ".subsections_via_symbols\n");
+    }
+    break;
+#endif
+  default:  /* Difficult for other modes. */
+    break;
+  }
+}
+ |