diff --git a/src/deps/src/luajit/doc/extensions.html b/src/deps/src/luajit/doc/extensions.html index eb591d1e7..a4f20841a 100644 --- a/src/deps/src/luajit/doc/extensions.html +++ b/src/deps/src/luajit/doc/extensions.html @@ -426,9 +426,7 @@ the toolchain used to compile LuaJIT: on the C stack. The contents of the C++ exception object pass through unmodified.
-Open a "Visual Studio Command Prompt" (either x86 or x64), cd to the +Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), cd to the directory with the source code and run these commands:
@@ -214,6 +214,9 @@ msvcbuild Check the msvcbuild.bat file for more options. Then follow the installation instructions below. ++For an x64 to ARM64 cross-build run this first: vcvarsall.bat x64_arm64 +
Building with MinGW or Cygwin
Open a command prompt window and make sure the MinGW or Cygwin programs diff --git a/src/deps/src/luajit/src/Makefile b/src/deps/src/luajit/src/Makefile index 5ed1c01ee..d80e45a8a 100644 --- a/src/deps/src/luajit/src/Makefile +++ b/src/deps/src/luajit/src/Makefile @@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) -TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) +TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) TARGET_LJARCH= x64 else @@ -488,7 +488,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc GIT= git -GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +ifeq (Windows,$(HOST_SYS)$(HOST_MSYS)) + GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) +else + GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || : +endif GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*) BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \ diff --git a/src/deps/src/luajit/src/host/buildvm_peobj.c b/src/deps/src/luajit/src/host/buildvm_peobj.c index 5bca6df8c..7ce3b05ac 100644 --- a/src/deps/src/luajit/src/host/buildvm_peobj.c +++ b/src/deps/src/luajit/src/host/buildvm_peobj.c @@ -9,7 +9,7 @@ #include "buildvm.h" #include "lj_bc.h" -#if LJ_TARGET_X86ORX64 +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN /* Context for PE object emitter. 
*/ static char *strtab; @@ -93,6 +93,17 @@ typedef struct PEsymaux { #define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_OFS 0 #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +#define PEOBJ_PDATA_NRELOC 6 +#define PEOBJ_XDATA_SIZE (8*2+4+6*2) +#elif LJ_TARGET_ARM64 +#define PEOBJ_ARCH_TARGET 0xaa64 +#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */ +#define PEOBJ_RELOC_DIR32 0x01 +#define PEOBJ_RELOC_ADDR32NB 0x02 +#define PEOBJ_RELOC_OFS (-4) +#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ +#define PEOBJ_PDATA_NRELOC 4 +#define PEOBJ_XDATA_SIZE (4+24+4 +4+8) #endif /* Section numbers (0-based). */ @@ -100,7 +111,7 @@ enum { PEOBJ_SECT_ABS = -2, PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_TEXT, -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, #elif LJ_TARGET_X86 @@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx) uint32_t sofs; int i, nrsym; union { uint8_t b; uint32_t u; } host_endian; +#ifdef PEOBJ_PDATA_NRELOC + uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; +#endif sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection); @@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx) /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); pesect[PEOBJ_SECT_PDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4); + sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4); pesect[PEOBJ_SECT_PDATA].relocofs = sofs; - sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE; + sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. 
*/ pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); pesect[PEOBJ_SECT_XDATA].ofs = sofs; - sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */ + sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */ pesect[PEOBJ_SECT_XDATA].relocofs = sofs; sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ @@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx) */ nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif @@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 { /* Write .pdata section. */ - uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs; uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ PEreloc reloc; pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0; @@ -308,6 +321,86 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_ARM64 + /* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */ + { /* Write .pdata section. */ + uint32_t pdata[4]; + PEreloc reloc; + pdata[0] = 0; + pdata[1] = 0; + pdata[2] = fcofs; + pdata[3] = 4+24+4; + owrite(ctx, &pdata, sizeof(pdata)); + /* Start of .text and start of .xdata. */ + reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + /* Start of vm_ffi_call and start of second part of .xdata. 
*/ + reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } + { /* Write .xdata section. */ + uint32_t u32; + uint8_t *p, uwc[24]; + PEreloc reloc; + +#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2) +#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */ +#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */ +#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3)) +#define CSAVE_REGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \ +} while (0) +#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3)) +#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3)) +#define CSAVE_FREGS(r1,r2,o1) do { \ + int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \ +} while (0) +#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */ +#define CODE_NOP 0xe3 +#define CODE_END 0xe4 +#define CEND_ALIGN do { \ + *p++ = CODE_END; \ + while ((p - uwc) & 3) *p++ = CODE_NOP; \ +} while (0) + + /* Unwind codes for .text section with handler. */ + p = uwc; + CSAVE_REGS(19, 28, 176); /* +5*2 */ + CSAVE_FREGS(8, 15, 96); /* +4*2 */ + CSAVE_FPLR(192); /* +1 */ + CALLOC_S(208); /* +1 */ + CEND_ALIGN; /* +1 +3 -> 24 */ + + u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 24); + + u32 = 0; /* Handler RVA to be relocated at 4 + 24. */ + owrite(ctx, &u32, 4); + + /* Unwind codes for vm_ffi_call without handler. 
*/ + p = uwc; + CADD_FP(16); /* +2 */ + CSAVE_FPLR(16); /* +1 */ + CSAVE_REGPX(19, -32); /* +2 */ + CEND_ALIGN; /* +1 +2 -> 8 */ + + u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2); + owrite(ctx, &u32, 4); + owrite(ctx, &uwc, 8); + + reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } #elif LJ_TARGET_X86 /* Write .sxdata section. */ for (i = 0; i < nrsym; i++) { @@ -339,7 +432,7 @@ void emit_peobj(BuildCtx *ctx) emit_peobj_sym(ctx, ctx->relocsym[i], 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); -#if LJ_TARGET_X64 +#ifdef PEOBJ_PDATA_NRELOC emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); emit_peobj_sym(ctx, "lj_err_unwind_win", 0, diff --git a/src/deps/src/luajit/src/host/genversion.lua b/src/deps/src/luajit/src/host/genversion.lua index 42b5e6fe9..28f7206c5 100644 --- a/src/deps/src/luajit/src/host/genversion.lua +++ b/src/deps/src/luajit/src/host/genversion.lua @@ -5,9 +5,10 @@ -- Released under the MIT license. See Copyright Notice in luajit.h ---------------------------------------------------------------------------- -local FILE_ROLLING_H = "luajit_rolling.h" -local FILE_RELVER_TXT = "luajit_relver.txt" -local FILE_LUAJIT_H = "luajit.h" +local arg = {...} +local FILE_ROLLING_H = arg[1] or "luajit_rolling.h" +local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt" +local FILE_LUAJIT_H = arg[3] or "luajit.h" local function file_read(file) local fp = assert(io.open(file, "rb"), "run from the wrong directory") diff --git a/src/deps/src/luajit/src/jit/dis_arm64.lua b/src/deps/src/luajit/src/jit/dis_arm64.lua index b10e2fb12..3d199bf26 100644 --- a/src/deps/src/luajit/src/jit/dis_arm64.lua +++ b/src/deps/src/luajit/src/jit/dis_arm64.lua @@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register. 
[0] = { shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } }, false -- unallocated @@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register. { shift = 29, mask = 3, [0] = { - shift = 21, mask = 7, - [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", - "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + shift = 21, mask = 1, + [0] = "andDNMSg", "bicDNMSg" }, { - shift = 21, mask = 7, - [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", - "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + shift = 21, mask = 1, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg" }, { - shift = 21, mask = 7, - [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", - "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + shift = 21, mask = 1, + [0] = "eorDNMSg", "eonDNMSg" }, { - shift = 21, mask = 7, - [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", - "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + shift = 21, mask = 1, + [0] = "ands|tstD0NMSg", "bicsDNMSg" } } } @@ -735,7 +727,7 @@ local map_cond = { "hi", "ls", "ge", "lt", "gt", "le", "al", } -local map_shift = { [0] = "lsl", "lsr", "asr", } +local map_shift = { [0] = "lsl", 
"lsr", "asr", "ror"} local map_extend = { [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", diff --git a/src/deps/src/luajit/src/lib_base.c b/src/deps/src/luajit/src/lib_base.c index b22658b44..cd743e30d 100644 --- a/src/deps/src/luajit/src/lib_base.c +++ b/src/deps/src/luajit/src/lib_base.c @@ -617,7 +617,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap) setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); return FFH_RES(2); } - lj_state_growstack(co, (MSize)(L->top - L->base)); + if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) { + cTValue *msg = --co->top; + lj_err_callermsg(L, strVdata(msg)); + } return FFH_RETRY; } diff --git a/src/deps/src/luajit/src/lib_ffi.c b/src/deps/src/luajit/src/lib_ffi.c index 6dee2e742..ba7831738 100644 --- a/src/deps/src/luajit/src/lib_ffi.c +++ b/src/deps/src/luajit/src/lib_ffi.c @@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.) "\003win" #endif #if LJ_ABI_PAUTH - "\007pauth" + "\005pauth" #endif #if LJ_TARGET_UWP "\003uwp" diff --git a/src/deps/src/luajit/src/lj_api.c b/src/deps/src/luajit/src/lj_api.c index fad6e09cb..689585109 100644 --- a/src/deps/src/luajit/src/lj_api.c +++ b/src/deps/src/luajit/src/lj_api.c @@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size) if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) { return 0; /* Stack overflow. */ } else if (size > 0) { - lj_state_checkstack(L, (MSize)size); + int avail = (int)(mref(L->maxstack, TValue) - L->top); + if (size > avail && + lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) { + L->top--; + return 0; /* Out of memory. 
*/ + } } return 1; } diff --git a/src/deps/src/luajit/src/lj_arch.h b/src/deps/src/luajit/src/lj_arch.h index 4e50e8b7a..5f31a81b5 100644 --- a/src/deps/src/luajit/src/lj_arch.h +++ b/src/deps/src/luajit/src/lj_arch.h @@ -59,7 +59,7 @@ #define LUAJIT_TARGET LUAJIT_ARCH_X64 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #define LUAJIT_TARGET LUAJIT_ARCH_ARM -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(_M_ARM64) #define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__s390x__) || defined(__s390x) #define LUAJIT_TARGET LUAJIT_ARCH_S390X @@ -70,7 +70,7 @@ #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else -#error "No support for this architecture (yet)" +#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif #endif @@ -245,7 +245,7 @@ #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 #elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 @@ -523,30 +523,45 @@ #elif LJ_TARGET_ARM #if defined(__ARMEB__) #error "No support for big-endian ARM" +#undef LJ_TARGET_ARM #endif #if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #error "No support for Cortex-M CPUs" +#undef LJ_TARGET_ARM #endif #if !(__ARM_EABI__ || LJ_TARGET_IOS) #error "Only ARM EABI or iOS 3.0+ ABI is supported" +#undef LJ_TARGET_ARM #endif #elif LJ_TARGET_ARM64 #if defined(_ILP32) #error "No support for ILP32 model on ARM64" +#undef LJ_TARGET_ARM64 #endif #elif LJ_TARGET_PPC +#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) +#error "No support for little-endian PPC32" +#undef LJ_TARGET_PPC +#endif +#if 
defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) +#error "No support for PPC/e500, use LuaJIT 2.0" +#undef LJ_TARGET_PPC +#endif #elif LJ_TARGET_MIPS32 #if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) #error "Only o32 ABI supported for MIPS32" +#undef LJ_TARGET_MIPS #endif #if LJ_TARGET_MIPSR6 /* Not that useful, since most available r6 CPUs are 64 bit. */ #error "No support for MIPS32R6" +#undef LJ_TARGET_MIPS #endif #elif LJ_TARGET_MIPS64 #if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ #error "Only n64 ABI supported for MIPS64" +#undef LJ_TARGET_MIPS #endif #endif #endif diff --git a/src/deps/src/luajit/src/lj_asm.c b/src/deps/src/luajit/src/lj_asm.c index 86f0872cd..b710ca3f5 100644 --- a/src/deps/src/luajit/src/lj_asm.c +++ b/src/deps/src/luajit/src/lj_asm.c @@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) IRIns *ir = IR(ref); if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || #if LJ_GC64 +#if LJ_TARGET_ARM64 + (ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) || +#else (ir->o == IR_KINT && k == ir->i) || +#endif (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && k == (intptr_t)ir_kptr(ir)) diff --git a/src/deps/src/luajit/src/lj_asm_arm.h b/src/deps/src/luajit/src/lj_asm_arm.h index b3b1f096b..a003d5cab 100644 --- a/src/deps/src/luajit/src/lj_asm_arm.h +++ b/src/deps/src/luajit/src/lj_asm_arm.h @@ -1990,6 +1990,7 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { + int savereg = 0; Reg pbase; uint32_t k; if (irp) { @@ -2000,12 +2001,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot, pbase = rset_pickbot(allow); } else { pbase = RID_RET; - emit_lso(as, ARMI_LDR, RID_RET, 
RID_SP, 0); /* Restore temp. register. */ + savereg = 1; } } else { pbase = RID_BASE; } emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); + if (savereg) + emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ k = emit_isk12(0, (int32_t)(8*topslot)); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, ARMI_CMP^k, RID_TMP); @@ -2017,7 +2020,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (ra_hasspill(irp->s)) emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); - if (ra_hasspill(irp->s) && !allow) + if (savereg) emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_loadi(as, RID_TMP, (i & ~4095)); } else { diff --git a/src/deps/src/luajit/src/lj_asm_arm64.h b/src/deps/src/luajit/src/lj_asm_arm64.h index 8ac766238..93bc2fa6f 100644 --- a/src/deps/src/luajit/src/lj_asm_arm64.h +++ b/src/deps/src/luajit/src/lj_asm_arm64.h @@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc) emit_cond_branch(as, cc, target); } -/* Emit test and branch instruction to exit for guard. */ -static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +/* Emit test and branch instruction to exit for guard, if in range. */ +static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) { MCode *target = asm_exitstub_addr(as, as->snapno); MCode *p = as->mcp; + ptrdiff_t delta = target - p; if (LJ_UNLIKELY(p == as->invmcp)) { + if (as->orignins > 1023) return 0; /* Delta might end up too large. */ as->loopinv = 1; - *p = A64I_B | A64F_S26(target-p); - emit_tnb(as, ai^0x01000000u, r, bit, p-1); - return; + *p = A64I_B | A64F_S26(delta); + ai ^= 0x01000000u; + target = p-1; + } else if (LJ_UNLIKELY(delta >= 0x1fff)) { + return 0; } emit_tnb(as, ai, r, bit, target); + return 1; } /* Emit compare and branch instruction to exit for guard. 
*/ @@ -211,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); + int logical = (ai & 0x1f000000) == 0x0a000000; if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); return A64F_M(ir->r); } else if (irref_isk(ref)) { - uint32_t m; int64_t k = get_k64val(as, ref); - if ((ai & 0x1f000000) == 0x0a000000) - m = emit_isk13(k, irt_is64(ir->t)); - else - m = emit_isk12(k); + uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) : + emit_isk12(irt_is64(ir->t) ? k : (int32_t)k); if (m) return m; } else if (mayfuse(as, ref)) { @@ -232,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); IRIns *irl = IR(ir->op1); if (sh == A64SH_LSL && - irl->o == IR_CONV && + irl->o == IR_CONV && !logical && irl->op2 == ((IRT_I64<
op1, allow); return A64F_M(m) | A64F_SH(sh, shift); } - } else if (ir->o == IR_CONV && + } else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) { + Reg m = ra_alloc1(as, ir->op1, allow); + int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); + return A64F_M(m) | A64F_SH(A64SH_ROR, shift); + } else if (ir->o == IR_CONV && !logical && ir->op2 == ((IRT_I64< op1, allow); return A64F_M(m) | A64F_EX(A64EX_SXTW); @@ -455,6 +462,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; +#if LJ_HASFFI && LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) { + fpr = REGARG_LASTFPR+1; + } +#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; IRIns *ir = IR(ref); @@ -465,6 +477,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) "reg %d not free", fpr); /* Must have been evicted. */ ra_leftov(as, fpr, ref); fpr++; +#if LJ_HASFFI && LJ_ABI_WIN + } else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) { + Reg rf = ra_alloc1(as, ref, RSET_FPR); + emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31); +#endif } else { Reg r = ra_alloc1(as, ref, RSET_FPR); int32_t al = spalign; @@ -570,8 +587,6 @@ static void asm_retf(ASMState *as, IRIns *ir) as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ - /* Need to force a spill on REF_BASE now to update the stack slot. 
*/ - emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); @@ -695,25 +710,22 @@ static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - Reg dest = 0, tmp; - int destused = ra_used(ir); + Reg tmp; int32_t ofs = 0; ra_evictset(as, RSET_SCRATCH); - if (destused) { + if (ra_used(ir)) { if (ra_hasspill(ir->s)) { ofs = sps_scale(ir->s); - destused = 0; if (ra_hasreg(ir->r)) { ra_free(as, ir->r); ra_modified(as, ir->r); emit_spload(as, ir, ir->r, ofs); } } else { - dest = ra_dest(as, ir, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_FPR); + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); } } - if (destused) - emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); asm_guardcnb(as, A64I_CBZ, RID_RET); args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ @@ -804,113 +816,75 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); - Reg key = 0, tmp = RID_TMP; - Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; + Reg tmp = RID_TMP, type = RID_NONE, key, tkey; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); - int isk = irref_isk(ir->op2); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t k = 0; uint32_t khash; - MCLabel l_end, l_loop, l_next; + MCLabel l_end, l_loop; rset_clear(allow, tab); - if (!isk) { - key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? 
RSET_FPR : allow); - rset_clear(allow, key); - if (!irt_isstr(kt)) { - tmp = ra_scratch(as, allow); - rset_clear(allow, tmp); - } - } else if (irt_isnum(kt)) { - int64_t val = (int64_t)ir_knum(irkey)->u64; - if (!(k = emit_isk12(val))) { - key = ra_allock(as, val, allow); - rset_clear(allow, key); - } - } else if (!irt_ispri(kt)) { - if (!(k = emit_isk12(irkey->i))) { - key = ra_alloc1(as, refkey, allow); - rset_clear(allow, key); - } - } - - /* Allocate constants early. */ - if (irt_isnum(kt)) { - if (!isk) { - tisnum = ra_allock(as, LJ_TISNUM << 15, allow); - ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); - rset_clear(allow, tisnum); - } - } else if (irt_isaddr(kt)) { - if (isk) { - int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; - scr = ra_allock(as, kk, allow); + /* Allocate register for tkey outside of the loop. */ + if (isk) { + int64_t kk; + if (irt_isaddr(kt)) { + kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else if (irt_isnum(kt)) { + kk = (int64_t)ir_knum(irkey)->u64; + /* Assumes -0.0 is already canonicalized to +0.0. */ } else { - scr = ra_scratch(as, allow); + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + kk = ~((int64_t)~irt_toitype(kt) << 47); } - rset_clear(allow, scr); + k = emit_isk12(kk); + tkey = k ? 0 : ra_allock(as, kk, allow); } else { - lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); - type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); - scr = ra_scratch(as, rset_clear(allow, type)); - rset_clear(allow, scr); + tkey = ra_scratch(as, allow); } /* Key not found in chain: jump to exit (if merged) or load niltv. 
*/ l_end = emit_label(as); as->invmcp = NULL; - if (merge == IR_NE) + if (merge == IR_NE) { asm_guardcc(as, CC_AL); - else if (destused) - emit_loada(as, dest, niltvg(J2G(as->J))); + } else if (destused) { + uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val)); + lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)"); + emit_dn(as, A64I_ADDx^k12, dest, RID_GL); + } /* Follow hash chain until the end. */ l_loop = --as->mcp; - emit_n(as, A64I_CMPx^A64I_K12^0, dest); - emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); - l_next = emit_label(as); + if (destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); /* Type and value comparison. */ if (merge == IR_EQ) asm_guardcc(as, CC_EQ); else emit_cond_branch(as, CC_EQ, l_end); + emit_nm(as, A64I_CMPx^k, tmp, tkey); + if (!destused) + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key)); + *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest; - if (irt_isnum(kt)) { - if (isk) { - /* Assumes -0.0 is already canonicalized to +0.0. */ - if (k) - emit_n(as, A64I_CMPx^k, tmp); - else - emit_nm(as, A64I_CMPx, key, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + /* Construct tkey as canonicalized or tagged key. */ + if (!isk) { + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey); + /* A64I_FMOV_R_D from key to tkey done below. 
*/ } else { - emit_nm(as, A64I_FCMPd, key, ftmp); - emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); - emit_cond_branch(as, CC_LO, l_next); - emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); + key = ra_alloc1(as, refkey, allow); + type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key)); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type); } - } else if (irt_isaddr(kt)) { - if (isk) { - emit_nm(as, A64I_CMPx, scr, tmp); - emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); - } else { - emit_nm(as, A64I_CMPx, tmp, scr); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); - } - } else { - emit_nm(as, A64I_CMPx, scr, type); - emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); } - *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; - if (!isk && irt_isaddr(kt)) { - type = ra_allock(as, (int32_t)irt_toitype(kt), allow); - emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); - rset_clear(allow, type); - } /* Load main position relative to tab->node into dest. */ khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { @@ -924,7 +898,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_dnm(as, A64I_ANDw, dest, dest, tmphash); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - /* Fetch of str->sid is cheaper than ra_allock. 
*/ emit_dnm(as, A64I_ANDw, dest, dest, tmp); emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); @@ -933,23 +906,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); emit_dnm(as, A64I_SUBw, dest, dest, tmp); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); - emit_dnm(as, A64I_EORw, dest, dest, tmp); - emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest); emit_dnm(as, A64I_SUBw, tmp, tmp, dest); emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); - emit_dnm(as, A64I_EORw, tmp, tmp, dest); if (irt_isnum(kt)) { + emit_dnm(as, A64I_EORw, tmp, tkey, dest); emit_dnm(as, A64I_ADDw, dest, dest, dest); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVw, tmp, dest); - emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey); + emit_nm(as, A64I_FCMPZd, (key & 31), 0); + emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31)); } else { - checkmclim(as); - emit_dm(as, A64I_MOVw, tmp, key); - emit_dnm(as, A64I_EORw, dest, dest, - ra_allock(as, irt_toitype(kt) << 15, allow)); - emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); - emit_dm(as, A64I_MOVx, dest, key); + emit_dnm(as, A64I_EORw, tmp, key, dest); + emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key); } } } @@ -964,7 +932,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int bigofs = !emit_checkofs(A64I_LDRx, kofs); Reg dest = (ra_used(ir) || bigofs) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key, idx = node; + Reg idx = node; RegSet allow = rset_exclude(RSET_GPR, node); uint64_t k; lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); @@ -983,9 +951,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } else { k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); } - key = ra_scratch(as, allow); - emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); - emit_lso(as, A64I_LDRx, key, idx, kofs); + emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs); if (bigofs) emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node)); } @@ -998,18 +965,16 @@ static void asm_uref(ASMState *as, IRIns *ir) MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; emit_lsptr(as, A64I_LDRx, dest, v); } else { - Reg uv = ra_scratch(as, RSET_GPR); - Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - asm_guardcc(as, CC_NE); - emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); - emit_opk(as, A64I_ADDx, dest, uv, + asm_guardcnb(as, A64I_CBZ, RID_TMP); + emit_opk(as, A64I_ADDx, dest, dest, (int32_t)offsetof(GCupval, tv), RSET_GPR); - emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + emit_lso(as, A64I_LDRB, RID_TMP, dest, + (int32_t)offsetof(GCupval, closed)); } else { - emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v)); } - emit_lso(as, A64I_LDRx, uv, func, + emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR), (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); } } @@ -1135,7 +1100,7 @@ static void asm_xstore(ASMState *as, IRIns *ir) static void asm_ahuvload(ASMState *as, IRIns *ir) { - Reg idx, tmp, type; + Reg idx, tmp; int32_t ofs = 0; RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? 
RSET_FPR : RSET_GPR; lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || @@ -1154,8 +1119,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) } else { tmp = ra_scratch(as, gpr); } - type = ra_scratch(as, rset_clear(gpr, tmp)); - idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx); rset_clear(gpr, idx); if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31); if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; @@ -1167,8 +1131,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), ra_allock(as, LJ_TISNUM << 15, gpr), tmp); } else if (irt_isaddr(ir->t)) { - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } else if (irt_isnil(ir->t)) { emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); } else { @@ -1291,9 +1255,8 @@ dotypecheck: emit_nm(as, A64I_CMPx, ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); } else { - Reg type = ra_scratch(as, allow); - emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); - emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp); } emit_lso(as, A64I_LDRx, tmp, base, ofs); return; @@ -1384,7 +1347,6 @@ static void asm_obar(ASMState *as, IRIns *ir) const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; IRRef args[2]; MCLabel l_end; - RegSet allow = RSET_GPR; Reg obj, val, tmp; /* No need for other object barriers (yet). 
*/ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); @@ -1395,14 +1357,13 @@ static void asm_obar(ASMState *as, IRIns *ir) asm_gencall(as, ci, args); emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); obj = IR(ir->op1)->r; - tmp = ra_scratch(as, rset_exclude(allow, obj)); - emit_cond_branch(as, CC_EQ, l_end); - emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end); emit_cond_branch(as, CC_EQ, l_end); emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); emit_lso(as, A64I_LDRB, tmp, obj, - (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); } @@ -1444,12 +1405,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) if (irref_isk(lref)) return 1; /* But swap constants to the right. */ ir = IR(rref); - if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64< o >= IR_BSHL && ir->o <= IR_BSAR) || + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || (ir->o == IR_ADD && ir->op1 == ir->op2) || (ir->o == IR_CONV && ir->op2 == ((IRT_I64< t)) { /* IR_MULOV */ asm_guardcc(as, CC_NE); emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ - emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); - emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest); emit_dnm(as, A64I_SMULL, dest, right, left); } else { emit_dnm(as, irt_is64(ir->t) ? 
A64I_MULx : A64I_MULw, dest, left, right); @@ -1762,16 +1722,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir) if (asm_swapops(as, blref, brref)) { Reg tmp = blref; blref = brref; brref = tmp; } + bleft = ra_alloc1(as, blref, RSET_GPR); if (irref_isk(brref)) { uint64_t k = get_k64val(as, brref); - if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { - asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, - ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) && + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft, + emit_ctz64(k))) return; - } m2 = emit_isk13(k, irt_is64(irl->t)); } - bleft = ra_alloc1(as, blref, RSET_GPR); ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); if (!m2) m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); @@ -1846,37 +1805,28 @@ static void asm_prof(ASMState *as, IRIns *ir) static void asm_stack_check(ASMState *as, BCReg topslot, IRIns *irp, RegSet allow, ExitNo exitno) { - Reg pbase; uint32_t k; + Reg pbase = RID_BASE; if (irp) { - if (!ra_hasspill(irp->s)) { - pbase = irp->r; - lj_assertA(ra_hasreg(pbase), "base reg lost"); - } else if (allow) { - pbase = rset_pickbot(allow); - } else { - pbase = RID_RET; - emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ - } - } else { - pbase = RID_BASE; + pbase = irp->r; + if (!ra_hasreg(pbase)) + pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET); } emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + if (pbase & 0x80) /* Restore temp. register. 
*/ + emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0); k = emit_isk12((8*topslot)); lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, A64I_CMPx^k, RID_TMP); - emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31)); emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, (int32_t)offsetof(lua_State, maxstack)); - if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - if (ra_hasspill(irp->s)) - emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); - emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); - if (ra_hasspill(irp->s) && !allow) - emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ - } else { - emit_getgl(as, RID_TMP, cur_L); + if (pbase & 0x40) { + emit_getgl(as, (pbase & 31), jit_base); + if (pbase & 0x80) /* Save temp register. */ + emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0); } + emit_getgl(as, RID_TMP, cur_L); } /* Restore Lua stack from on-trace state. */ @@ -1918,7 +1868,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) /* Marker to prevent patching the GC check exit. */ #define ARM64_NOPATCH_GC_CHECK \ - (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) + (A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO)) /* Check GC threshold and do one or more GC steps. */ static void asm_gc_check(ASMState *as) @@ -1973,57 +1923,40 @@ static void asm_loop_tail_fixup(ASMState *as) /* -- Head of trace ------------------------------------------------------- */ -/* Reload L register from g->cur_L. */ -static void asm_head_lreg(ASMState *as) -{ - IRIns *ir = IR(ASMREF_L); - if (ra_used(ir)) { - Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, cur_L); - ra_evictk(as); - } -} - /* Coalesce BASE register for a root trace. 
*/ static void asm_head_root_base(ASMState *as) { - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - ra_destreg(as, ir, RID_BASE); + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_movrr(as, ir, r, RID_BASE); + } } /* Coalesce BASE register for a side trace. */ static Reg asm_head_side_base(ASMState *as, IRIns *irp) { - IRIns *ir; - asm_head_lreg(as); - ir = IR(REF_BASE); - - /* IRRefs that get into the side trace from the parent trace may restore - * REF_BASE under severe register pressure and thus reach here holding on to - * the register. Restore such references so that REF_BASE gets RID_BASE back - * when it tries to allocate below. */ - if (!ra_hasreg(ir->r)) { - Reg r = ra_gethint(ir->r); - if (!rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - } - - if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) - ra_spill(as, ir); - if (ra_hasspill(irp->s)) { - return ra_dest(as, ir, RSET_GPR); - } else { - Reg r = irp->r; - lj_assertA(ra_hasreg(r), "base reg lost"); - if (r != ir->r && !rset_test(as->freeset, r)) - ra_restore(as, regcost_ref(as->cost[r])); - ra_destreg(as, ir, r); - return r; + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ + emit_movrr(as, ir, r, irp->r); + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. 
*/ + } } + return RID_NONE; } /* -- Tail of trace ------------------------------------------------------- */ @@ -2075,6 +2008,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots; asm_collectargs(as, ir, ci, args); +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG)) nfpr = 0; +#endif for (i = 0; i < nargs; i++) { int al = spalign; if (!args[i]) { @@ -2086,7 +2022,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) #endif } else if (irt_isfp(IR(args[i])->t)) { if (nfpr > 0) { nfpr--; continue; } -#if LJ_TARGET_OSX +#if LJ_ABI_WIN + if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; } +#elif LJ_TARGET_OSX al |= irt_isnum(IR(args[i])->t) ? 7 : 3; #endif } else { diff --git a/src/deps/src/luajit/src/lj_asm_x86.h b/src/deps/src/luajit/src/lj_asm_x86.h index 34e1ad743..7065c37f5 100644 --- a/src/deps/src/luajit/src/lj_asm_x86.h +++ b/src/deps/src/luajit/src/lj_asm_x86.h @@ -140,7 +140,8 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) } } else if (irb->o == IR_ADD && irref_isk(irb->op2)) { /* Fuse base offset (vararg load). */ - as->mrm.ofs = IR(irb->op2)->i; + IRIns *irk = IR(irb->op2); + as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64; return irb->op1; } return ref; /* Otherwise use the given array base. */ diff --git a/src/deps/src/luajit/src/lj_ccall.c b/src/deps/src/luajit/src/lj_ccall.c index b21eea58c..667369b43 100644 --- a/src/deps/src/luajit/src/lj_ccall.c +++ b/src/deps/src/luajit/src/lj_ccall.c @@ -1141,6 +1141,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, fid = ctf->sib; } +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((ct->info & CTF_VARARG)) { + nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */ + ngpr = maxgpr; + nfpr = CCALL_NARG_FPR; + } +#endif + /* Walk through all passed arguments. 
*/ for (o = L->base+1, narg = 1; o < top; o++, narg++) { CTypeID did; @@ -1201,9 +1209,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, align = CTSIZE_PTR-1; nsp = (nsp + align) & ~align; } +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + /* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */ + dp = ((uint8_t *)cc->stack) + (int32_t)nsp; +#else dp = ((uint8_t *)cc->stack) + nsp; +#endif nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR; - if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ + if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */ err_nyi: lj_err_caller(L, LJ_ERR_FFI_NYICALL); } @@ -1314,6 +1327,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ +#if LJ_TARGET_ARM64 && LJ_ABI_WIN + if ((int32_t)nsp < 0) nsp = 0; +#endif #if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) cc->nfpr = nfpr; /* Required for vararg functions. */ diff --git a/src/deps/src/luajit/src/lj_crecord.c b/src/deps/src/luajit/src/lj_crecord.c index d7a522fba..55d0b3ef6 100644 --- a/src/deps/src/luajit/src/lj_crecord.c +++ b/src/deps/src/luajit/src/lj_crecord.c @@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, ngpr = 1; else if (ctype_cconv(ct->info) == CTCC_FASTCALL) ngpr = 2; -#elif LJ_TARGET_ARM64 -#if LJ_ABI_WIN -#error "NYI: ARM64 Windows ABI calling conventions" -#elif LJ_TARGET_OSX +#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX int ngpr = CCALL_NARG_GPR; -#endif #endif /* Skip initial attributes. */ diff --git a/src/deps/src/luajit/src/lj_def.h b/src/deps/src/luajit/src/lj_def.h index aa161df97..cfef37b97 100644 --- a/src/deps/src/luajit/src/lj_def.h +++ b/src/deps/src/luajit/src/lj_def.h @@ -69,7 +69,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_UPVAL 120 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. 
*/ -#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ +#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */ #if defined(__powerpc64__) && _CALL_ELF != 2 #define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */ @@ -154,15 +154,9 @@ typedef uintptr_t BloomFilter; #define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0) #define lj_ffs(x) ((uint32_t)__builtin_ctz(x)) -/* Don't ask ... */ -#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__)) -static LJ_AINLINE uint32_t lj_fls(uint32_t x) -{ - uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r; -} -#else #define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31)) -#endif +#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x)) +#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63)) #if defined(__arm__) static LJ_AINLINE uint32_t lj_bswap(uint32_t x) @@ -273,8 +267,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) #else unsigned char _BitScanForward(unsigned long *, unsigned long); unsigned char _BitScanReverse(unsigned long *, unsigned long); +unsigned char _BitScanForward64(unsigned long *, uint64_t); +unsigned char _BitScanReverse64(unsigned long *, uint64_t); #pragma intrinsic(_BitScanForward) #pragma intrinsic(_BitScanReverse) +#pragma intrinsic(_BitScanForward64) +#pragma intrinsic(_BitScanReverse64) static LJ_AINLINE uint32_t lj_ffs(uint32_t x) { @@ -285,6 +283,16 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x) { unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r; } + +static LJ_AINLINE uint32_t lj_ffs64(uint64_t x) +{ + unsigned long r; _BitScanForward64(&r, x); return (uint32_t)r; +} + +static LJ_AINLINE uint32_t lj_fls64(uint64_t x) +{ + unsigned long r; _BitScanReverse64(&r, x); return (uint32_t)r; +} #endif unsigned long _byteswap_ulong(unsigned long); diff --git a/src/deps/src/luajit/src/lj_dispatch.c b/src/deps/src/luajit/src/lj_dispatch.c index 57809e627..b9748bba7 100644 --- a/src/deps/src/luajit/src/lj_dispatch.c +++ 
b/src/deps/src/luajit/src/lj_dispatch.c @@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn) int numparams = pt->numparams; int gotparams = (int)(L->top - L->base); int need = pt->framesize; - if ((pt->flags & PROTO_VARARG)) need += 1+gotparams; + if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams; lj_state_checkstack(L, (MSize)need); numparams -= gotparams; return numparams >= 0 ? numparams : 0; diff --git a/src/deps/src/luajit/src/lj_emit_arm64.h b/src/deps/src/luajit/src/lj_emit_arm64.h index 6926c71a8..3c5104923 100644 --- a/src/deps/src/luajit/src/lj_emit_arm64.h +++ b/src/deps/src/luajit/src/lj_emit_arm64.h @@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref) } else { lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, "bad 64 bit const IR op %d", ir->o); - return ir->i; /* Sign-extended. */ + return (uint32_t)ir->i; /* Zero-extended. */ } } @@ -30,39 +30,31 @@ static uint32_t emit_isk12(int64_t n) uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n; uint32_t m = n < 0 ? 0x40000000 : 0; if (k < 0x1000) { - return A64I_K12|m|A64F_U12(k); + return (uint32_t)(A64I_K12|m|A64F_U12(k)); } else if ((k & 0xfff000) == k) { - return A64I_K12|m|0x400000|A64F_U12(k>>12); + return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12)); } return 0; } -#define emit_clz64(n) __builtin_clzll(n) -#define emit_ctz64(n) __builtin_ctzll(n) +#define emit_clz64(n) (lj_fls64(n)^63) +#define emit_ctz64(n) lj_ffs64(n) /* Encode constant in K13 format for logical data processing instructions. */ static uint32_t emit_isk13(uint64_t n, int is64) { - int inv = 0, w = 128, lz, tz; - if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ - if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ - do { /* Find the repeat width. */ - if (is64 && (uint32_t)(n^(n>>32))) break; - n = (uint32_t)n; - if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. 
*/ - w = 32; if ((n^(n>>16)) & 0xffff) break; - n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; - n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; - n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; - n = n & 0x3; w = 2; - } while (0); - lz = emit_clz64(n); - tz = emit_ctz64(n); - if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ - if (inv) - return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); - else - return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); + /* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */ + int rot, ones, size, immr, imms; + if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n; + if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */ + rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64; + n = lj_ror(n, rot & 63); + ones = emit_ctz64(~n); + size = emit_clz64(n) + ones; + if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */ + immr = -rot & (size - 1); + imms = (-(size << 1) | (ones - 1)) & 63; + return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms); } static uint32_t emit_isfpk64(uint64_t n) @@ -121,9 +113,20 @@ static int emit_checkofs(A64Ins ai, int64_t ofs) } } -static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) +static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc) { - int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; + if (ofs >= 0) { + return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */ + } else if (ofs >= -256) { + return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff); + } else { + return A64F_D(31); /* Will mismatch prev. */ + } +} + +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64) +{ + int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64; lj_assertA(ot, "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDP/STP. 
*/ if ((sc == 2 || sc == 3) && @@ -132,11 +135,9 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) uint32_t prev = *as->mcp & ~A64F_D(31); int ofsm = ofs - (1< >sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) { aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); - } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || - prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + } else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) { aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); ofsm = ofs; } else { @@ -158,13 +159,12 @@ nopair: /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ -#define emit_canremat(ref) ((ref) <= ASMREF_L) +#define emit_canremat(ref) ((ref) <= REF_BASE) -/* Try to find an N-step delta relative to other consts with N < lim. */ -static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +/* Try to find a one-step delta relative to other consts. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) { RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); - if (lim <= 1) return 0; /* Can't beat that. */ while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); @@ -173,13 +173,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : get_k64val(as, ref); int64_t delta = (int64_t)(k - kx); + if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */ if (delta == 0) { - emit_dm(as, A64I_MOVx, rd, r); + emit_dm(as, is64|A64I_MOVw, rd, r); return 1; } else { uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta); if (k12) { - emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + emit_dn(as, (delta < 0 ? 
A64I_SUBw : A64I_ADDw)^is64^k12, rd, r); return 1; } /* Do other ops or multi-step deltas pay off? Probably not. @@ -192,53 +193,52 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) return 0; /* Failed. */ } -static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64) { - int i, zeros = 0, ones = 0, neg; - if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ - /* Count homogeneous 16 bit fragments. */ - for (i = 0; i < 4; i++) { - uint64_t frag = (u64 >> i*16) & 0xffff; - zeros += (frag == 0); - ones += (frag == 0xffff); + int zeros = 0, ones = 0, neg, lshift = 0; + int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2; + /* Count non-homogeneous 16 bit fragments. */ + while (--i >= 0) { + uint32_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag != 0); + ones += (frag != 0xffff); } - neg = ones > zeros; /* Use MOVN if it pays off. */ - if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ + neg = ones < zeros; /* Use MOVN if it pays off. */ + if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */ uint32_t k13 = emit_isk13(u64, is64); if (k13) { emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); return; } - } - if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { - int shift = 0, lshift = 0; - uint64_t n64 = neg ? ~u64 : u64; - if (n64 != 0) { - /* Find first/last fragment to be filled. */ - shift = (63-emit_clz64(n64)) & ~15; - lshift = emit_ctz64(n64) & ~15; + if (emit_kdelta(as, rd, u64, is64)) { + return; } - /* MOVK requires the original value (u64). */ - while (shift > lshift) { - uint32_t u16 = (u64 >> shift) & 0xffff; - /* Skip fragments that are correctly filled by MOVN/MOVZ. */ - if (u16 != (neg ? 0xffff : 0)) - emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); - shift -= 16; - } - /* But MOVN needs an inverted value (n64). */ - emit_d(as, (neg ? 
A64I_MOVNx : A64I_MOVZx) | - A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); } + if (neg) { + u64 = ~u64; + if (!is64) u64 = (uint32_t)u64; + } + if (u64) { + /* Find first/last fragment to be filled. */ + int shift = (63-emit_clz64(u64)) & ~15; + lshift = emit_ctz64(u64) & ~15; + for (; shift > lshift; shift -= 16) { + uint32_t frag = (u64 >> shift) & 0xffff; + if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */ + if (neg) frag ^= 0xffff; /* MOVK requires the original value. */ + emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd); + } + } + /* But MOVN needs an inverted value. */ + emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) | + A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); } /* Load a 32 bit constant into a GPR. */ -#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) +#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i) /* Load a 64 bit constant into a GPR. */ -#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) - -#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i) #define glofs(as, k) \ ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) @@ -252,19 +252,20 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) { - /* First, check if ip + offset is in range. */ - if ((ai & 0x00400000) && checkmcpofs(as, p)) { + Reg base = RID_GL; + int64_t ofs = glofs(as, p); + if (emit_checkofs(ai, ofs)) { + /* GL + offset, might subsequently fuse to LDP/STP. */ + } else if (ai == A64I_LDRx && checkmcpofs(as, p)) { + /* IP + offset is cheaper than allock, but address must be in range. */ emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); - } else { - Reg base = RID_GL; /* Next, try GL + offset. 
*/ - int64_t ofs = glofs(as, p); - if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ - int64_t i64 = i64ptr(p); - base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); - ofs = i64 & 0x7fffull; - } - emit_lso(as, ai, r, base, ofs); + return; + } else { /* Split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; } + emit_lso(as, ai, r, base, ofs); } /* Load 64 bit IR constant into register. */ diff --git a/src/deps/src/luajit/src/lj_err.c b/src/deps/src/luajit/src/lj_err.c index 49cf98574..16f633073 100644 --- a/src/deps/src/luajit/src/lj_err.c +++ b/src/deps/src/luajit/src/lj_err.c @@ -174,12 +174,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) case FRAME_PCALL: /* FF pcall() frame. */ case FRAME_PCALLH: /* FF pcall() frame inside hook. */ if (errcode) { + global_State *g; if (errcode == LUA_YIELD) { frame = frame_prevd(frame); break; } + g = G(L); + setgcref(g->cur_L, obj2gco(L)); if (frame_typep(frame) == FRAME_PCALL) - hook_leave(G(L)); + hook_leave(g); L->base = frame_prevd(frame) + 1; L->cframe = cf; unwindstack(L, L->base); @@ -209,11 +212,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) ** from 3rd party docs or must be found by trial-and-error. They really ** don't want you to write your own language-specific exception handler ** or to interact gracefully with MSVC. :-( -** -** Apparently MSVC doesn't call C++ destructors for foreign exceptions -** unless you compile your C++ code with /EHa. Unfortunately this means -** catch (...) also catches things like access violations. The use of -** _set_se_translator doesn't really help, because it requires /EHa, too. 
*/ #define WIN32_LEAN_AND_MEAN @@ -261,6 +259,8 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, { #if LJ_TARGET_X86 void *cf = (char *)f - CFRAME_OFS_SEH; +#elif LJ_TARGET_ARM64 + void *cf = (char *)f - CFRAME_SIZE; #else void *cf = f; #endif @@ -268,11 +268,25 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ + if (rec->ExceptionCode == STATUS_LONGJUMP && + rec->ExceptionRecord && + LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) { + errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode); + if ((rec->ExceptionFlags & 0x20)) { /* EH_TARGET_UNWIND */ + /* Unwinding is about to finish; revert the ExceptionCode so that + ** RtlRestoreContext does not try to restore from a _JUMP_BUFFER. + */ + rec->ExceptionCode = 0; + } + } /* Unwind internal frames. */ err_unwind(L, cf, errcode); } else { void *cf2 = err_unwind(L, cf, 0); if (cf2) { /* We catch it, so start unwinding the upper frames. */ +#if !LJ_TARGET_X86 + EXCEPTION_RECORD rec2; +#endif if (rec->ExceptionCode == LJ_MSVC_EXCODE || rec->ExceptionCode == LJ_GCC_EXCODE) { #if !LJ_TARGET_CYGWIN @@ -295,14 +309,29 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); /* lj_vm_rtlunwind does not return. */ #else + if (LJ_EXCODE_CHECK(rec->ExceptionCode)) { + /* For unwind purposes, wrap the EXCEPTION_RECORD in something that + ** looks like a longjmp, so that MSVC will execute C++ destructors in + ** the frames we unwind over. ExceptionInformation[0] should really + ** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely + ** at this point. 
+ */ + rec2.ExceptionCode = STATUS_LONGJUMP; + rec2.ExceptionRecord = rec; + rec2.ExceptionAddress = 0; + rec2.NumberParameters = 1; + rec2.ExceptionInformation[0] = (ULONG_PTR)ctx; + rec = &rec2; + } /* Unwind the stack and call all handlers for all lower C frames ** (including ourselves) again with EH_UNWINDING set. Then set - ** stack pointer = cf, result = errcode and jump to the specified target. + ** stack pointer = f, result = errcode and jump to the specified target. */ - RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh), - rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); + RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + lj_vm_unwind_ff_eh : + lj_vm_unwind_c_eh), + rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord, + dispatch->HistoryTable); /* RtlUnwindEx should never return. */ #endif } diff --git a/src/deps/src/luajit/src/lj_ffrecord.c b/src/deps/src/luajit/src/lj_ffrecord.c index 7a016a84d..c45e0fde7 100644 --- a/src/deps/src/luajit/src/lj_ffrecord.c +++ b/src/deps/src/luajit/src/lj_ffrecord.c @@ -1131,7 +1131,7 @@ static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg) /* Emit BUFHDR for write to extended string buffer. */ static TRef recff_sbufx_write(jit_State *J, TRef ud) { - TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata))); + TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata))); return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); } @@ -1165,20 +1165,19 @@ static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd SBufExt *sbx = bufV(&rd->argv[0]); int iscow = (int)sbufiscow(sbx); TRef trl = recff_sbufx_get_L(J, ud); - TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW)); - TRef zero = lj_ir_kint(J, 0); - emitir(IRTG(iscow ? 
IR_NE : IR_EQ, IRT_IGC), trcow, zero); + TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + TRef zeropgc = lj_ir_kintpgc(J, 0); + emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc); if (iscow) { - trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, - LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) : - lj_ir_kint(J, SBUF_FLAG_COW)); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero); + TRef zerop = lj_ir_kintp(J, 0); + trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW)); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop); recff_sbufx_set_L(J, ud, trl); emitir(IRT(IR_FSTORE, IRT_PGC), - emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero); - recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero); + emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop); } else { TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); diff --git a/src/deps/src/luajit/src/lj_ir.h b/src/deps/src/luajit/src/lj_ir.h index 101d10f5a..6cec51dd3 100644 --- a/src/deps/src/luajit/src/lj_ir.h +++ b/src/deps/src/luajit/src/lj_ir.h @@ -76,8 +76,8 @@ \ _(ABS, N , ref, ref) \ _(LDEXP, N , ref, ref) \ - _(MIN, C , ref, ref) \ - _(MAX, C , ref, ref) \ + _(MIN, N , ref, ref) \ + _(MAX, N , ref, ref) \ _(FPMATH, N , ref, lit) \ \ /* Overflow-checking arithmetic ops. */ \ diff --git a/src/deps/src/luajit/src/lj_ircall.h b/src/deps/src/luajit/src/lj_ircall.h index 2993da625..0d4688feb 100644 --- a/src/deps/src/luajit/src/lj_ircall.h +++ b/src/deps/src/luajit/src/lj_ircall.h @@ -63,7 +63,7 @@ typedef struct CCallInfo { /* Helpers for conditional function definitions. 
*/ #define IRCALLCOND_ANY(x) x -#if LJ_TARGET_X86ORX64 +#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 #define IRCALLCOND_FPMATH(x) NULL #else #define IRCALLCOND_FPMATH(x) x diff --git a/src/deps/src/luajit/src/lj_iropt.h b/src/deps/src/luajit/src/lj_iropt.h index 458a55118..a71a717b9 100644 --- a/src/deps/src/luajit/src/lj_iropt.h +++ b/src/deps/src/luajit/src/lj_iropt.h @@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k)) #endif +#if LJ_GC64 +#define lj_ir_kintpgc lj_ir_kintp +#else +#define lj_ir_kintpgc lj_ir_kint +#endif + static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) { TValue tv; diff --git a/src/deps/src/luajit/src/lj_mcode.c b/src/deps/src/luajit/src/lj_mcode.c index 6017b7c94..0f87caf75 100644 --- a/src/deps/src/luajit/src/lj_mcode.c +++ b/src/deps/src/luajit/src/lj_mcode.c @@ -29,6 +29,11 @@ #include #endif +#if LJ_TARGET_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include +#endif + #if LJ_TARGET_IOS void sys_icache_invalidate(void *start, size_t len); #endif @@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end) #endif #if LJ_TARGET_X86ORX64 UNUSED(start); UNUSED(end); +#elif LJ_TARGET_WINDOWS + FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start); #elif LJ_TARGET_IOS sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC @@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end) #if LJ_TARGET_WINDOWS -#define WIN32_LEAN_AND_MEAN -#include - #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE @@ -363,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, size_t need) sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; - if ((size_t)need > sizemcode) + if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. 
*/ if (J->szallmcarea + sizemcode > maxmcode) lj_trace_err(J, LJ_TRERR_MCODEAL); diff --git a/src/deps/src/luajit/src/lj_opt_dce.c b/src/deps/src/luajit/src/lj_opt_dce.c index c6c3e1bc3..e6fcc552c 100644 --- a/src/deps/src/luajit/src/lj_opt_dce.c +++ b/src/deps/src/luajit/src/lj_opt_dce.c @@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J) IRIns *ir = IR(ins); if (irt_ismarked(ir->t)) { irt_clearmark(ir->t); - pchain[ir->o] = &ir->prev; } else if (!ir_sideeff(ir)) { *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */ lj_ir_nop(ir); continue; } + pchain[ir->o] = &ir->prev; if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t); } diff --git a/src/deps/src/luajit/src/lj_opt_fold.c b/src/deps/src/luajit/src/lj_opt_fold.c index 48effb8ab..743dfb074 100644 --- a/src/deps/src/luajit/src/lj_opt_fold.c +++ b/src/deps/src/luajit/src/lj_opt_fold.c @@ -377,10 +377,10 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; case IR_BSHL: k1 <<= (k2 & 63); break; - case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; - case IR_BSAR: k1 >>= (k2 & 63); break; - case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; - case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; + case IR_BSHR: k1 >>= (k2 & 63); break; + case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break; + case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; + case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; default: lj_assertJ(0, "bad IR op %d", op); break; } #else @@ -1972,7 +1972,10 @@ LJFOLD(NE any any) LJFOLDF(comm_equal) { /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */ - if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) + if (fins->op1 == fins->op2 && + (!irt_isnum(fins->t) || + (fleft->o == IR_CONV && /* Converted integers cannot be NaN. 
*/ + (uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8)))) return CONDFOLD(fins->o == IR_EQ); return fold_comm_swap(J); } diff --git a/src/deps/src/luajit/src/lj_record.c b/src/deps/src/luajit/src/lj_record.c index b9d8b7a1e..e69de6f33 100644 --- a/src/deps/src/luajit/src/lj_record.c +++ b/src/deps/src/luajit/src/lj_record.c @@ -1599,10 +1599,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) lj_assertJ(!hasmm, "inconsistent metamethod handling"); if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; - if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ + if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); - else if (tref_isnumber(key) && tref_isk(key) && tvismzero(&ix->keyv)) - key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */ + } else if (tref_isnum(key)) { + if (tref_isk(key)) { + if (tvismzero(&ix->keyv)) + key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */ + } else { + emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */ + } + } xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ #ifdef LUAJIT_ENABLE_TABLE_BUMP @@ -1775,7 +1781,7 @@ noconstify: emitir(IRTG(IR_EQ, IRT_PGC), REF_BASE, emitir(IRT(IR_ADD, IRT_PGC), uref, - lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); + lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! 
*/ if (val == 0) { return getslot(J, slot); @@ -1788,7 +1794,7 @@ noconstify: } emitir(IRTG(IR_UGT, IRT_PGC), emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), - lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); + lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8)); } else { needbarrier = 1; uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); @@ -1966,7 +1972,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2))); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2))); for (i = 0; i < nload; i++) { IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t); @@ -1985,8 +1992,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) TRef tr = TREF_NIL; ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); if (idx < 0) goto nyivarg; - if (idx != 0 && !tref_isinteger(tridx)) + if (idx != 0 && !tref_isinteger(tridx)) { + if (tref_isstr(tridx)) + tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0); tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); + } if (idx != 0 && tref_isk(tridx)) { emitir(IRTGI(idx <= nvararg ? 
IR_GE : IR_LT), fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); @@ -2014,7 +2024,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) IRType t; TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, - lj_ir_kint(J, frofs-(8<<LJ_FR2))); + lj_ir_kintpgc(J, frofs-(8<<LJ_FR2))); t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); tr = lj_record_vload(J, aref, 0, t); diff --git a/src/deps/src/luajit/src/lj_state.c b/src/deps/src/luajit/src/lj_state.c index 25eb4a7be..29ba1b1f6 100644 --- a/src/deps/src/luajit/src/lj_state.c +++ b/src/deps/src/luajit/src/lj_state.c @@ -103,8 +103,17 @@ void lj_state_shrinkstack(lua_State *L, MSize used) void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) { MSize n; - if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */ - lj_err_throw(L, LUA_ERRERR); + if (L->stacksize >= LJ_STACK_MAXEX) { + /* 4. Throw 'error in error handling' when we are _over_ the limit. */ + if (L->stacksize > LJ_STACK_MAXEX) + lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */ + /* 1. We are _at_ the limit after the last growth. */ + if (L->status < LUA_ERRRUN) { /* 2. Throw 'stack overflow'. */ + L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */ + lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */ + } + /* 3. Add space (over the limit) for pushed message and error handler. 
*/ + } n = L->stacksize + need; if (n > LJ_STACK_MAX) { n += 2*LUA_MINSTACK; @@ -114,8 +123,6 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need) n = LJ_STACK_MAX; } resizestack(L, n); - if (L->stacksize >= LJ_STACK_MAXEX) - lj_err_msg(L, LJ_ERR_STKOV); } void LJ_FASTCALL lj_state_growstack1(lua_State *L) @@ -123,6 +130,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L) lj_state_growstack(L, 1); } +static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud) +{ + UNUSED(dummy); + lj_state_growstack(co, *(MSize *)ud); + return NULL; +} + +int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need) +{ + return lj_vm_cpcall(L, NULL, &need, cpgrowstack); +} + /* Allocate basic stack for new state. */ static void stack_init(lua_State *L1, lua_State *L) { diff --git a/src/deps/src/luajit/src/lj_state.h b/src/deps/src/luajit/src/lj_state.h index db67f03bf..3850e5a18 100644 --- a/src/deps/src/luajit/src/lj_state.h +++ b/src/deps/src/luajit/src/lj_state.h @@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L); LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used); LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need); LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L); +LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need); static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) { diff --git a/src/deps/src/luajit/src/lj_target.h b/src/deps/src/luajit/src/lj_target.h index 701cf8732..d00554d4b 100644 --- a/src/deps/src/luajit/src/lj_target.h +++ b/src/deps/src/luajit/src/lj_target.h @@ -58,9 +58,13 @@ typedef uint32_t RegSP; #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; #define RSET_BITS 6 +#define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs)) #else typedef uint32_t RegSet; #define RSET_BITS 5 +#define rset_picktop_(rs) ((Reg)lj_fls(rs)) +#define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) #endif #define 
RID2RSET(r) (((RegSet)1) << (r)) @@ -71,13 +75,6 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -#define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63)) -#define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs)) -#else -#define rset_picktop_(rs) ((Reg)lj_fls(rs)) -#define rset_pickbot_(rs) ((Reg)lj_ffs(rs)) -#endif /* -- Register allocation cost -------------------------------------------- */ diff --git a/src/deps/src/luajit/src/lj_trace.c b/src/deps/src/luajit/src/lj_trace.c index c55222b84..cc647500f 100644 --- a/src/deps/src/luajit/src/lj_trace.c +++ b/src/deps/src/luajit/src/lj_trace.c @@ -621,22 +621,27 @@ static int trace_abort(jit_State *J) J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; lj_vmevent_send(L, TRACE, + cTValue *bot = tvref(L->stack)+LJ_FR2; cTValue *frame; - int size; - BCIns pc; - GCfunc *fn; + const BCIns *pc; + BCPos pos = 0; setstrV(L, L->top++, lj_str_newlit(L, "abort")); setintV(L->top++, traceno); - /* Find original function call to generate a better error message. */ - frame = lj_debug_frame(L, 0, &size); - lj_assertL(frame != NULL, "missing debug frame"); - fn = frame_func(frame); - if (frame == L->base-1 && isluafunc(fn)) - pc = proto_bcpos(funcproto(fn), J->pc); - else - pc = lj_debug_framepc(L, fn, frame); - setfuncV(L, L->top++, fn); - setintV(L->top++, pc); + /* Find original Lua function call to generate a better error message. 
*/ + for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + if (isluafunc(frame_func(frame))) { + pos = proto_bcpos(funcproto(frame_func(frame)), pc); + break; + } else if (frame_prev(frame) <= bot) { + break; + } else if (frame_iscont(frame)) { + pc = frame_contpc(frame) - 1; + } else { + pc = frame_pc(frame) - 1; + } + } + setfuncV(L, L->top++, frame_func(frame)); + setintV(L->top++, pos); copyTV(L, L->top++, restorestack(L, errobj)); copyTV(L, L->top++, &J->errinfo); ); diff --git a/src/deps/src/luajit/src/msvcbuild.bat b/src/deps/src/luajit/src/msvcbuild.bat index f9bf25289..cd25beee1 100644 --- a/src/deps/src/luajit/src/msvcbuild.bat +++ b/src/deps/src/luajit/src/msvcbuild.bat @@ -27,39 +27,52 @@ @set BUILDTYPE=release @set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c +@setlocal +@call :SETHOSTVARS %LJCOMPILE% host\minilua.c @if errorlevel 1 goto :BAD %LJLINK% /out:minilua.exe minilua.obj @if errorlevel 1 goto :BAD if exist minilua.exe.manifest^ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe +@endlocal -@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64 @set LJARCH=x64 @minilua -@if errorlevel 8 goto :X64 +@if errorlevel 8 goto :NO32 @set DASC=vm_x86.dasc -@set DASMFLAGS=-D WIN -D JIT -D FFI +@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU @set LJARCH=x86 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 +@goto :DA +:NO32 +@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64 +@set DASC=vm_arm64.dasc +@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64 +@set LJARCH=arm64 +@goto :DA :X64 -@if "%1" neq "nogc64" goto :GC64 +@if "%1" neq "nogc64" goto :DA @shift @set DASC=vm_x86.dasc @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 -:GC64 +:DA minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD if exist ..\.git ( git show -s --format=%%ct 
>luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt ) minilua host\genversion.lua -%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c +@setlocal +@call :SETHOSTVARS +%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c @if errorlevel 1 goto :BAD %LJLINK% /out:buildvm.exe buildvm*.obj @if errorlevel 1 goto :BAD if exist buildvm.exe.manifest^ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe +@endlocal buildvm -m peobj -o lj_vm.obj @if errorlevel 1 goto :BAD @@ -118,6 +131,12 @@ if exist luajit.exe.manifest^ @echo. @echo === Successfully built LuaJIT for Windows/%LJARCH% === +@goto :END +:SETHOSTVARS +@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" ( + call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo + echo on +) @goto :END :BAD @echo. diff --git a/src/deps/src/luajit/src/vm_arm64.dasc b/src/deps/src/luajit/src/vm_arm64.dasc index 698b42104..3044a8ac4 100644 --- a/src/deps/src/luajit/src/vm_arm64.dasc +++ b/src/deps/src/luajit/src/vm_arm64.dasc @@ -113,13 +113,37 @@ | |.define TMPDofs, #24 | +|.if WIN +|// Windows unwind data is suited to r1 stored first. +|.macro stp_unwind, r1, r2, where +| stp r1, r2, where +|.endmacro +|.macro ldp_unwind, r1, r2, where +| ldp r1, r2, where +|.endmacro +|.macro ldp_unwind, r1, r2, where, post_index +| ldp r1, r2, where, post_index +|.endmacro +|.else +|// Otherwise store r2 first for compact unwind info (OSX). 
+|.macro stp_unwind, r1, r2, where +| stp r2, r1, where +|.endmacro +|.macro ldp_unwind, r1, r2, where +| ldp r2, r1, where +|.endmacro +|.macro ldp_unwind, r1, r2, where, post_index +| ldp r2, r1, where, post_index +|.endmacro +|.endif +| |.macro save_, gpr1, gpr2, fpr1, fpr2 -| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] -| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] +| stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8] +| stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro |.macro rest_, gpr1, gpr2, fpr1, fpr2 -| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] -| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] +| ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8] +| ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8] |.endmacro | |.macro saveregs @@ -127,14 +151,14 @@ | sub sp, sp, # CFRAME_SPACE | stp fp, lr, [sp, # SAVE_FP_LR_] | add fp, sp, # SAVE_FP_LR_ -| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] +| stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8] | save_ 21, 22, 8, 9 | save_ 23, 24, 10, 11 | save_ 25, 26, 12, 13 | save_ 27, 28, 14, 15 |.endmacro |.macro restoreregs -| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] +| ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8] | rest_ 21, 22, 8, 9 | rest_ 23, 24, 10, 11 | rest_ 25, 26, 12, 13 @@ -408,24 +432,24 @@ static void build_subroutines(BuildCtx *ctx) | // (void *cframe, int errcode) | mov sp, CARG1 | mov CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. | ldr L, SAVE_L - | mv_vmstate TMP0w, C | ldr GL, L->glref + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | mv_vmstate TMP0w, C | st_vmstate TMP0w | b ->vm_leave_unw | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) | and sp, CARG1, #CFRAME_RAWMASK - |->vm_unwind_ff_eh: // Landing pad for external unwinder. 
| ldr L, SAVE_L | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | movn TISNIL, #0 + | ldr GL, L->glref // Setup pointer to global state. + |->vm_unwind_ff_eh: // Landing pad for external unwinder. | mov RC, #16 // 2 results: false + error message. | ldr BASE, L->base - | ldr GL, L->glref // Setup pointer to global state. | mov_false TMP0 | sub RA, BASE, #8 // Results start at BASE-8. | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. @@ -2005,13 +2029,13 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | ldr L, SAVE_L |1: + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 | cmn CARG1w, #LUA_ERRERR | bhs >9 // Check for error from exit. - | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] - | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 - | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 - | movn TISNIL, #0 + | lsl RC, CARG1, #3 | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | str RCw, SAVE_MULTRES | str BASE, L->base @@ -2162,7 +2186,7 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |// Handler for callback functions. - |// Saveregs already performed. Callback slot number in [sp], g in r12. + |// Saveregs already performed. Callback slot number in w9, g in x10. |->vm_ffi_callback: |.if FFI |.type CTSTATE, CTState, PC @@ -2215,7 +2239,7 @@ static void build_subroutines(BuildCtx *ctx) |.if FFI | .type CCSTATE, CCallState, x19 | sp_auth - | stp x20, CCSTATE, [sp, #-32]! + | stp_unwind CCSTATE, x20, [sp, #-32]! 
| stp fp, lr, [sp, #16] | add fp, sp, #16 | mov CCSTATE, x0 @@ -2247,7 +2271,7 @@ static void build_subroutines(BuildCtx *ctx) | stp d0, d1, CCSTATE->fpr[0] | stp d2, d3, CCSTATE->fpr[2] | ldp fp, lr, [sp, #16] - | ldp x20, CCSTATE, [sp], #32 + | ldp_unwind CCSTATE, x20, [sp], #32 | ret_auth |.endif |// Note: vm_ffi_call must be the last function in this object file! @@ -3816,9 +3840,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if JIT | // RA = base (ignored), RC = traceno | ldr CARG1, [GL, #GL_J(trace)] - | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0. | ldr TRACE:RC, [CARG1, RC, lsl #3] - | st_vmstate CARG2w |.if PAUTH | ldr RA, TRACE:RC->mcauth |.else @@ -3893,6 +3916,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add TMP2, BASE, RC | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | add RA, RA, RC + | sub CARG1, CARG1, #8 | add TMP0, RC, #16+FRAME_VARG | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. | ldr KBASE, [PC, #-4+PC2PROTO(k)] diff --git a/src/deps/src/luajit/src/vm_mips64.dasc b/src/deps/src/luajit/src/vm_mips64.dasc index 6c215f2bb..ef0d901da 100644 --- a/src/deps/src/luajit/src/vm_mips64.dasc +++ b/src/deps/src/luajit/src/vm_mips64.dasc @@ -5396,6 +5396,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | settp LFUNC:RB, TMP0 | daddu TMP0, RA, RC | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. 
+ | daddiu TMP2, TMP2, -8 | daddiu TMP3, RC, 16+FRAME_VARG | sltu AT, TMP0, TMP2 | ld KBASE, -4+PC2PROTO(k)(PC) diff --git a/src/deps/src/luajit/t/exdata.t b/src/deps/src/luajit/t/exdata.t index e3dffdf0a..4cd270c3e 100644 --- a/src/deps/src/luajit/t/exdata.t +++ b/src/deps/src/luajit/t/exdata.t @@ -136,7 +136,7 @@ print(tostring(saved_q)) cdata : 0xefdeaddeadbeef cdata : 0xefdeaddeadbeef --- err -[TRACE --- test.lua:8 -- trace too short at thread.exdata] +[TRACE --- test.lua:8 -- trace too short at test.lua:9] @@ -195,7 +195,7 @@ print("get: " .. total) set: 0 get: 10 --- err -[TRACE --- test.lua:14 -- trace too short at thread.exdata] +[TRACE --- test.lua:14 -- trace too short at test.lua:15] [TRACE 1 test.lua:21 loop] diff --git a/src/deps/src/luajit/t/exdata2.t b/src/deps/src/luajit/t/exdata2.t index 1bc583a99..3eed13005 100644 --- a/src/deps/src/luajit/t/exdata2.t +++ b/src/deps/src/luajit/t/exdata2.t @@ -160,7 +160,7 @@ print(tostring(saved_q)) cdata : 0xefdeaddeadbeef cdata : 0xefdeaddeadbeef --- err -[TRACE --- test.lua:8 -- trace too short at thread.exdata2] +[TRACE --- test.lua:8 -- trace too short at test.lua:9] @@ -224,7 +224,7 @@ get: 10 cdata : NULL cdata : 0xefdeaddeadbeef --- err -[TRACE --- test.lua:15 -- trace too short at thread.exdata2] +[TRACE --- test.lua:15 -- trace too short at test.lua:16] [TRACE 1 test.lua:22 loop] diff --git a/src/deps/src/luajit/t/prngstate.t b/src/deps/src/luajit/t/prngstate.t index e1d8a396e..511a2a601 100644 --- a/src/deps/src/luajit/t/prngstate.t +++ b/src/deps/src/luajit/t/prngstate.t @@ -49,7 +49,7 @@ print('ok') ok --- jv --- err eval -qr/trace too short at jit\.prngstate/ +qr/trace too short at test.lua:4/