Merge commit '29737209b138a1485d55c53acf1a6783b6e60167' into dev

This commit is contained in:
Théophile Diot 2023-10-13 09:39:16 +02:00
commit 8093c61613
No known key found for this signature in database
GPG key ID: 248FEA4BAE400D06
38 changed files with 655 additions and 457 deletions

View file

@ -426,9 +426,7 @@ the toolchain used to compile LuaJIT:
on the C stack. The contents of the C++ exception object
pass through unmodified.</li>
<li>Lua errors can be caught on the C++ side with <tt>catch(...)</tt>.
The corresponding Lua error message can be retrieved from the Lua stack.<br>
For MSVC for Windows 64 bit this requires compilation of your C++ code
with <tt>/EHa</tt>.</li>
The corresponding Lua error message can be retrieved from the Lua stack.</li>
<li>Throwing Lua errors across C++ frames is safe. C++ destructors
will be called.</li>
</ul>

View file

@ -203,7 +203,7 @@ Or install Microsoft's Visual Studio (MSVC).
</p>
<h3>Building with MSVC</h3>
<p>
Open a "Visual Studio Command Prompt" (either x86 or x64), <tt>cd</tt> to the
Open a "Visual Studio Command Prompt" (x86, x64 or ARM64), <tt>cd</tt> to the
directory with the source code and run these commands:
</p>
<pre class="code">
@ -214,6 +214,9 @@ msvcbuild
Check the <tt>msvcbuild.bat</tt> file for more options.
Then follow the installation instructions below.
</p>
<p>
For an x64 to ARM64 cross-build run this first: <tt>vcvarsall.bat x64_arm64</tt>
</p>
<h3>Building with MinGW or Cygwin</h3>
<p>
Open a command prompt window and make sure the MinGW or Cygwin programs

View file

@ -233,7 +233,7 @@ TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAG
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
TARGET_TESTARCH:=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM)
ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x64
else
@ -488,7 +488,11 @@ DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
GIT= git
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
ifeq (Windows,$(HOST_SYS)$(HOST_MSYS))
GIT_RELVER= if exist ..\.git ( $(GIT) show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
else
GIT_RELVER= [ -e ../.git ] && $(GIT) show -s --format=%ct >luajit_relver.txt 2>/dev/null || cat ../.relver >luajit_relver.txt 2>/dev/null || :
endif
GIT_DEP= $(wildcard ../.git/HEAD ../.git/refs/heads/*)
BUILDVM_O= host/buildvm.o host/buildvm_asm.o host/buildvm_peobj.o \

View file

@ -9,7 +9,7 @@
#include "buildvm.h"
#include "lj_bc.h"
#if LJ_TARGET_X86ORX64
#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
/* Context for PE object emitter. */
static char *strtab;
@ -93,6 +93,17 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 6
#define PEOBJ_XDATA_SIZE (8*2+4+6*2)
#elif LJ_TARGET_ARM64
#define PEOBJ_ARCH_TARGET 0xaa64
#define PEOBJ_RELOC_REL32 0x03 /* MS: BRANCH26. */
#define PEOBJ_RELOC_DIR32 0x01
#define PEOBJ_RELOC_ADDR32NB 0x02
#define PEOBJ_RELOC_OFS (-4)
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
#define PEOBJ_PDATA_NRELOC 4
#define PEOBJ_XDATA_SIZE (4+24+4 +4+8)
#endif
/* Section numbers (0-based). */
@ -100,7 +111,7 @@ enum {
PEOBJ_SECT_ABS = -2,
PEOBJ_SECT_UNDEF = -1,
PEOBJ_SECT_TEXT,
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA,
#elif LJ_TARGET_X86
@ -175,6 +186,9 @@ void emit_peobj(BuildCtx *ctx)
uint32_t sofs;
int i, nrsym;
union { uint8_t b; uint32_t u; } host_endian;
#ifdef PEOBJ_PDATA_NRELOC
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
#endif
sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
@ -188,18 +202,18 @@ void emit_peobj(BuildCtx *ctx)
/* Flags: 60 = read+execute, 50 = align16, 20 = code. */
pesect[PEOBJ_SECT_TEXT].flags = PEOBJ_TEXT_FLAGS;
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1);
pesect[PEOBJ_SECT_PDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].size = 6*4);
sofs += (pesect[PEOBJ_SECT_PDATA].size = PEOBJ_PDATA_NRELOC*4);
pesect[PEOBJ_SECT_PDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 6) * PEOBJ_RELOC_SIZE;
sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = PEOBJ_PDATA_NRELOC) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_PDATA].flags = 0x40300040;
memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1);
pesect[PEOBJ_SECT_XDATA].ofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4+6*2); /* See below. */
sofs += (pesect[PEOBJ_SECT_XDATA].size = PEOBJ_XDATA_SIZE); /* See below. */
pesect[PEOBJ_SECT_XDATA].relocofs = sofs;
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
@ -234,7 +248,7 @@ void emit_peobj(BuildCtx *ctx)
*/
nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif
@ -259,7 +273,6 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64
{ /* Write .pdata section. */
uint32_t fcofs = (uint32_t)ctx->sym[ctx->nsym-1].ofs;
uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */
PEreloc reloc;
pdata[0] = 0; pdata[1] = fcofs; pdata[2] = 0;
@ -308,6 +321,86 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_ARM64
/* https://learn.microsoft.com/en-us/cpp/build/arm64-exception-handling */
{ /* Write .pdata section. */
uint32_t pdata[4];
PEreloc reloc;
pdata[0] = 0;
pdata[1] = 0;
pdata[2] = fcofs;
pdata[3] = 4+24+4;
owrite(ctx, &pdata, sizeof(pdata));
/* Start of .text and start of .xdata. */
reloc.vaddr = 0; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 4; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
/* Start of vm_ffi_call and start of second part of .xdata. */
reloc.vaddr = 8; reloc.symidx = 1+2+nrsym+2+2+1;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
reloc.vaddr = 12; reloc.symidx = 1+2+nrsym+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
{ /* Write .xdata section. */
uint32_t u32;
uint8_t *p, uwc[24];
PEreloc reloc;
#define CBE16(x) (*p = ((x) >> 8) & 0xff, p[1] = (x) & 0xff, p += 2)
#define CALLOC_S(s) (*p++ = ((s) >> 4)) /* s < 512 */
#define CSAVE_FPLR(o) (*p++ = 0x40 | ((o) >> 3)) /* o <= 504 */
#define CSAVE_REGP(r,o) CBE16(0xc800 | (((r) - 19) << 6) | ((o) >> 3))
#define CSAVE_REGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_REGP(r, o); \
} while (0)
#define CSAVE_REGPX(r,o) CBE16(0xcc00 | (((r) - 19) << 6) | (~(o) >> 3))
#define CSAVE_FREGP(r,o) CBE16(0xd800 | (((r) - 8) << 6) | ((o) >> 3))
#define CSAVE_FREGS(r1,r2,o1) do { \
int r, o; for (r = r1, o = o1; r <= r2; r += 2, o -= 16) CSAVE_FREGP(r, o); \
} while (0)
#define CADD_FP(s) CBE16(0xe200 | ((s) >> 3)) /* s < 8*256 */
#define CODE_NOP 0xe3
#define CODE_END 0xe4
#define CEND_ALIGN do { \
*p++ = CODE_END; \
while ((p - uwc) & 3) *p++ = CODE_NOP; \
} while (0)
/* Unwind codes for .text section with handler. */
p = uwc;
CSAVE_REGS(19, 28, 176); /* +5*2 */
CSAVE_FREGS(8, 15, 96); /* +4*2 */
CSAVE_FPLR(192); /* +1 */
CALLOC_S(208); /* +1 */
CEND_ALIGN; /* +1 +3 -> 24 */
u32 = ((24u >> 2) << 27) | (1u << 20) | (fcofs >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 24);
u32 = 0; /* Handler RVA to be relocated at 4 + 24. */
owrite(ctx, &u32, 4);
/* Unwind codes for vm_ffi_call without handler. */
p = uwc;
CADD_FP(16); /* +2 */
CSAVE_FPLR(16); /* +1 */
CSAVE_REGPX(19, -32); /* +2 */
CEND_ALIGN; /* +1 +2 -> 8 */
u32 = ((8u >> 2) << 27) | (((uint32_t)ctx->codesz - fcofs) >> 2);
owrite(ctx, &u32, 4);
owrite(ctx, &uwc, 8);
reloc.vaddr = 4 + 24; reloc.symidx = 1+2+nrsym+2+2;
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
#elif LJ_TARGET_X86
/* Write .sxdata section. */
for (i = 0; i < nrsym; i++) {
@ -339,7 +432,7 @@ void emit_peobj(BuildCtx *ctx)
emit_peobj_sym(ctx, ctx->relocsym[i], 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
#if LJ_TARGET_X64
#ifdef PEOBJ_PDATA_NRELOC
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
emit_peobj_sym(ctx, "lj_err_unwind_win", 0,

View file

@ -5,9 +5,10 @@
-- Released under the MIT license. See Copyright Notice in luajit.h
----------------------------------------------------------------------------
local FILE_ROLLING_H = "luajit_rolling.h"
local FILE_RELVER_TXT = "luajit_relver.txt"
local FILE_LUAJIT_H = "luajit.h"
local arg = {...}
local FILE_ROLLING_H = arg[1] or "luajit_rolling.h"
local FILE_RELVER_TXT = arg[2] or "luajit_relver.txt"
local FILE_LUAJIT_H = arg[3] or "luajit.h"
local function file_read(file)
local fp = assert(io.open(file, "rb"), "run from the wrong directory")

View file

@ -107,24 +107,20 @@ local map_logsr = { -- Logical, shifted register.
[0] = {
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
},
{
shift = 21, mask = 7,
[0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
},
{
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
},
{
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
}
},
false -- unallocated
@ -132,24 +128,20 @@ local map_logsr = { -- Logical, shifted register.
{
shift = 29, mask = 3,
[0] = {
shift = 21, mask = 7,
[0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
"andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
shift = 21, mask = 1,
[0] = "andDNMSg", "bicDNMSg"
},
{
shift = 21, mask = 7,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
"orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
shift = 21, mask = 1,
[0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
},
{
shift = 21, mask = 7,
[0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
"eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
shift = 21, mask = 1,
[0] = "eorDNMSg", "eonDNMSg"
},
{
shift = 21, mask = 7,
[0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
"ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
shift = 21, mask = 1,
[0] = "ands|tstD0NMSg", "bicsDNMSg"
}
}
}
@ -735,7 +727,7 @@ local map_cond = {
"hi", "ls", "ge", "lt", "gt", "le", "al",
}
local map_shift = { [0] = "lsl", "lsr", "asr", }
local map_shift = { [0] = "lsl", "lsr", "asr", "ror"}
local map_extend = {
[0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",

View file

@ -617,7 +617,10 @@ static int ffh_resume(lua_State *L, lua_State *co, int wrap)
setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2);
}
lj_state_growstack(co, (MSize)(L->top - L->base));
if (lj_state_cpgrowstack(co, (MSize)(L->top - L->base)) != LUA_OK) {
cTValue *msg = --co->top;
lj_err_callermsg(L, strVdata(msg));
}
return FFH_RETRY;
}

View file

@ -746,7 +746,7 @@ LJLIB_CF(ffi_abi) LJLIB_REC(.)
"\003win"
#endif
#if LJ_ABI_PAUTH
"\007pauth"
"\005pauth"
#endif
#if LJ_TARGET_UWP
"\003uwp"

View file

@ -104,7 +104,12 @@ LUA_API int lua_checkstack(lua_State *L, int size)
if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
return 0; /* Stack overflow. */
} else if (size > 0) {
lj_state_checkstack(L, (MSize)size);
int avail = (int)(mref(L->maxstack, TValue) - L->top);
if (size > avail &&
lj_state_cpgrowstack(L, (MSize)(size - avail)) != LUA_OK) {
L->top--;
return 0; /* Out of memory. */
}
}
return 1;
}

View file

@ -59,7 +59,7 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
#elif defined(__aarch64__)
#elif defined(__aarch64__) || defined(_M_ARM64)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__s390x__) || defined(__s390x)
#define LUAJIT_TARGET LUAJIT_ARCH_S390X
@ -70,7 +70,7 @@
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else
#error "No support for this architecture (yet)"
#error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures"
#endif
#endif
@ -245,7 +245,7 @@
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#if __ARM_ARCH >= 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70
@ -523,30 +523,45 @@
#elif LJ_TARGET_ARM
#if defined(__ARMEB__)
#error "No support for big-endian ARM"
#undef LJ_TARGET_ARM
#endif
#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#error "No support for Cortex-M CPUs"
#undef LJ_TARGET_ARM
#endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#undef LJ_TARGET_ARM
#endif
#elif LJ_TARGET_ARM64
#if defined(_ILP32)
#error "No support for ILP32 model on ARM64"
#undef LJ_TARGET_ARM64
#endif
#elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
#error "No support for little-endian PPC32"
#undef LJ_TARGET_PPC
#endif
#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
#error "No support for PPC/e500, use LuaJIT 2.0"
#undef LJ_TARGET_PPC
#endif
#elif LJ_TARGET_MIPS32
#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
#error "Only o32 ABI supported for MIPS32"
#undef LJ_TARGET_MIPS
#endif
#if LJ_TARGET_MIPSR6
/* Not that useful, since most available r6 CPUs are 64 bit. */
#error "No support for MIPS32R6"
#undef LJ_TARGET_MIPS
#endif
#elif LJ_TARGET_MIPS64
#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
#error "Only n64 ABI supported for MIPS64"
#undef LJ_TARGET_MIPS
#endif
#endif
#endif

View file

@ -606,7 +606,11 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
IRIns *ir = IR(ref);
if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
#if LJ_GC64
#if LJ_TARGET_ARM64
(ir->o == IR_KINT && (uint64_t)k == (uint32_t)ir->i) ||
#else
(ir->o == IR_KINT && k == ir->i) ||
#endif
(ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
k == (intptr_t)ir_kptr(ir))

View file

@ -1990,6 +1990,7 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
int savereg = 0;
Reg pbase;
uint32_t k;
if (irp) {
@ -2000,12 +2001,14 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
savereg = 1;
}
} else {
pbase = RID_BASE;
}
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
if (savereg)
emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
k = emit_isk12(0, (int32_t)(8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP);
@ -2017,7 +2020,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
if (ra_hasspill(irp->s) && !allow)
if (savereg)
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095));
} else {

View file

@ -84,18 +84,23 @@ static void asm_guardcc(ASMState *as, A64CC cc)
emit_cond_branch(as, cc, target);
}
/* Emit test and branch instruction to exit for guard. */
static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
/* Emit test and branch instruction to exit for guard, if in range. */
static int asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
{
MCode *target = asm_exitstub_addr(as, as->snapno);
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
if (LJ_UNLIKELY(p == as->invmcp)) {
if (as->orignins > 1023) return 0; /* Delta might end up too large. */
as->loopinv = 1;
*p = A64I_B | A64F_S26(target-p);
emit_tnb(as, ai^0x01000000u, r, bit, p-1);
return;
*p = A64I_B | A64F_S26(delta);
ai ^= 0x01000000u;
target = p-1;
} else if (LJ_UNLIKELY(delta >= 0x1fff)) {
return 0;
}
emit_tnb(as, ai, r, bit, target);
return 1;
}
/* Emit compare and branch instruction to exit for guard. */
@ -211,16 +216,14 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
{
IRIns *ir = IR(ref);
int logical = (ai & 0x1f000000) == 0x0a000000;
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
return A64F_M(ir->r);
} else if (irref_isk(ref)) {
uint32_t m;
int64_t k = get_k64val(as, ref);
if ((ai & 0x1f000000) == 0x0a000000)
m = emit_isk13(k, irt_is64(ir->t));
else
m = emit_isk12(k);
uint32_t m = logical ? emit_isk13(k, irt_is64(ir->t)) :
emit_isk12(irt_is64(ir->t) ? k : (int32_t)k);
if (m)
return m;
} else if (mayfuse(as, ref)) {
@ -232,7 +235,7 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
(IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
IRIns *irl = IR(ir->op1);
if (sh == A64SH_LSL &&
irl->o == IR_CONV &&
irl->o == IR_CONV && !logical &&
irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
shift <= 4 &&
canfuse(as, irl)) {
@ -242,7 +245,11 @@ static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_SH(sh, shift);
}
} else if (ir->o == IR_CONV &&
} else if (ir->o == IR_BROR && logical && irref_isk(ir->op2)) {
Reg m = ra_alloc1(as, ir->op1, allow);
int shift = (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
return A64F_M(m) | A64F_SH(A64SH_ROR, shift);
} else if (ir->o == IR_CONV && !logical &&
ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
Reg m = ra_alloc1(as, ir->op1, allow);
return A64F_M(m) | A64F_EX(A64EX_SXTW);
@ -455,6 +462,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR;
#if LJ_HASFFI && LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) {
fpr = REGARG_LASTFPR+1;
}
#endif
for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n];
IRIns *ir = IR(ref);
@ -465,6 +477,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
"reg %d not free", fpr); /* Must have been evicted. */
ra_leftov(as, fpr, ref);
fpr++;
#if LJ_HASFFI && LJ_ABI_WIN
} else if ((ci->flags & CCI_VARARG) && (gpr <= REGARG_LASTGPR)) {
Reg rf = ra_alloc1(as, ref, RSET_FPR);
emit_dn(as, A64I_FMOV_R_D, gpr++, rf & 31);
#endif
} else {
Reg r = ra_alloc1(as, ref, RSET_FPR);
int32_t al = spalign;
@ -570,8 +587,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
/* Need to force a spill on REF_BASE now to update the stack slot. */
emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
@ -695,25 +710,22 @@ static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
Reg dest = 0, tmp;
int destused = ra_used(ir);
Reg tmp;
int32_t ofs = 0;
ra_evictset(as, RSET_SCRATCH);
if (destused) {
if (ra_used(ir)) {
if (ra_hasspill(ir->s)) {
ofs = sps_scale(ir->s);
destused = 0;
if (ra_hasreg(ir->r)) {
ra_free(as, ir->r);
ra_modified(as, ir->r);
emit_spload(as, ir, ir->r, ofs);
}
} else {
dest = ra_dest(as, ir, RSET_FPR);
Reg dest = ra_dest(as, ir, RSET_FPR);
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
}
}
if (destused)
emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
asm_guardcnb(as, A64I_CBZ, RID_RET);
args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */
@ -804,113 +816,75 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg key = 0, tmp = RID_TMP;
Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
Reg tmp = RID_TMP, type = RID_NONE, key, tkey;
IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey);
int isk = irref_isk(ir->op2);
int isk = irref_isk(refkey);
IRType1 kt = irkey->t;
uint32_t k = 0;
uint32_t khash;
MCLabel l_end, l_loop, l_next;
MCLabel l_end, l_loop;
rset_clear(allow, tab);
if (!isk) {
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
rset_clear(allow, key);
if (!irt_isstr(kt)) {
tmp = ra_scratch(as, allow);
rset_clear(allow, tmp);
}
} else if (irt_isnum(kt)) {
int64_t val = (int64_t)ir_knum(irkey)->u64;
if (!(k = emit_isk12(val))) {
key = ra_allock(as, val, allow);
rset_clear(allow, key);
}
} else if (!irt_ispri(kt)) {
if (!(k = emit_isk12(irkey->i))) {
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
}
/* Allocate constants early. */
if (irt_isnum(kt)) {
if (!isk) {
tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
rset_clear(allow, tisnum);
}
} else if (irt_isaddr(kt)) {
if (isk) {
int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
scr = ra_allock(as, kk, allow);
/* Allocate register for tkey outside of the loop. */
if (isk) {
int64_t kk;
if (irt_isaddr(kt)) {
kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
} else if (irt_isnum(kt)) {
kk = (int64_t)ir_knum(irkey)->u64;
/* Assumes -0.0 is already canonicalized to +0.0. */
} else {
scr = ra_scratch(as, allow);
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
kk = ~((int64_t)~irt_toitype(kt) << 47);
}
rset_clear(allow, scr);
k = emit_isk12(kk);
tkey = k ? 0 : ra_allock(as, kk, allow);
} else {
lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
scr = ra_scratch(as, rset_clear(allow, type));
rset_clear(allow, scr);
tkey = ra_scratch(as, allow);
}
/* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as);
as->invmcp = NULL;
if (merge == IR_NE)
if (merge == IR_NE) {
asm_guardcc(as, CC_AL);
else if (destused)
emit_loada(as, dest, niltvg(J2G(as->J)));
} else if (destused) {
uint32_t k12 = emit_isk12(offsetof(global_State, nilnode.val));
lj_assertA(k12 != 0, "Cannot k12 encode niltv(L)");
emit_dn(as, A64I_ADDx^k12, dest, RID_GL);
}
/* Follow hash chain until the end. */
l_loop = --as->mcp;
emit_n(as, A64I_CMPx^A64I_K12^0, dest);
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
l_next = emit_label(as);
if (destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
/* Type and value comparison. */
if (merge == IR_EQ)
asm_guardcc(as, CC_EQ);
else
emit_cond_branch(as, CC_EQ, l_end);
emit_nm(as, A64I_CMPx^k, tmp, tkey);
if (!destused)
emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
*l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
if (irt_isnum(kt)) {
if (isk) {
/* Assumes -0.0 is already canonicalized to +0.0. */
if (k)
emit_n(as, A64I_CMPx^k, tmp);
else
emit_nm(as, A64I_CMPx, key, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
/* Construct tkey as canonicalized or tagged key. */
if (!isk) {
if (irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR);
emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
/* A64I_FMOV_R_D from key to tkey done below. */
} else {
emit_nm(as, A64I_FCMPd, key, ftmp);
emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
emit_cond_branch(as, CC_LO, l_next);
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
lj_assertA(irt_isaddr(kt), "bad HREF key type");
key = ra_alloc1(as, refkey, allow);
type = ra_allock(as, irt_toitype(kt) << 15, rset_clear(allow, key));
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
}
} else if (irt_isaddr(kt)) {
if (isk) {
emit_nm(as, A64I_CMPx, scr, tmp);
emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
} else {
emit_nm(as, A64I_CMPx, tmp, scr);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
}
} else {
emit_nm(as, A64I_CMPx, scr, type);
emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
}
*l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
if (!isk && irt_isaddr(kt)) {
type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
rset_clear(allow, type);
}
/* Load main position relative to tab->node into dest. */
khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
@ -924,7 +898,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) {
/* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, A64I_ANDw, dest, dest, tmp);
emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
@ -933,23 +906,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
emit_dnm(as, A64I_SUBw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
emit_dnm(as, A64I_EORw, dest, dest, tmp);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
emit_dnm(as, A64I_EORw, tmp, tmp, dest);
if (irt_isnum(kt)) {
emit_dnm(as, A64I_EORw, tmp, tkey, dest);
emit_dnm(as, A64I_ADDw, dest, dest, dest);
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVw, tmp, dest);
emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
emit_nm(as, A64I_FCMPZd, (key & 31), 0);
emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
} else {
checkmclim(as);
emit_dm(as, A64I_MOVw, tmp, key);
emit_dnm(as, A64I_EORw, dest, dest,
ra_allock(as, irt_toitype(kt) << 15, allow));
emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
emit_dm(as, A64I_MOVx, dest, key);
emit_dnm(as, A64I_EORw, tmp, key, dest);
emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
}
}
}
@ -964,7 +932,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int bigofs = !emit_checkofs(A64I_LDRx, kofs);
Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg key, idx = node;
Reg idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
uint64_t k;
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
@ -983,9 +951,8 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
} else {
k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
}
key = ra_scratch(as, allow);
emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
emit_lso(as, A64I_LDRx, key, idx, kofs);
emit_nm(as, A64I_CMPx, RID_TMP, ra_allock(as, k, allow));
emit_lso(as, A64I_LDRx, RID_TMP, idx, kofs);
if (bigofs)
emit_opk(as, A64I_ADDx, dest, node, ofs, rset_exclude(RSET_GPR, node));
}
@ -998,18 +965,16 @@ static void asm_uref(ASMState *as, IRIns *ir)
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
emit_lsptr(as, A64I_LDRx, dest, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guardcc(as, CC_NE);
emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
emit_opk(as, A64I_ADDx, dest, uv,
asm_guardcnb(as, A64I_CBZ, RID_TMP);
emit_opk(as, A64I_ADDx, dest, dest,
(int32_t)offsetof(GCupval, tv), RSET_GPR);
emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
emit_lso(as, A64I_LDRB, RID_TMP, dest,
(int32_t)offsetof(GCupval, closed));
} else {
emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
emit_lso(as, A64I_LDRx, dest, dest, (int32_t)offsetof(GCupval, v));
}
emit_lso(as, A64I_LDRx, uv, func,
emit_lso(as, A64I_LDRx, dest, ra_alloc1(as, ir->op1, RSET_GPR),
(int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
}
}
@ -1135,7 +1100,7 @@ static void asm_xstore(ASMState *as, IRIns *ir)
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
Reg idx, tmp, type;
Reg idx, tmp;
int32_t ofs = 0;
RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
@ -1154,8 +1119,7 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
} else {
tmp = ra_scratch(as, gpr);
}
type = ra_scratch(as, rset_clear(gpr, tmp));
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, tmp), A64I_LDRx);
rset_clear(gpr, idx);
if (ofs & FUSE_REG) rset_clear(gpr, ofs & 31);
if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
@ -1167,8 +1131,8 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
ra_allock(as, LJ_TISNUM << 15, gpr), tmp);
} else if (irt_isaddr(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
} else if (irt_isnil(ir->t)) {
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
} else {
@ -1291,9 +1255,8 @@ dotypecheck:
emit_nm(as, A64I_CMPx,
ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
} else {
Reg type = ra_scratch(as, allow);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), RID_TMP);
emit_dn(as, A64I_ASRx | A64F_IMMR(47), RID_TMP, tmp);
}
emit_lso(as, A64I_LDRx, tmp, base, ofs);
return;
@ -1384,7 +1347,6 @@ static void asm_obar(ASMState *as, IRIns *ir)
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
IRRef args[2];
MCLabel l_end;
RegSet allow = RSET_GPR;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
@ -1395,14 +1357,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
asm_gencall(as, ci, args);
emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(allow, obj));
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
emit_tnb(as, A64I_TBZ, tmp, lj_ffs(LJ_GC_BLACK), l_end);
emit_cond_branch(as, CC_EQ, l_end);
emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
emit_lso(as, A64I_LDRB, tmp, obj,
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
@ -1444,12 +1405,12 @@ static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
if (irref_isk(lref))
return 1; /* But swap constants to the right. */
ir = IR(rref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 0; /* Don't swap fusable operands to the left. */
ir = IR(lref);
if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
(ir->o == IR_ADD && ir->op1 == ir->op2) ||
(ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
return 1; /* But swap fusable operands to the right. */
@ -1500,8 +1461,7 @@ static void asm_intmul(ASMState *as, IRIns *ir)
if (irt_isguard(ir->t)) { /* IR_MULOV */
asm_guardcc(as, CC_NE);
emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
emit_nm(as, A64I_CMPx | A64F_EX(A64EX_SXTW), dest, dest);
emit_dnm(as, A64I_SMULL, dest, right, left);
} else {
emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
@ -1762,16 +1722,15 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
if (asm_swapops(as, blref, brref)) {
Reg tmp = blref; blref = brref; brref = tmp;
}
bleft = ra_alloc1(as, blref, RSET_GPR);
if (irref_isk(brref)) {
uint64_t k = get_k64val(as, brref);
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE) &&
asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, bleft,
emit_ctz64(k)))
return;
}
m2 = emit_isk13(k, irt_is64(irl->t));
}
bleft = ra_alloc1(as, blref, RSET_GPR);
ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
if (!m2)
m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
@ -1846,37 +1805,28 @@ static void asm_prof(ASMState *as, IRIns *ir)
static void asm_stack_check(ASMState *as, BCReg topslot,
IRIns *irp, RegSet allow, ExitNo exitno)
{
Reg pbase;
uint32_t k;
Reg pbase = RID_BASE;
if (irp) {
if (!ra_hasspill(irp->s)) {
pbase = irp->r;
lj_assertA(ra_hasreg(pbase), "base reg lost");
} else if (allow) {
pbase = rset_pickbot(allow);
} else {
pbase = RID_RET;
emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
}
} else {
pbase = RID_BASE;
pbase = irp->r;
if (!ra_hasreg(pbase))
pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
}
emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
if (pbase & 0x80) /* Restore temp. register. */
emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
k = emit_isk12((8*topslot));
lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, A64I_CMPx^k, RID_TMP);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack));
if (irp) { /* Must not spill arbitrary registers in head of side trace. */
if (ra_hasspill(irp->s))
emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
if (ra_hasspill(irp->s) && !allow)
emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
} else {
emit_getgl(as, RID_TMP, cur_L);
if (pbase & 0x40) {
emit_getgl(as, (pbase & 31), jit_base);
if (pbase & 0x80) /* Save temp register. */
emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
}
emit_getgl(as, RID_TMP, cur_L);
}
/* Restore Lua stack from on-trace state. */
@ -1918,7 +1868,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
/* Marker to prevent patching the GC check exit. */
#define ARM64_NOPATCH_GC_CHECK \
(A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
(A64I_ORRx|A64F_D(RID_ZERO)|A64F_M(RID_ZERO)|A64F_N(RID_ZERO))
/* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
@ -1973,57 +1923,40 @@ static void asm_loop_tail_fixup(ASMState *as)
/* -- Head of trace ------------------------------------------------------- */
/* If the L reference is in use, give it a GPR and reload it from cur_L
** in the global state. Nothing is emitted when L is unused.
*/
static void asm_head_lreg(ASMState *as)
{
  IRIns *irl = IR(ASMREF_L);
  Reg dest;
  if (!ra_used(irl))
    return;  /* L is not referenced: no reload needed. */
  dest = ra_dest(as, irl, RSET_GPR);
  emit_getgl(as, dest, cur_L);
  ra_evictk(as);
}
/* Coalesce BASE register for a root trace. */
static void asm_head_root_base(ASMState *as)
{
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
ra_destreg(as, ir, RID_BASE);
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
emit_movrr(as, ir, r, RID_BASE);
}
}
/* Coalesce BASE register for a side trace. */
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
IRIns *ir;
asm_head_lreg(as);
ir = IR(REF_BASE);
/* IRRefs that get into the side trace from the parent trace may restore
* REF_BASE under severe register pressure and thus reach here holding on to
* the register. Restore such references so that REF_BASE gets RID_BASE back
* when it tries to allocate below. */
if (!ra_hasreg(ir->r)) {
Reg r = ra_gethint(ir->r);
if (!rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
}
if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
ra_spill(as, ir);
if (ra_hasspill(irp->s)) {
return ra_dest(as, ir, RSET_GPR);
} else {
Reg r = irp->r;
lj_assertA(ra_hasreg(r), "base reg lost");
if (r != ir->r && !rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
ra_destreg(as, ir, r);
return r;
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (irp->r == r) {
return r; /* Same BASE register already coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
/* Move from coalesced parent reg. */
emit_movrr(as, ir, r, irp->r);
return irp->r;
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}
}
return RID_NONE;
}
/* -- Tail of trace ------------------------------------------------------- */
@ -2075,6 +2008,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
int ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
int spofs = 0, spalign = LJ_TARGET_OSX ? 0 : 7, nslots;
asm_collectargs(as, ir, ci, args);
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG)) nfpr = 0;
#endif
for (i = 0; i < nargs; i++) {
int al = spalign;
if (!args[i]) {
@ -2086,7 +2022,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
#endif
} else if (irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) { nfpr--; continue; }
#if LJ_TARGET_OSX
#if LJ_ABI_WIN
if ((ci->flags & CCI_VARARG) && ngpr > 0) { ngpr--; continue; }
#elif LJ_TARGET_OSX
al |= irt_isnum(IR(args[i])->t) ? 7 : 3;
#endif
} else {

View file

@ -140,7 +140,8 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
}
} else if (irb->o == IR_ADD && irref_isk(irb->op2)) {
/* Fuse base offset (vararg load). */
as->mrm.ofs = IR(irb->op2)->i;
IRIns *irk = IR(irb->op2);
as->mrm.ofs = irk->o == IR_KINT ? irk->i : (int32_t)ir_kint64(irk)->u64;
return irb->op1;
}
return ref; /* Otherwise use the given array base. */

View file

@ -1141,6 +1141,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
fid = ctf->sib;
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((ct->info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
}
#endif
/* Walk through all passed arguments. */
for (o = L->base+1, narg = 1; o < top; o++, narg++) {
CTypeID did;
@ -1201,9 +1209,14 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
align = CTSIZE_PTR-1;
nsp = (nsp + align) & ~align;
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
/* A negative nsp points into cc->gpr. Blame MS for their messy ABI. */
dp = ((uint8_t *)cc->stack) + (int32_t)nsp;
#else
dp = ((uint8_t *)cc->stack) + nsp;
#endif
nsp += CCALL_PACK_STACKARG ? sz : n * CTSIZE_PTR;
if (nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
if ((int32_t)nsp > CCALL_SIZE_STACK) { /* Too many arguments. */
err_nyi:
lj_err_caller(L, LJ_ERR_FFI_NYICALL);
}
@ -1314,6 +1327,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#endif
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
if ((int32_t)nsp < 0) nsp = 0;
#endif
#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */

View file

@ -1118,12 +1118,8 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
ngpr = 1;
else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
ngpr = 2;
#elif LJ_TARGET_ARM64
#if LJ_ABI_WIN
#error "NYI: ARM64 Windows ABI calling conventions"
#elif LJ_TARGET_OSX
#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
int ngpr = CCALL_NARG_GPR;
#endif
#endif
/* Skip initial attributes. */

View file

@ -69,7 +69,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_UPVAL 120 /* Max. # of upvalues. */
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
#define LJ_STACK_EXTRA (5+3*LJ_FR2) /* Extra stack space (metamethods). */
#if defined(__powerpc64__) && _CALL_ELF != 2
#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */
@ -154,15 +154,9 @@ typedef uintptr_t BloomFilter;
#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
/* Don't ask ... */
#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
static LJ_AINLINE uint32_t lj_fls(uint32_t x)
{
uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
}
#else
#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
#endif
#define lj_ffs64(x) ((uint32_t)__builtin_ctzll(x))
#define lj_fls64(x) ((uint32_t)(__builtin_clzll(x)^63))
#if defined(__arm__)
static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
@ -273,8 +267,12 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
#else
unsigned char _BitScanForward(unsigned long *, unsigned long);
unsigned char _BitScanReverse(unsigned long *, unsigned long);
unsigned char _BitScanForward64(unsigned long *, uint64_t);
unsigned char _BitScanReverse64(unsigned long *, uint64_t);
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
{
@ -285,6 +283,16 @@ static LJ_AINLINE uint32_t lj_fls(uint32_t x)
{
unsigned long r; _BitScanReverse(&r, x); return (uint32_t)r;
}
/* Find-first-set for 64 bit values via the MSVC bit scan intrinsic.
** Counterpart of the __builtin_ctzll-based lj_ffs64 for GCC-style compilers.
** NOTE(review): the intrinsic does not set the index when x == 0, so the
** result is presumably meaningless for a zero argument -- confirm callers.
*/
static LJ_AINLINE uint32_t lj_ffs64(uint64_t x)
{
  unsigned long idx;
  _BitScanForward64(&idx, x);
  return (uint32_t)idx;
}
/* Find-last-set for 64 bit values via the MSVC bit scan intrinsic.
** Counterpart of the (__builtin_clzll(x)^63) lj_fls64 for GCC-style compilers.
** NOTE(review): the intrinsic does not set the index when x == 0, so the
** result is presumably meaningless for a zero argument -- confirm callers.
*/
static LJ_AINLINE uint32_t lj_fls64(uint64_t x)
{
  unsigned long idx;
  _BitScanReverse64(&idx, x);
  return (uint32_t)idx;
}
#endif
unsigned long _byteswap_ulong(unsigned long);

View file

@ -453,7 +453,7 @@ static int call_init(lua_State *L, GCfunc *fn)
int numparams = pt->numparams;
int gotparams = (int)(L->top - L->base);
int need = pt->framesize;
if ((pt->flags & PROTO_VARARG)) need += 1+gotparams;
if ((pt->flags & PROTO_VARARG)) need += 1+LJ_FR2+gotparams;
lj_state_checkstack(L, (MSize)need);
numparams -= gotparams;
return numparams >= 0 ? numparams : 0;

View file

@ -20,7 +20,7 @@ static uint64_t get_k64val(ASMState *as, IRRef ref)
} else {
lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
"bad 64 bit const IR op %d", ir->o);
return ir->i; /* Sign-extended. */
return (uint32_t)ir->i; /* Zero-extended. */
}
}
@ -30,39 +30,31 @@ static uint32_t emit_isk12(int64_t n)
uint64_t k = n < 0 ? ~(uint64_t)n+1u : (uint64_t)n;
uint32_t m = n < 0 ? 0x40000000 : 0;
if (k < 0x1000) {
return A64I_K12|m|A64F_U12(k);
return (uint32_t)(A64I_K12|m|A64F_U12(k));
} else if ((k & 0xfff000) == k) {
return A64I_K12|m|0x400000|A64F_U12(k>>12);
return (uint32_t)(A64I_K12|m|0x400000|A64F_U12(k>>12));
}
return 0;
}
#define emit_clz64(n) __builtin_clzll(n)
#define emit_ctz64(n) __builtin_ctzll(n)
#define emit_clz64(n) (lj_fls64(n)^63)
#define emit_ctz64(n) lj_ffs64(n)
/* Encode constant in K13 format for logical data processing instructions. */
static uint32_t emit_isk13(uint64_t n, int is64)
{
int inv = 0, w = 128, lz, tz;
if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */
if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */
do { /* Find the repeat width. */
if (is64 && (uint32_t)(n^(n>>32))) break;
n = (uint32_t)n;
if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */
w = 32; if ((n^(n>>16)) & 0xffff) break;
n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
n = n & 0x3; w = 2;
} while (0);
lz = emit_clz64(n);
tz = emit_ctz64(n);
if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
if (inv)
return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
else
return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
/* Thanks to: https://dougallj.wordpress.com/2021/10/30/ */
int rot, ones, size, immr, imms;
if (!is64) n = ((uint64_t)n << 32) | (uint32_t)n;
if ((n+1u) <= 1u) return 0; /* Neither all-zero nor all-ones are allowed. */
rot = (n & (n+1u)) ? emit_ctz64(n & (n+1u)) : 64;
n = lj_ror(n, rot & 63);
ones = emit_ctz64(~n);
size = emit_clz64(n) + ones;
if (lj_ror(n, size & 63) != n) return 0; /* Non-repeating? */
immr = -rot & (size - 1);
imms = (-(size << 1) | (ones - 1)) & 63;
return A64I_K13 | A64F_IMMR(immr | (size & 64)) | A64F_IMMS(imms);
}
static uint32_t emit_isfpk64(uint64_t n)
@ -121,9 +113,20 @@ static int emit_checkofs(A64Ins ai, int64_t ofs)
}
}
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
static LJ_AINLINE uint32_t emit_lso_pair_candidate(A64Ins ai, int ofs, int sc)
{
int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
if (ofs >= 0) {
return ai | A64F_U12(ofs>>sc); /* Subsequent lj_ror checks ofs. */
} else if (ofs >= -256) {
return (ai^A64I_LS_U) | A64F_S9(ofs & 0x1ff);
} else {
return A64F_D(31); /* Will mismatch prev. */
}
}
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs64)
{
int ot = emit_checkofs(ai, ofs64), sc = (ai >> 30) & 3, ofs = (int)ofs64;
lj_assertA(ot, "load/store offset %d out of range", ofs);
/* Combine LDR/STR pairs to LDP/STP. */
if ((sc == 2 || sc == 3) &&
@ -132,11 +135,9 @@ static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
uint32_t prev = *as->mcp & ~A64F_D(31);
int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
A64Ins aip;
if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsm, sc)) {
aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
} else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
} else if (prev == emit_lso_pair_candidate(ai | A64F_N(rn), ofsp, sc)) {
aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
ofsm = ofs;
} else {
@ -158,13 +159,12 @@ nopair:
/* -- Emit loads/stores --------------------------------------------------- */
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= ASMREF_L)
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Try to find an N-step delta relative to other consts with N < lim. */
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
/* Try to find a one-step delta relative to other consts. */
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)
{
RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
if (lim <= 1) return 0; /* Can't beat that. */
while (work) {
Reg r = rset_picktop(work);
IRRef ref = regcost_ref(as->cost[r]);
@ -173,13 +173,14 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
get_k64val(as, ref);
int64_t delta = (int64_t)(k - kx);
if (!is64) delta = (int64_t)(int32_t)delta; /* Sign-extend. */
if (delta == 0) {
emit_dm(as, A64I_MOVx, rd, r);
emit_dm(as, is64|A64I_MOVw, rd, r);
return 1;
} else {
uint32_t k12 = emit_isk12(delta < 0 ? (int64_t)(~(uint64_t)delta+1u) : delta);
if (k12) {
emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
emit_dn(as, (delta < 0 ? A64I_SUBw : A64I_ADDw)^is64^k12, rd, r);
return 1;
}
/* Do other ops or multi-step deltas pay off? Probably not.
@ -192,53 +193,52 @@ static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
return 0; /* Failed. */
}
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64)
{
int i, zeros = 0, ones = 0, neg;
if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
/* Count homogeneous 16 bit fragments. */
for (i = 0; i < 4; i++) {
uint64_t frag = (u64 >> i*16) & 0xffff;
zeros += (frag == 0);
ones += (frag == 0xffff);
int zeros = 0, ones = 0, neg, lshift = 0;
int is64 = (u64 >> 32) ? A64I_X : 0, i = is64 ? 4 : 2;
/* Count non-homogeneous 16 bit fragments. */
while (--i >= 0) {
uint32_t frag = (u64 >> i*16) & 0xffff;
zeros += (frag != 0);
ones += (frag != 0xffff);
}
neg = ones > zeros; /* Use MOVN if it pays off. */
if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
neg = ones < zeros; /* Use MOVN if it pays off. */
if ((neg ? ones : zeros) > 1) { /* Need 2+ ins. Try 1 ins encodings. */
uint32_t k13 = emit_isk13(u64, is64);
if (k13) {
emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
return;
}
}
if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
int shift = 0, lshift = 0;
uint64_t n64 = neg ? ~u64 : u64;
if (n64 != 0) {
/* Find first/last fragment to be filled. */
shift = (63-emit_clz64(n64)) & ~15;
lshift = emit_ctz64(n64) & ~15;
if (emit_kdelta(as, rd, u64, is64)) {
return;
}
/* MOVK requires the original value (u64). */
while (shift > lshift) {
uint32_t u16 = (u64 >> shift) & 0xffff;
/* Skip fragments that are correctly filled by MOVN/MOVZ. */
if (u16 != (neg ? 0xffff : 0))
emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
shift -= 16;
}
/* But MOVN needs an inverted value (n64). */
emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
}
if (neg) {
u64 = ~u64;
if (!is64) u64 = (uint32_t)u64;
}
if (u64) {
/* Find first/last fragment to be filled. */
int shift = (63-emit_clz64(u64)) & ~15;
lshift = emit_ctz64(u64) & ~15;
for (; shift > lshift; shift -= 16) {
uint32_t frag = (u64 >> shift) & 0xffff;
if (frag == 0) continue; /* Will be correctly filled by MOVN/MOVZ. */
if (neg) frag ^= 0xffff; /* MOVK requires the original value. */
emit_d(as, is64 | A64I_MOVKw | A64F_U16(frag) | A64F_LSL16(shift), rd);
}
}
/* But MOVN needs an inverted value. */
emit_d(as, is64 | (neg ? A64I_MOVNw : A64I_MOVZw) |
A64F_U16((u64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
}
/* Load a 32 bit constant into a GPR. */
#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)
#define emit_loadi(as, rd, i) emit_loadk(as, rd, (uint32_t)i)
/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)
#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i)
#define glofs(as, k) \
((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
@ -252,19 +252,20 @@ static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
/* First, check if ip + offset is in range. */
if ((ai & 0x00400000) && checkmcpofs(as, p)) {
Reg base = RID_GL;
int64_t ofs = glofs(as, p);
if (emit_checkofs(ai, ofs)) {
/* GL + offset, might subsequently fuse to LDP/STP. */
} else if (ai == A64I_LDRx && checkmcpofs(as, p)) {
/* IP + offset is cheaper than allock, but address must be in range. */
emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
} else {
Reg base = RID_GL; /* Next, try GL + offset. */
int64_t ofs = glofs(as, p);
if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */
int64_t i64 = i64ptr(p);
base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
ofs = i64 & 0x7fffull;
}
emit_lso(as, ai, r, base, ofs);
return;
} else { /* Split up into base reg + offset. */
int64_t i64 = i64ptr(p);
base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
ofs = i64 & 0x7fffull;
}
emit_lso(as, ai, r, base, ofs);
}
/* Load 64 bit IR constant into register. */

View file

@ -174,12 +174,15 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
case FRAME_PCALL: /* FF pcall() frame. */
case FRAME_PCALLH: /* FF pcall() frame inside hook. */
if (errcode) {
global_State *g;
if (errcode == LUA_YIELD) {
frame = frame_prevd(frame);
break;
}
g = G(L);
setgcref(g->cur_L, obj2gco(L));
if (frame_typep(frame) == FRAME_PCALL)
hook_leave(G(L));
hook_leave(g);
L->base = frame_prevd(frame) + 1;
L->cframe = cf;
unwindstack(L, L->base);
@ -209,11 +212,6 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
** from 3rd party docs or must be found by trial-and-error. They really
** don't want you to write your own language-specific exception handler
** or to interact gracefully with MSVC. :-(
**
** Apparently MSVC doesn't call C++ destructors for foreign exceptions
** unless you compile your C++ code with /EHa. Unfortunately this means
** catch (...) also catches things like access violations. The use of
** _set_se_translator doesn't really help, because it requires /EHa, too.
*/
#define WIN32_LEAN_AND_MEAN
@ -261,6 +259,8 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
{
#if LJ_TARGET_X86
void *cf = (char *)f - CFRAME_OFS_SEH;
#elif LJ_TARGET_ARM64
void *cf = (char *)f - CFRAME_SIZE;
#else
void *cf = f;
#endif
@ -268,11 +268,25 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
if (rec->ExceptionCode == STATUS_LONGJUMP &&
rec->ExceptionRecord &&
LJ_EXCODE_CHECK(rec->ExceptionRecord->ExceptionCode)) {
errcode = LJ_EXCODE_ERRCODE(rec->ExceptionRecord->ExceptionCode);
if ((rec->ExceptionFlags & 0x20)) { /* EH_TARGET_UNWIND */
/* Unwinding is about to finish; revert the ExceptionCode so that
** RtlRestoreContext does not try to restore from a _JUMP_BUFFER.
*/
rec->ExceptionCode = 0;
}
}
/* Unwind internal frames. */
err_unwind(L, cf, errcode);
} else {
void *cf2 = err_unwind(L, cf, 0);
if (cf2) { /* We catch it, so start unwinding the upper frames. */
#if !LJ_TARGET_X86
EXCEPTION_RECORD rec2;
#endif
if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
rec->ExceptionCode == LJ_GCC_EXCODE) {
#if !LJ_TARGET_CYGWIN
@ -295,14 +309,29 @@ LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
(void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
/* lj_vm_rtlunwind does not return. */
#else
if (LJ_EXCODE_CHECK(rec->ExceptionCode)) {
/* For unwind purposes, wrap the EXCEPTION_RECORD in something that
** looks like a longjmp, so that MSVC will execute C++ destructors in
** the frames we unwind over. ExceptionInformation[0] should really
** contain a _JUMP_BUFFER*, but hopefully nobody is looking too closely
** at this point.
*/
rec2.ExceptionCode = STATUS_LONGJUMP;
rec2.ExceptionRecord = rec;
rec2.ExceptionAddress = 0;
rec2.NumberParameters = 1;
rec2.ExceptionInformation[0] = (ULONG_PTR)ctx;
rec = &rec2;
}
/* Unwind the stack and call all handlers for all lower C frames
** (including ourselves) again with EH_UNWINDING set. Then set
** stack pointer = cf, result = errcode and jump to the specified target.
** stack pointer = f, result = errcode and jump to the specified target.
*/
RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
lj_vm_unwind_ff_eh :
lj_vm_unwind_c_eh),
rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
RtlUnwindEx(f, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
lj_vm_unwind_ff_eh :
lj_vm_unwind_c_eh),
rec, (void *)(uintptr_t)errcode, dispatch->ContextRecord,
dispatch->HistoryTable);
/* RtlUnwindEx should never return. */
#endif
}

View file

@ -1131,7 +1131,7 @@ static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg)
/* Emit BUFHDR for write to extended string buffer. */
static TRef recff_sbufx_write(jit_State *J, TRef ud)
{
TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kintpgc(J, sizeof(GCudata)));
return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
}
@ -1165,20 +1165,19 @@ static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd
SBufExt *sbx = bufV(&rd->argv[0]);
int iscow = (int)sbufiscow(sbx);
TRef trl = recff_sbufx_get_L(J, ud);
TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
TRef zero = lj_ir_kint(J, 0);
emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
TRef zeropgc = lj_ir_kintpgc(J, 0);
emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zeropgc);
if (iscow) {
trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
lj_ir_kint(J, SBUF_FLAG_COW));
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
TRef zerop = lj_ir_kintp(J, 0);
trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, lj_ir_kintpgc(J, SBUF_FLAG_COW));
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zerop);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zerop);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zerop);
recff_sbufx_set_L(J, ud, trl);
emitir(IRT(IR_FSTORE, IRT_PGC),
emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zeropgc);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zerop);
} else {
TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);

View file

@ -76,8 +76,8 @@
\
_(ABS, N , ref, ref) \
_(LDEXP, N , ref, ref) \
_(MIN, C , ref, ref) \
_(MAX, C , ref, ref) \
_(MIN, N , ref, ref) \
_(MAX, N , ref, ref) \
_(FPMATH, N , ref, lit) \
\
/* Overflow-checking arithmetic ops. */ \

View file

@ -63,7 +63,7 @@ typedef struct CCallInfo {
/* Helpers for conditional function definitions. */
#define IRCALLCOND_ANY(x) x
#if LJ_TARGET_X86ORX64
#if LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64
#define IRCALLCOND_FPMATH(x) NULL
#else
#define IRCALLCOND_FPMATH(x) x

View file

@ -56,6 +56,12 @@ LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
#define lj_ir_kintp(J, k) lj_ir_kint(J, (int32_t)(k))
#endif
/* Integer constant for use with IRT_PGC/IRT_IGC typed operands: resolves to
** lj_ir_kintp when LJ_GC64 is set and to plain lj_ir_kint otherwise.
*/
#if LJ_GC64
#define lj_ir_kintpgc lj_ir_kintp
#else
#define lj_ir_kintpgc lj_ir_kint
#endif
static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
{
TValue tv;

View file

@ -29,6 +29,11 @@
#include <valgrind/valgrind.h>
#endif
#if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
#if LJ_TARGET_IOS
void sys_icache_invalidate(void *start, size_t len);
#endif
@ -41,6 +46,8 @@ void lj_mcode_sync(void *start, void *end)
#endif
#if LJ_TARGET_X86ORX64
UNUSED(start); UNUSED(end);
#elif LJ_TARGET_WINDOWS
FlushInstructionCache(GetCurrentProcess(), start, (char *)end-(char *)start);
#elif LJ_TARGET_IOS
sys_icache_invalidate(start, (char *)end-(char *)start);
#elif LJ_TARGET_PPC
@ -58,9 +65,6 @@ void lj_mcode_sync(void *start, void *end)
#if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#define MCPROT_RW PAGE_READWRITE
#define MCPROT_RX PAGE_EXECUTE_READ
#define MCPROT_RWX PAGE_EXECUTE_READWRITE
@ -363,7 +367,7 @@ void lj_mcode_limiterr(jit_State *J, size_t need)
sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
if ((size_t)need > sizemcode)
if (need * sizeof(MCode) > sizemcode)
lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
if (J->szallmcarea + sizemcode > maxmcode)
lj_trace_err(J, LJ_TRERR_MCODEAL);

View file

@ -44,12 +44,12 @@ static void dce_propagate(jit_State *J)
IRIns *ir = IR(ins);
if (irt_ismarked(ir->t)) {
irt_clearmark(ir->t);
pchain[ir->o] = &ir->prev;
} else if (!ir_sideeff(ir)) {
*pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */
lj_ir_nop(ir);
continue;
}
pchain[ir->o] = &ir->prev;
if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
}

View file

@ -377,10 +377,10 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
case IR_BOR: k1 |= k2; break;
case IR_BXOR: k1 ^= k2; break;
case IR_BSHL: k1 <<= (k2 & 63); break;
case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
case IR_BSAR: k1 >>= (k2 & 63); break;
case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
case IR_BSHR: k1 >>= (k2 & 63); break;
case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
default: lj_assertJ(0, "bad IR op %d", op); break;
}
#else
@ -1972,7 +1972,10 @@ LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
/* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
if (fins->op1 == fins->op2 &&
(!irt_isnum(fins->t) ||
(fleft->o == IR_CONV && /* Converted integers cannot be NaN. */
(uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))))
return CONDFOLD(fins->o == IR_EQ);
return fold_comm_swap(J);
}

View file

@ -1599,10 +1599,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
lj_assertJ(!hasmm, "inconsistent metamethod handling");
if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
TRef key = ix->key;
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
if (tref_isinteger(key)) { /* NEWREF needs a TValue as a key. */
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
else if (tref_isnumber(key) && tref_isk(key) && tvismzero(&ix->keyv))
key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */
} else if (tref_isnum(key)) {
if (tref_isk(key)) {
if (tvismzero(&ix->keyv))
key = lj_ir_knum_zero(J); /* Canonicalize -0.0 to +0.0. */
} else {
emitir(IRTG(IR_EQ, IRT_NUM), key, key); /* Check for !NaN. */
}
}
xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
#ifdef LUAJIT_ENABLE_TABLE_BUMP
@ -1775,7 +1781,7 @@ noconstify:
emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
emitir(IRT(IR_ADD, IRT_PGC), uref,
lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
lj_ir_kintpgc(J, (slot - 1 - LJ_FR2) * -8)));
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
if (val == 0) {
return getslot(J, slot);
@ -1788,7 +1794,7 @@ noconstify:
}
emitir(IRTG(IR_UGT, IRT_PGC),
emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
lj_ir_kintpgc(J, (J->baseslot + J->maxslot) * 8));
} else {
needbarrier = 1;
uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
@ -1966,7 +1972,8 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
emitir(IRTGI(IR_EQ), fr,
lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2)));
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
lj_ir_kintpgc(J, frofs-8*(1+LJ_FR2)));
for (i = 0; i < nload; i++) {
IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t);
@ -1985,8 +1992,11 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
TRef tr = TREF_NIL;
ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
if (idx < 0) goto nyivarg;
if (idx != 0 && !tref_isinteger(tridx))
if (idx != 0 && !tref_isinteger(tridx)) {
if (tref_isstr(tridx))
tridx = emitir(IRTG(IR_STRTO, IRT_NUM), tridx, 0);
tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
}
if (idx != 0 && tref_isk(tridx)) {
emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
@ -2014,7 +2024,7 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
IRType t;
TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
lj_ir_kint(J, frofs-(8<<LJ_FR2)));
lj_ir_kintpgc(J, frofs-(8<<LJ_FR2)));
t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
tr = lj_record_vload(J, aref, 0, t);

View file

@ -103,8 +103,17 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
{
MSize n;
if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */
lj_err_throw(L, LUA_ERRERR);
if (L->stacksize >= LJ_STACK_MAXEX) {
/* 4. Throw 'error in error handling' when we are _over_ the limit. */
if (L->stacksize > LJ_STACK_MAXEX)
lj_err_throw(L, LUA_ERRERR); /* Does not invoke an error handler. */
/* 1. We are _at_ the limit after the last growth. */
if (L->status < LUA_ERRRUN) { /* 2. Throw 'stack overflow'. */
L->status = LUA_ERRRUN; /* Prevent ending here again for pushed msg. */
lj_err_msg(L, LJ_ERR_STKOV); /* May invoke an error handler. */
}
/* 3. Add space (over the limit) for pushed message and error handler. */
}
n = L->stacksize + need;
if (n > LJ_STACK_MAX) {
n += 2*LUA_MINSTACK;
@ -114,8 +123,6 @@ void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need)
n = LJ_STACK_MAX;
}
resizestack(L, n);
if (L->stacksize >= LJ_STACK_MAXEX)
lj_err_msg(L, LJ_ERR_STKOV);
}
void LJ_FASTCALL lj_state_growstack1(lua_State *L)
@ -123,6 +130,18 @@ void LJ_FASTCALL lj_state_growstack1(lua_State *L)
lj_state_growstack(L, 1);
}
/* Callback handed to lj_vm_cpcall() by lj_state_cpgrowstack().
** 'ud' points to an MSize holding the number of stack slots to add to 'co'.
** The 'dummy' argument is unused. Always returns NULL.
*/
static TValue *cpgrowstack(lua_State *co, lua_CFunction dummy, void *ud)
{
  MSize *needp = (MSize *)ud;
  UNUSED(dummy);
  lj_state_growstack(co, *needp);
  return NULL;
}
/* Grow the stack of L by 'need' slots by running cpgrowstack() through
** lj_vm_cpcall(). Returns whatever status lj_vm_cpcall() reports.
** NOTE(review): presumably lj_vm_cpcall() runs the callback protected, so a
** failure to grow comes back as a non-zero status instead of propagating
** as an error -- confirm against the VM.
*/
int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need)
{
  int status = lj_vm_cpcall(L, NULL, &need, cpgrowstack);
  return status;
}
/* Allocate basic stack for new state. */
static void stack_init(lua_State *L1, lua_State *L)
{

View file

@ -18,6 +18,7 @@ LJ_FUNC void lj_state_relimitstack(lua_State *L);
LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
LJ_FUNCA void LJ_FASTCALL lj_state_growstack(lua_State *L, MSize need);
LJ_FUNC void LJ_FASTCALL lj_state_growstack1(lua_State *L);
LJ_FUNC int LJ_FASTCALL lj_state_cpgrowstack(lua_State *L, MSize need);
static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
{

View file

@ -58,9 +58,13 @@ typedef uint32_t RegSP;
/* Register set representation, selected per target.
** PPC, MIPS and ARM64 use a 64 bit set (RSET_BITS 6, 2^6 = 64),
** all other targets use a 32 bit set (RSET_BITS 5, 2^5 = 32).
** rset_picktop_/rset_pickbot_ map to the bit-scan helper of matching width.
** NOTE(review): presumably these yield the index of the highest/lowest
** set bit and require a non-empty set -- confirm against lj_fls*/lj_ffs*.
*/
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
typedef uint64_t RegSet;
#define RSET_BITS 6
#define rset_picktop_(rs) ((Reg)lj_fls64(rs))
#define rset_pickbot_(rs) ((Reg)lj_ffs64(rs))
#else
typedef uint32_t RegSet;
#define RSET_BITS 5
#define rset_picktop_(rs) ((Reg)lj_fls(rs))
#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
#endif
#define RID2RSET(r) (((RegSet)1) << (r))
@ -71,13 +75,6 @@ typedef uint32_t RegSet;
#define rset_set(rs, r) (rs |= RID2RSET(r))
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
#define rset_picktop_(rs) ((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot_(rs) ((Reg)__builtin_ctzll(rs))
#else
#define rset_picktop_(rs) ((Reg)lj_fls(rs))
#define rset_pickbot_(rs) ((Reg)lj_ffs(rs))
#endif
/* -- Register allocation cost -------------------------------------------- */

View file

@ -621,22 +621,27 @@ static int trace_abort(jit_State *J)
J->cur.link = 0;
J->cur.linktype = LJ_TRLINK_NONE;
lj_vmevent_send(L, TRACE,
cTValue *bot = tvref(L->stack)+LJ_FR2;
cTValue *frame;
int size;
BCIns pc;
GCfunc *fn;
const BCIns *pc;
BCPos pos = 0;
setstrV(L, L->top++, lj_str_newlit(L, "abort"));
setintV(L->top++, traceno);
/* Find original function call to generate a better error message. */
frame = lj_debug_frame(L, 0, &size);
lj_assertL(frame != NULL, "missing debug frame");
fn = frame_func(frame);
if (frame == L->base-1 && isluafunc(fn))
pc = proto_bcpos(funcproto(fn), J->pc);
else
pc = lj_debug_framepc(L, fn, frame);
setfuncV(L, L->top++, fn);
setintV(L->top++, pc);
/* Find original Lua function call to generate a better error message. */
for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
if (isluafunc(frame_func(frame))) {
pos = proto_bcpos(funcproto(frame_func(frame)), pc);
break;
} else if (frame_prev(frame) <= bot) {
break;
} else if (frame_iscont(frame)) {
pc = frame_contpc(frame) - 1;
} else {
pc = frame_pc(frame) - 1;
}
}
setfuncV(L, L->top++, frame_func(frame));
setintV(L->top++, pos);
copyTV(L, L->top++, restorestack(L, errobj));
copyTV(L, L->top++, &J->errinfo);
);

View file

@ -27,39 +27,52 @@
@set BUILDTYPE=release
@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
@setlocal
@call :SETHOSTVARS
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:minilua.exe minilua.obj
@if errorlevel 1 goto :BAD
if exist minilua.exe.manifest^
%LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
@endlocal
@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64
@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU -D P64
@set LJARCH=x64
@minilua
@if errorlevel 8 goto :X64
@if errorlevel 8 goto :NO32
@set DASC=vm_x86.dasc
@set DASMFLAGS=-D WIN -D JIT -D FFI
@set DASMFLAGS=-D WIN -D JIT -D FFI -D ENDIAN_LE -D FPU
@set LJARCH=x86
@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
@goto :DA
:NO32
@if "%VSCMD_ARG_TGT_ARCH%" neq "arm64" goto :X64
@set DASC=vm_arm64.dasc
@set DASMTARGET=-D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
@set LJARCH=arm64
@goto :DA
:X64
@if "%1" neq "nogc64" goto :GC64
@if "%1" neq "nogc64" goto :DA
@shift
@set DASC=vm_x86.dasc
@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
:GC64
:DA
minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
if exist ..\.git ( git show -s --format=%%ct >luajit_relver.txt ) else ( type ..\.relver >luajit_relver.txt )
minilua host\genversion.lua
%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@setlocal
@call :SETHOSTVARS
%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% host\buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
if exist buildvm.exe.manifest^
%LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
@endlocal
buildvm -m peobj -o lj_vm.obj
@if errorlevel 1 goto :BAD
@ -118,6 +131,12 @@ if exist luajit.exe.manifest^
@echo.
@echo === Successfully built LuaJIT for Windows/%LJARCH% ===
@goto :END
:SETHOSTVARS
@rem Subroutine, invoked with "call :SETHOSTVARS" before the host tools
@rem (minilua, buildvm) are compiled. For an x64 -> arm64 cross-build it
@rem re-runs VsDevCmd.bat with -arch set to the host architecture; for any
@rem other host/target combination it does nothing.
@rem The callers wrap the call in setlocal/endlocal.
@rem NOTE(review): "echo on" presumably restores command echo after
@rem VsDevCmd.bat has run -- confirm.
@rem NOTE(review): "goto :END" is assumed to reach the end of the script and
@rem thereby return from the call -- confirm the position of the :END label.
@if "%VSCMD_ARG_HOST_ARCH%_%VSCMD_ARG_TGT_ARCH%" equ "x64_arm64" (
call "%VSINSTALLDIR%Common7\Tools\VsDevCmd.bat" -arch=%VSCMD_ARG_HOST_ARCH% -no_logo
echo on
)
@goto :END
:BAD
@echo.

View file

@ -113,13 +113,37 @@
|
|.define TMPDofs, #24
|
|// stp_unwind/ldp_unwind wrap stp/ldp for a register pair (r1, r2).
|// Callers always name the pair in the same order; the macro picks the
|// actual operand order for the platform. ldp_unwind is defined twice:
|// once without and once with a trailing post-index operand.
|.if WIN
|// Windows unwind data is suited to r1 stored first.
|.macro stp_unwind, r1, r2, where
| stp r1, r2, where
|.endmacro
|.macro ldp_unwind, r1, r2, where
| ldp r1, r2, where
|.endmacro
|.macro ldp_unwind, r1, r2, where, post_index
| ldp r1, r2, where, post_index
|.endmacro
|.else
|// Otherwise store r2 first for compact unwind info (OSX).
|.macro stp_unwind, r1, r2, where
| stp r2, r1, where
|.endmacro
|.macro ldp_unwind, r1, r2, where
| ldp r2, r1, where
|.endmacro
|.macro ldp_unwind, r1, r2, where, post_index
| ldp r2, r1, where, post_index
|.endmacro
|.endif
|
|.macro save_, gpr1, gpr2, fpr1, fpr2
| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
| stp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
| stp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
|.endmacro
|.macro rest_, gpr1, gpr2, fpr1, fpr2
| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
| ldp_unwind d..fpr1, d..fpr2, [sp, # SAVE_FPR_+(14-fpr1)*8]
| ldp_unwind x..gpr1, x..gpr2, [sp, # SAVE_GPR_+(27-gpr1)*8]
|.endmacro
|
|.macro saveregs
@ -127,14 +151,14 @@
| sub sp, sp, # CFRAME_SPACE
| stp fp, lr, [sp, # SAVE_FP_LR_]
| add fp, sp, # SAVE_FP_LR_
| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
| stp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
| save_ 21, 22, 8, 9
| save_ 23, 24, 10, 11
| save_ 25, 26, 12, 13
| save_ 27, 28, 14, 15
|.endmacro
|.macro restoreregs
| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
| ldp_unwind x19, x20, [sp, # SAVE_GPR_+(27-19)*8]
| rest_ 21, 22, 8, 9
| rest_ 23, 24, 10, 11
| rest_ 25, 26, 12, 13
@ -408,24 +432,24 @@ static void build_subroutines(BuildCtx *ctx)
| // (void *cframe, int errcode)
| mov sp, CARG1
| mov CRET1, CARG2
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| ldr L, SAVE_L
| mv_vmstate TMP0w, C
| ldr GL, L->glref
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| mv_vmstate TMP0w, C
| st_vmstate TMP0w
| b ->vm_leave_unw
|
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe)
| and sp, CARG1, #CFRAME_RAWMASK
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| ldr L, SAVE_L
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0
| ldr GL, L->glref // Setup pointer to global state.
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| mov RC, #16 // 2 results: false + error message.
| ldr BASE, L->base
| ldr GL, L->glref // Setup pointer to global state.
| mov_false TMP0
| sub RA, BASE, #8 // Results start at BASE-8.
| ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
@ -2005,13 +2029,13 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| ldr L, SAVE_L
|1:
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0
| cmn CARG1w, #LUA_ERRERR
| bhs >9 // Check for error from exit.
| lsl RC, CARG1, #3
| ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
| movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
| movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
| movn TISNIL, #0
| lsl RC, CARG1, #3
| and LFUNC:CARG2, CARG2, #LJ_GCVMASK
| str RCw, SAVE_MULTRES
| str BASE, L->base
@ -2162,7 +2186,7 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|// Handler for callback functions.
|// Saveregs already performed. Callback slot number in [sp], g in r12.
|// Saveregs already performed. Callback slot number in w9, g in x10.
|->vm_ffi_callback:
|.if FFI
|.type CTSTATE, CTState, PC
@ -2215,7 +2239,7 @@ static void build_subroutines(BuildCtx *ctx)
|.if FFI
| .type CCSTATE, CCallState, x19
| sp_auth
| stp x20, CCSTATE, [sp, #-32]!
| stp_unwind CCSTATE, x20, [sp, #-32]!
| stp fp, lr, [sp, #16]
| add fp, sp, #16
| mov CCSTATE, x0
@ -2247,7 +2271,7 @@ static void build_subroutines(BuildCtx *ctx)
| stp d0, d1, CCSTATE->fpr[0]
| stp d2, d3, CCSTATE->fpr[2]
| ldp fp, lr, [sp, #16]
| ldp x20, CCSTATE, [sp], #32
| ldp_unwind CCSTATE, x20, [sp], #32
| ret_auth
|.endif
|// Note: vm_ffi_call must be the last function in this object file!
@ -3816,9 +3840,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if JIT
| // RA = base (ignored), RC = traceno
| ldr CARG1, [GL, #GL_J(trace)]
| mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
| st_vmstate wzr // Traces on ARM64 don't store the trace #, so use 0.
| ldr TRACE:RC, [CARG1, RC, lsl #3]
| st_vmstate CARG2w
|.if PAUTH
| ldr RA, TRACE:RC->mcauth
|.else
@ -3893,6 +3916,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add TMP2, BASE, RC
| add LFUNC:CARG3, CARG3, TMP0, lsl #47
| add RA, RA, RC
| sub CARG1, CARG1, #8
| add TMP0, RC, #16+FRAME_VARG
| str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
| ldr KBASE, [PC, #-4+PC2PROTO(k)]

View file

@ -5396,6 +5396,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| settp LFUNC:RB, TMP0
| daddu TMP0, RA, RC
| sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
| daddiu TMP2, TMP2, -8
| daddiu TMP3, RC, 16+FRAME_VARG
| sltu AT, TMP0, TMP2
| ld KBASE, -4+PC2PROTO(k)(PC)

View file

@ -136,7 +136,7 @@ print(tostring(saved_q))
cdata<void *>: 0xefdeaddeadbeef
cdata<void *>: 0xefdeaddeadbeef
--- err
[TRACE --- test.lua:8 -- trace too short at thread.exdata]
[TRACE --- test.lua:8 -- trace too short at test.lua:9]
@ -195,7 +195,7 @@ print("get: " .. total)
set: 0
get: 10
--- err
[TRACE --- test.lua:14 -- trace too short at thread.exdata]
[TRACE --- test.lua:14 -- trace too short at test.lua:15]
[TRACE 1 test.lua:21 loop]

View file

@ -160,7 +160,7 @@ print(tostring(saved_q))
cdata<void *>: 0xefdeaddeadbeef
cdata<void *>: 0xefdeaddeadbeef
--- err
[TRACE --- test.lua:8 -- trace too short at thread.exdata2]
[TRACE --- test.lua:8 -- trace too short at test.lua:9]
@ -224,7 +224,7 @@ get: 10
cdata<void *>: NULL
cdata<void *>: 0xefdeaddeadbeef
--- err
[TRACE --- test.lua:15 -- trace too short at thread.exdata2]
[TRACE --- test.lua:15 -- trace too short at test.lua:16]
[TRACE 1 test.lua:22 loop]

View file

@ -49,7 +49,7 @@ print('ok')
ok
--- jv
--- err eval
qr/trace too short at jit\.prngstate/
qr/trace too short at test.lua:4/