nuttx-apps/interpreters/luajit/0001-luajit-armv7m-nuttx.patch

commit cd7869001f
interpreters/luajit: use section name for custom heap
Author: Xu Xingliang <xuxingliang@xiaomi.com>
Signed-off-by: Xu Xingliang <xuxingliang@xiaomi.com>
Date: 2023-08-09 09:49:23 +02:00


diff --git a/Makefile b/Makefile
index b0288b4d..f387077c 100644
--- a/Makefile
+++ b/Makefile
@@ -90,7 +90,7 @@ FILE_MAN= luajit.1
FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+ dis_x86.lua dis_x64.lua dis_arm.lua dis_armv7m.lua dis_arm64.lua \
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua vmdef.lua
diff --git a/dynasm/dasm_armv7m.h b/dynasm/dasm_armv7m.h
new file mode 100644
index 00000000..8f94ba40
--- /dev/null
+++ b/dynasm/dasm_armv7m.h
@@ -0,0 +1,563 @@
+/*
+** DynASM ARM encoding engine.
+** Copyright (C) 2018 Jernej Turnsek. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "armv7m"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP,
+ DASM_SECTION,
+ DASM_ESC,
+ DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN,
+ DASM_REL_LG,
+ DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC,
+ DASM_LABEL_PC,
+ DASM_IMM,
+ DASM_IMM12,
+ DASM_IMM16,
+ DASM_IMML8,
+ DASM_IMML12,
+ DASM_IMMV8,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = (dasm_ActList)actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
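+/*
+** Annotation: encodes a 32-bit constant as a Thumb-2 "modified immediate"
+** (i:imm3:imm8): a plain value 0..255, the replicated patterns 0x00XY00XY,
+** 0xXY00XY00 and 0xXYXYXYXY, or an 8-bit value rotated into place. The
+** returned bits are already positioned for the half-word-swapped
+** instruction words emitted by dasm_encode(). Returns -1 if unencodable,
+** -2 if the value only fits the plain 12-bit add/sub encoding.
+*/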
+static int dasm_imm12(unsigned int n)
+{
+ int i;
+ unsigned int m = n;
+
+ if (m <= 255) {
+ /* i:imm3 = 0000 */
+ return ((((m) & 0xff) << 16) | (((m) & 0x700) << 20) | (((m) & 0x800) >> 1));
+ }
+ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0001 */
+ return ((((0x100 | (m & 0xff)) & 0xff) << 16) | (((0x100 | (m & 0xff)) & 0x700) << 20) | (((0x100 | (m & 0xff)) & 0x800) >> 1));
+ }
+ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+ /* i:imm3 = 0010 */
+ return ((((0x200 | (m >> 8 & 0xff)) & 0xff) << 16) | (((0x200 | (m >> 8 & 0xff)) & 0x700) << 20) | (((0x200 | (m >> 8 & 0xff)) & 0x800) >> 1));
+ }
+ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0011 */
+ return ((((0x300 | (m & 0xff)) & 0xff) << 16) | (((0x300 | (m & 0xff)) & 0x700) << 20) | (((0x300 | (m & 0xff)) & 0x800) >> 1));
+ }
+ else {
+ for (i = 0; i < 4096; i += 128, m = ((m << 1) | (m >> (-(unsigned int)(1)&(8*sizeof(m) - 1))))) {
+ if (m <= 255) {
+ if ((m & 0x80) && (i >= 128 * 8))
+ return ((((i | (m & 0x7f)) & 0xff) << 16) | (((i | (m & 0x7f)) & 0x700) << 20) | (((i | (m & 0x7f)) & 0x800) >> 1));
+ else
+ continue;
+ }
+ }
+ }
+ if (n < 4096) {
+ return -2; /* Only fits the plain imm12 add/sub encoding. TODO: better solution! */
+ }
+ return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst,
+ int,
+ sec->buf,
+ sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) {
+ ofs += 4;
+ }
+ else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) { CK(n >= 10 || *pl < 0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+putrel:
+ n = *pl;
+ if (n < 0) {
+ /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ }
+ else {
+linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+ case DASM_IMM16:
+#ifdef DASM_CHECKS
+ CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I);
+ if ((ins & 0x8000))
+ CK(((n + (1 << (((ins >> 5) & 31) - 1))) >> ((ins >> 5) & 31)) == 0, RANGE_I);
+ else
+ CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMMV8:
+ CK((n & 3) == 0, RANGE_I);
+ n >>= 2;
+ /* fallthrough */
+ case DASM_IMML8:
+ case DASM_IMML12:
+ CK(n >= 0 ? ((n >> ((ins >> 5) & 31)) == 0) :
+ (((-n) >> ((ins >> 5) & 31)) == 0),
+ RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM12:
+ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC | pc;
+ }
+#endif
+
+ {
+ /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
+ case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break;
+ }
+ }
+stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned int *cp = (unsigned int *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP:
+ case DASM_SECTION:
+ goto stop;
+ case DASM_ESC:
+ /* ESC data words are emitted half-word swapped, like instruction words. */
+ *cp++ = ((*p >> 16) & 0x0000ffff) | ((*p << 16) & 0xffff0000);
+ p++;
+ break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), !(ins & 2048));
+ goto patchrel;
+ case DASM_ALIGN:
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x8000f3af; /* jturnsek: NOP.W */
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+patchrel:
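+ /* Thumb-2 branch offsets are scattered across both half-words:
+ ** BL/B.W (T4): imm32 = S:I1:I2:imm10:imm11:'0', with In = ~(Jn ^ S);
+ ** B<cond>.W (T3): imm32 = S:J2:J1:imm6:imm11:'0'. The masks below
+ ** assemble these fields directly in the swapped instruction word. */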
+ if ((ins & 0x800) == 0) {
+ /* jturnsek: B or BL */
+ if (cp[-1] & 0x10000000) {
+ /* BL */
+ CK((n & 1) == 0 && ((n + 0x01000000) >> 25) == 0, RANGE_REL);
+ cp[-1] |= ((((n & 0x1000000) >> 24) & 0x1) << 10) |
+ (((~((n & 0x800000) >> 23) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 29) |
+ (((~((n & 0x400000) >> 22) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 27) |
+ ((n >> 12) & 0x3ff) |
+ (((n >> 1) & 0x7ff) << 16);
+ }
+ else {
+ /* B (T3) */
+ CK((n & 1) == 0 && ((n + 0x00100000) >> 21) == 0, RANGE_REL);
+ cp[-1] |= ((((n & 0x100000) >> 20) & 0x1) << 10) |
+ ((((n & 0x80000) >> 19) & 0x1) << 27) |
+ ((((n & 0x40000) >> 18) & 0x1) << 29) |
+ ((n >> 12) & 0x3f) |
+ (((n >> 1) & 0x7ff) << 16);
+ }
+ }
+ else if ((ins & 0x1000)) {
+ CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL);
+ goto patchimml8;
+ }
+ else if ((ins & 0x2000) == 0) {
+ CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL);
+ goto patchimml;
+ }
+ else {
+ CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL);
+ n >>= 2;
+ goto patchimmv;
+ }
+ break;
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins - 10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC:
+ break;
+ case DASM_IMM:
+ if (((ins >> 5) & 31) == 2) {
+ /* 2 bit shift for load/store lsl */
+ cp[-1] |= ((n & 0x3) << 20);
+ }
+ else {
+ /* 5 bit shift */
+ cp[-1] |= ((n & 0x3) << 22) | ((n & 0x1c) << 26);
+ }
+ //cp[-1] |= ((n >> ((ins >> 10) & 31)) & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31);
+ break;
+ case DASM_IMM12:
+ if (dasm_imm12((unsigned int)n) == -2) {
+ cp[-1] ^= 0x00000300;
+ cp[-1] &= ~0x00000010;
+ cp[-1] |= ((((n) & 0xff) << 16) | (((n) & 0x700) << 20) | (((n) & 0x800) >> 1));
+ }
+ else {
+ cp[-1] |= dasm_imm12((unsigned int)n);
+ }
+ break;
+ case DASM_IMM16:
+ cp[-1] |= ((n & 0xf000) >> 12) |
+ ((n & 0x0800) >> 1) |
+ ((n & 0x0700) << 20) |
+ ((n & 0x00ff) << 16);
+ break;
+ case DASM_IMML8:
+patchimml8:
+ cp[-1] |= n >= 0 ? (0x02000000 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16);
+ break;
+ case DASM_IMML12:
+patchimml:
+ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xfff) << 16)) : ((-n & 0xfff) << 16);
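+ /* Negative offset with Rn != PC (low nibble of the first half-word):
+ ** switch to the imm8 form with P=1, U=0, W=0 (subtract, no writeback). */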
+ if (((cp[-1] & 0x0000000f) != 0x0000000f) && (n < 0)) {
+ CK(-255 <= n && n < 0, RANGE_I);
+ cp[-1] &= ~0x03000000;
+ cp[-1] |= 0x0c000000;
+ }
+ break;
+ case DASM_IMMV8:
+patchimmv:
+ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16);
+ break;
+ default:
+ *cp++ = ((ins >> 16) & 0x0000ffff) | ((ins << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */
+ break;
+ }
+ }
+stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG | i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_armv7m.lua b/dynasm/dasm_armv7m.lua
new file mode 100644
index 00000000..8e877d26
--- /dev/null
+++ b/dynasm/dasm_armv7m.lua
@@ -0,0 +1,1010 @@
+------------------------------------------------------------------------------
+-- DynASM ARMV7M module.
+--
+-- Copyright (C) 2018 Jernej Turnsek. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "armv7m",
+ description = "DynASM ARMV7M module",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2018-12-07",
+ author = "Jernej Turnsek",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+local bxor = bit.bxor
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+ map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+ for n,name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+ out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+ local nn = #actlist
+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+ for i = 1,nn-1 do
+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
+ end
+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+ wputxw(w * 0x10000 + (val or 0))
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+ if #actlist == actargs[1] then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+ if n <= 0x000fffff then waction("ESC") end
+ wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ if n <= 0x000fffff then
+ insert(actlist, pos+1, n)
+ n = map_action.ESC * 0x10000
+ end
+ actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=20,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=20,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=20,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end
+ next_extern = n + 1
+ t[name] = n
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+ for i=0,next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=0,next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }
+
+local map_cond = {
+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+ hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+-- Template strings for ARM instructions.
+-- jturnsek: dasm_encode will do the swap of half-words!!!
+local map_op = {
+ and_3 = "ea000000DNPs",
+ eor_3 = "ea800000DNPs",
+ sub_3 = "eba00000DNPs",
+ rsb_3 = "ebc00000DNPs",
+ add_3 = "eb000000DNPs",
+ sbc_3 = "eb600000DNPs",
+ tst_2 = "ea100f00NP",
+ cmp_2 = "ebb00f00NP",
+ cmn_2 = "eb100f00NP",
+ orr_3 = "ea400000DNPs",
+ mov_2 = "ea4f0000DPs",
+ bic_3 = "ea200000DNPs",
+ mvn_2 = "ea6f0000DPs",
+
+ and_4 = "ea000000DNMps",
+ eor_4 = "ea800000DNMps",
+ sub_4 = "eba00000DNMps",
+ rsb_4 = "ebc00000DNMps",
+ add_4 = "eb000000DNMps",
+ sbc_4 = "eb600000DNMps",
+ tst_3 = "ea100f00NMp",
+ cmp_3 = "ebb00f00NMp",
+ cmn_3 = "eb100f00NMp",
+ orr_4 = "ea400000DNMps",
+ mov_3 = "ea4f0000DMps",
+ bic_4 = "ea200000DNMps",
+ mvn_3 = "ea6f0000DMps",
+
+ lsl_3 = "ea400000DNws",
+ lsr_3 = "ea400010DNws",
+ asr_3 = "ea400020DNws",
+ ror_3 = "ea400030DNws",
+
+ smull_4 = "fb800000SDNM",
+
+ clz_2 = "fab0f080Da", -- a is used for Consistent(M)
+ rbit_2 = "fa90f0a0Da", -- a is used for Consistent(M)
+
+ str_2 = "f8400000SL", str_3 = "f8400000SL", str_4 = "f8400000SL",
+ strb_2 = "f8000000SL", strb_3 = "f8000000SL", strb_4 = "f8000000SL",
+ ldr_2 = "f8500000SL", ldr_3 = "f8500000SL", ldr_4 = "f8500000SL",
+ ldrb_2 = "f8100000SL", ldrb_3 = "f8100000SL", ldrb_4 = "f8100000SL",
+ strh_2 = "f8200000SL", strh_3 = "f8200000SL",
+ ldrh_2 = "f8300000SL", ldrh_3 = "f8300000SL",
+ ldrd_3 = "e8500000SDL", ldrd_4 = "e8500000SDL",
+ strd_3 = "e8400000SDL", strd_4 = "e8400000SDL",
+
+ ldm_2 = "e8900000oR",
+ pop_1 = "e8bd0000R",
+ push_1 = "e92d0000R",
+
+ b_1 = "f0009000B",
+ bl_1 = "f000d000B",
+ bx_1 = "bf004700C",
+ blx_1 = "bf004780C",
+
+ nop_0 = "f3af8000",
+ bkpt_1 = "bf00be00K",
+
+ ["vadd.f64_3"] = "ee300b00Gdnm",
+ ["vsub.f64_3"] = "ee300b40Gdnm",
+ ["vmul.f64_3"] = "ee200b00Gdnm",
+ ["vdiv.f64_3"] = "ee800b00Gdnm",
+ ["vcmp.f64_2"] = "eeb40b40Gdm",
+ ["vcvt.f64.s32_2"] = "eeb80bc0GdFm",
+ ["vsqrt.f64_2"] = "eeb10bc0Gdm",
+
+ vldr_2 = "ed100a00dl|ed100b00Gdl",
+ vstr_2 = "ed000a00dl|ed000b00Gdl",
+ vldm_2 = "ec900a00or",
+ vpop_1 = "ecbd0a00r",
+ vstmdb_2 = "ed000a00or",
+ vpush_1 = "ed2d0a00r",
+
+ ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY",
+ vmov_2 = "ee100a10Sn|ee000a10nS",
+ vmov_3 = "ec500a10SNm|ec400a10mSN|ec500b10GSNm|ec400b10GmSN",
+ vmrs_0 = "eef1fa10",
+
+ it_1 = "bf00bf08c",
+ ite_1 = "bf00bf04c",
+ itt_1 = "bf00bf04c",
+ ittt_1 = "bf00bf02c",
+ itttt_1 = "bf00bf01c",
+ iteee_1 = "bf00bf01c",
+}
+
+-- Add mnemonics for "s" variants.
+do
+ local t = {}
+ for k,v in pairs(map_op) do
+ if sub(v, -1) == "s" then
+ local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2)
+ t[sub(k, 1, -3).."s"..sub(k, -2)] = v2
+ end
+ end
+ for k,v in pairs(t) do
+ map_op[k] = v
+ end
+end
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)
+ local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$")
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg
+ if not reg then
+ werror("type `"..(tname or expr).."' needs a register override")
+ end
+ expr = reg
+ end
+ local r = match(expr, "^r(1?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_gpr_pm(expr)
+ local pm, expr2 = match(expr, "^([+-]?)(.*)$")
+ return parse_gpr(expr2), (pm == "-")
+end
+
+local function parse_vr(expr, tp)
+ local t, r = match(expr, "^([sd])([0-9]+)$")
+ if t == tp then
+ r = tonumber(r)
+ if r <= 31 then
+ if t == "s" then return shr(r, 1), band(r, 1) end
+ return band(r, 15), shr(r, 4)
+ end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_reglist(reglist)
+ reglist = match(reglist, "^{%s*([^}]*)}$")
+ if not reglist then werror("register list expected") end
+ local rr = 0
+ for p in gmatch(reglist..",", "%s*([^,]*),") do
+ local rbit = shl(1, parse_gpr(gsub(p, "%s+$", "")))
+ if band(rr, rbit) ~= 0 then
+ werror("duplicate register `"..p.."'")
+ end
+ rr = rr + rbit
+ end
+ return rr
+end
+
+local function parse_vrlist(reglist)
+ local ta, ra, tb, rb = match(reglist,
+ "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$")
+ ra, rb = tonumber(ra), tonumber(rb)
+ if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then
+ local nr = rb + 1 - ra
+ if ta == "s" then
+ return shl(shr(ra, 1), 12) + shl(band(ra, 1), 22) + nr
+ else
+ return shl(band(ra, 15), 12) + shl(shr(ra, 4), 22) + nr * 2 + 0x100
+ end
+ end
+ werror("register list expected")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = tonumber(imm)
+ if n then
+ local m = sar(n, scale)
+ if shl(m, scale) == n then
+ if signed then
+ local s = sar(m, bits-1)
+ if s == 0 then return shl(m, shift)
+ elseif s == -1 then return shl(m + shl(1, bits), shift) end
+ else
+ if sar(m, bits) == 0 then return shl(m, shift) end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+ return 0
+ end
+end
+
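+-- Mirror of dasm_imm12() in dasm_armv7m.h: encode a constant as a Thumb-2
+-- modified immediate (i:imm3:imm8) in the standard field layout (the
+-- half-word swap happens later in dasm_encode), or emit an IMM12 action
+-- when the value is only known at C compile/run time.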
+local function parse_imm12(imm)
+ local n = tonumber(imm)
+ if n then
+ if n <= 255 then
+ return band(n, 0xff)
+ elseif band(n, 0xff00ff00) == 0 and band(shr(n, 16), 0xff) == band(n, 0xff) then
+ return band(n, 0xff) + shl(1, 12)
+ elseif band(n, 0x00ff00ff) == 0 and band(shr(n, 16), 0xff00) == band(n, 0xff00) then
+ return band(shr(n, 8), 0xff) + shl(2, 12)
+ elseif band(shr(n, 24), 0xff) == band(n, 0xff) and
+ band(shr(n, 16), 0xff) == band(n, 0xff) and
+ band(shr(n, 8), 0xff) == band(n, 0xff) then
+ return band(n, 0xff) + shl(3, 12)
+ else
+ for i=31, 8, -1 do
+ n = ror(n, 1)
+ if n >= 128 and n <= 255 then
+ return shl(band(i, 0x10), 22) + shl(band(i, 0x0e), 11) + shl(band(i, 0x01), 7) + band(n, 0x7f)
+ end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM12", 0, imm)
+ return 0
+ end
+end
+
+local function parse_imm16(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = tonumber(imm)
+ if n then
+ if shr(n, 16) == 0 then
+ return band(n, 0x00ff) + shl(band(n, 0x0700), 4) + shl(band(n, 0x0800), 15) + shl(band(n, 0xf000), 4)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM16", 32*16, imm)
+ return 0
+ end
+end
+
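+-- Encode a load/store offset: the imm8 form with P/U/W flags (or the
+-- imm8*4 form used by vldr/vstr when ext is true), or the plain imm12
+-- form for positive offsets without writeback.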
+local function parse_imm_load(imm, ext, flags)
+ local n = tonumber(imm)
+ local p, w = match(flags, "P"), match(flags, "W")
+ if n then
+ if ext then
+ if n >= -1020 and n <= 1020 then
+ local up = 0x00800000
+ if n < 0 then n = -n; up = 0 end
+ return n/4 + up + (p and 0x01000000 or 0) + (w and 0x00200000 or 0)
+ end
+ else
+ if w then
+ if n >= -255 and n <= 255 then
+ if n >= 0 then
+ return n + 0x00000a00 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0)
+ else
+ return -n + 0x00000800 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0)
+ end
+ end
+ else
+ if n >= 0 and n <= 4095 then
+ return n + 0x00800000
+ elseif n >= -255 and n < 0 then
+ return -n + 0x00000800 + (p and 0x00000400 or 0)
+ end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), imm)
+ local pw = 0
+ if p then pw = (ext and 0x01000000 or 0) end
+ if w then pw = (ext and 0x00200000 or 0) end
+ return pw
+ end
+end
+
+local function parse_shift(shift)
+ if shift == "rrx" then
+ return 3 * 16
+ else
+ local s, s2 = match(shift, "^(%S+)%s*(.*)$")
+ s = map_shift[s]
+ if not s then werror("expected shift operand") end
+ if sub(s2, 1, 1) == "#" then
+ local imm = parse_imm(s2, 5, 0, 0, false)
+ return shl(band(imm, 0x1c), 10) + shl(band(imm, 0x03), 6) + shl(s, 4)
+ else
+ werror("expected immediate shift operand")
+ end
+ end
+end
+
+local function parse_shift_load(shift)
+ if not match(shift, "lsl", 1) then
+ werror("expected lsl shift operand")
+ else
+ local s, s2 = match(shift, "^(%S+)%s*(.*)$")
+ if not s then werror("expected shift operand") end
+ if sub(s2, 1, 1) == "#" then
+ return parse_imm(s2, 2, 4, 0, false)
+ else
+ werror("expected immediate shift operand")
+ end
+ end
+end
+
+local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+ if prefix == "=>" then
+ return "PC", 0, sub(label, 3)
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+ return "LG", map_global[sub(label, 3)]
+ end
+ if def then
+ -- [1-9] (local label definition)
+ if match(label, "^[1-9]$") then
+ return "LG", 10+tonumber(label)
+ end
+ else
+ -- [<>][1-9] (local label reference)
+ local dir, lnum = match(label, "^([<>])([1-9])$")
+ if dir then -- Fwd: 1-9, Bkwd: 11-19.
+ return "LG", lnum + (dir == ">" and 0 or 10)
+ end
+ -- extern label (extern label reference)
+ local extname = match(label, "^extern%s+(%S+)$")
+ if extname then
+ return "EXT", map_extern[extname]
+ end
+ end
+ werror("bad label `"..label.."'")
+end
+
+local function parse_load(params, nparams, n, op)
+ local ext = (band(op, 0x10000000) == 0)
+ local pn = params[n]
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ local p2 = params[n+1]
+ if not p1 then
+ if not p2 then
+ if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then
+ local mode, n, s = parse_label(pn, false)
+ waction("REL_"..mode, n + (ext and 0x2800 or 0x0800), s, 1)
+ return op + 15 * 65536 + (ext and 0x01000000 or 0) --set P if ext==true
+ end
+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local d, tp = parse_gpr(reg)
+ if tp then
+ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12),
+ format(tp.ctypefmt, tailr))
+ return op + shl(d, 16) + (ext and 0x01000000 or 0) --set P if ext==true, using imm12 if ext==false
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ local p3 = params[n+2]
+ op = op + shl(parse_gpr(p1), 16)
+ local imm = match(p2, "^#(.*)$")
+ if imm then
+ if p3 then werror("too many parameters") end
+ op = op + parse_imm_load(imm, ext, "W") --always imm8, set W
+ else
+ if ext then werror("not in ARMV7M") end
+ op = op + parse_gpr(p2)
+ if p3 then op = op + parse_shift_load(p3) end
+ end
+ else
+ local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
+ op = op + shl(parse_gpr(p1a), 16)
+ if p2 ~= "" then
+ local imm = match(p2, "^,%s*#(.*)$")
+ if imm then
+ op = op + parse_imm_load(imm, ext, (wb == "!" and "PW" or "P")) --set P (and W)
+ else
+ local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
+ if ext then werror("not in ARMV7M") end
+ op = op + parse_gpr(p2a)
+ if p3 ~= "" then
+ op = op + parse_shift_load(p3)
+ end
+ end
+ else
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + (ext and 0x01000000 or 0) + 0x00800000 --no imm, thus using imm12 if ext==false, set U
+ end
+ end
+ return op
+end
+
+local function parse_vload(q)
+ local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$")
+ if reg then
+ local d = shl(parse_gpr(reg), 16)
+ if imm == "" then return d end
+ imm = match(imm, "^,%s*#(.*)$")
+ if imm then
+ local n = tonumber(imm)
+ if n then
+ if n >= -1020 and n <= 1020 and n%4 == 0 then
+ return d + (n >= 0 and n/4+0x00800000 or -n/4)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMMV8", 32768 + 32*8, imm)
+ return d
+ end
+ end
+ else
+ if match(q, "^[<>=%-]") or match(q, "^extern%s+") then
+ local mode, n, s = parse_label(q, false)
+ waction("REL_"..mode, n + 0x2800, s, 1)
+ return 15 * 65536
+ end
+ local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local d, tp = parse_gpr(reg)
+ if tp then
+ waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr))
+ return shl(d, 16)
+ end
+ end
+ end
+ werror("expected address operand")
+end
+
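+-- Build the IT-block mask: firstcond goes into bits 7:4, and each trailing
+-- 't'/'e' suffix contributes one mask bit ('t' = cond LSB, 'e' = its inverse).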
+local function parse_it(name, cond)
+ local mask, it = 0, match(name, "it", 1)
+ if not it then
+ werror("not IT instruction")
+ end
+ local it2 = sub(name, 3, -1)
+ if not it2 then
+ return shl(map_cond[cond], 4)
+ end
+ local shift = 3
+ for p in gmatch(it2, "[te]") do
+ if p == "t" then
+ mask = mask + shl(band(map_cond[cond], 1), shift)
+ elseif p == "e" then
+ mask = mask + shl(band(bxor(map_cond[cond], 15), 1), shift)
+ else
+ werror("wrong syntax")
+ end
+ if shift ~= 0 then shift = shift - 1 end
+ end
+ return shl(map_cond[cond], 4) + mask
+end
+
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos)
+ local op = tonumber(sub(template, 1, 8), 16)
+ local n = 1
+ local vr = "s"
+
+ -- Process each character.
+ for p in gmatch(sub(template, 9), ".") do
+ local q = params[n]
+ if p == "D" then
+ op = op + shl(parse_gpr(q), 8); n = n + 1
+ elseif p == "N" then
+ op = op + shl(parse_gpr(q), 16); n = n + 1
+ elseif p == "S" then
+ op = op + shl(parse_gpr(q), 12); n = n + 1
+ elseif p == "M" then
+ op = op + parse_gpr(q); n = n + 1
+ elseif p == "a" then
+ local m = parse_gpr(q)
+ op = op + m + shl(m, 16); n = n + 1
+ elseif p == "d" then
+ local r,h = parse_vr(q, vr); op = op + shl(r, 12) + shl(h, 22); n = n + 1
+ elseif p == "n" then
+ local r,h = parse_vr(q, vr); op = op + shl(r, 16) + shl(h, 7); n = n + 1
+ elseif p == "m" then
+ local r,h = parse_vr(q, vr); op = op + r + shl(h, 5); n = n + 1
+ elseif p == "P" then
+ local imm = match(q, "^#(.*)$")
+ if imm then
+ op = op + 0x6000000 + parse_imm12(imm)
+ else
+ op = op + parse_gpr(q)
+ end
+ n = n + 1
+ elseif p == "p" then
+ op = op + parse_shift(q); n = n + 1
+ elseif p == "L" then
+ op = parse_load(params, nparams, n, op)
+ elseif p == "l" then
+ op = op + parse_vload(q)
+ elseif p == "B" then
+ local mode, n, s = parse_label(q, false)
+ waction("REL_"..mode, n, s, 1)
+ elseif p == "C" then -- blx gpr only
+ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then
+ local r = parse_gpr(q)
+ op = op + shl(r, 3)
+ else
+ werror("not supported")
+ end
+ elseif p == "c" then
+ op = op + parse_it(params.op, q)
+ elseif p == "F" then
+ vr = "s"
+ elseif p == "G" then
+ vr = "d"
+ elseif p == "o" then
+ local r, wb = match(q, "^([^!]*)(!?)$")
+ op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0)
+ n = n + 1
+ elseif p == "R" then
+ if params[1] == "{r15}" and params.op == "pop" then
+ op = 0xf85dfb04; -- pop {pc} coded as T3
+ elseif params[1] == "{r12}" and params.op == "pop" then
+ op = 0xf85dcb04; -- pop {r12} coded as T3
+ elseif params[1] == "{r12}" and params.op == "push" then
+ op = 0xf84dcd04; -- push {r12} coded as T3
+ else
+ op = op + parse_reglist(q)
+ end
+ n = n + 1
+ elseif p == "r" then
+ op = op + parse_vrlist(q); n = n + 1
+ elseif p == "w" then
+ local imm = match(q, "^#(.*)$")
+ if imm then
+ local imm5 = parse_imm(q, 5, 0, 0, false)
+ local m = band(op, 0x000f0000)
+ op = op - m + 0x000f0000 + shr(m, 16) + shl(band(imm5, 0x1c), 10) + shl(band(imm5, 0x03), 6); n = n + 1
+ else
+ local type = band(op, 0x00000030)
+ op = op - 0xea400000 + 0xfa00f000 - type + shl(type, 17) + parse_gpr(q)
+ end
+ elseif p == "Y" then
+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+ if not imm or shr(imm, 8) ~= 0 then
+ werror("bad immediate operand")
+ end
+ op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f)
+ elseif p == "K" then
+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+ if not imm or shr(imm, 8) ~= 0 then
+ werror("bad immediate operand")
+ end
+ op = op + band(imm, 0x00ff)
+ elseif p == "s" then
+ -- Ignored.
+ else
+ assert(false)
+ end
+ end
+ wputpos(pos, op)
+end
+
+map_op[".template__"] = function(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 3 positions.
+ if secpos+3 > maxsecpos then wflush() end
+ local pos = wpos()
+ local lpos, apos, spos = #actlist, #actargs, secpos
+
+ local ok, err
+ for t in gmatch(template, "[^|]+") do
+ ok, err = pcall(parse_template, params, t, nparams, pos)
+ if ok then return end
+ secpos = spos
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
+ actargs[apos+1] = nil
+ actargs[apos+2] = nil
+ actargs[apos+3] = nil
+ end
+ error(err, 0)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+ if not params then return "prefix" end
+ local prefix = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+ if not params then return "[1-9] | ->global | =>pcexpr" end
+ if secpos+1 > maxsecpos then wflush() end
+ local mode, n, s = parse_label(params[1], true)
+ if mode == "EXT" then werror("bad label definition") end
+ waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+ if not params then return "imm..." end
+ for _,p in ipairs(params) do
+ local n = tonumber(p)
+ if not n then werror("bad immediate `"..p.."'") end
+ if n < 0 then n = n + 2^32 end
+ wputw(n)
+ if secpos+2 > maxsecpos then wflush() end
+ end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+ if not params then return "numpow2" end
+ if secpos+1 > maxsecpos then wflush() end
+ local align = tonumber(params[1])
+ if align then
+ local x = align
+ -- Must be a power of 2 in the range (2 ... 256).
+ for i=1,8 do
+ x = x / 2
+ if x == 1 then
+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+ return
+ end
+ end
+ end
+ werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+ if not params then
+ return nparams == 2 and "name, ctype" or "name, ctype, reg"
+ end
+ local name, ctype, reg = params[1], params[2], params[3]
+ if not match(name, "^[%a_][%w_]*$") then
+ werror("bad type name `"..name.."'")
+ end
+ local tp = map_type[name]
+ if tp then
+ werror("duplicate type `"..name.."'")
+ end
+ -- Add #type to defines. A bit unclean to put it in map_archdef.
+ map_archdef["#"..name] = "sizeof("..ctype..")"
+ -- Add new type and emit shortcut define.
+ local num = ctypenum + 1
+ map_type[name] = {
+ ctype = ctype,
+ ctypefmt = format("Dt%X(%%s)", num),
+ reg = reg,
+ }
+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+ ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+ local t = {}
+ for name in pairs(map_type) do t[#t+1] = name end
+ sort(t)
+ out:write("Type definitions:\n")
+ for _,name in ipairs(t) do
+ local tp = map_type[name]
+ local reg = tp.reg or ""
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n",
+ _info.arch, _info.version, _info.release))
+ dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
+ return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt
+end
+
+local map_cond_b = {
+ eq = "f0008000B", ne = "f0408000B", cs = "f0808000B", cc = "f0c08000B", mi = "f1008000B", pl = "f1408000B", vs = "f1808000B", vc = "f1c08000B",
+ hi = "f2008000B", ls = "f2408000B", ge = "f2808000B", lt = "f2c08000B", gt = "f3008000B", le = "f3408000B", al = "f3808000B",
+ hs = "f0808000B", lo = "f0c08000B",
+}
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = function(t, k)
+ local v = map_coreop[k]
+ if v then return v end
+ local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$")
+ local cv = map_cond[cc]
+ if cv then
+ local v = rawget(t, k1..k2)
+ if type(v) == "string" and k1 == "b" then
+ local scv = map_cond_b[cc]
+ return scv
+ elseif type(v) == "string" then
+ return v
+ end
+ end
+ end })
+ setmetatable(map_def, { __index = map_archdef })
+ return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/src/Makefile b/src/Makefile
old mode 100644
new mode 100755
index 30d64be2..83d592f3
--- a/src/Makefile
+++ b/src/Makefile
@@ -36,7 +36,8 @@ CC= $(DEFAULT_CC)
# to slow down the C part by not omitting it. Debugging, tracebacks and
# unwinding are not affected -- the assembler part has frame unwind
# information and GCC emits it where needed (x64) or with -g (see CCDEBUG).
-CCOPT= -O2 -fomit-frame-pointer
+#CCOPT= -O2 -fomit-frame-pointer
+CCOPT= -O2 -fomit-frame-pointer -D__ARM_ARCH_7M__ -DLUAJIT_NO_UNWIND -DLUAJIT_DISABLE_PROFILE
# Use this if you want to generate a smaller binary (but it's slower):
#CCOPT= -Os -fomit-frame-pointer
# Note: it's no longer recommended to use -O3 with GCC 4.x.
@@ -49,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer
#
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
CCOPT_x64=
-CCOPT_arm=
+CCOPT_arm= -mthumb -march=armv7e-m -mfloat-abi=hard -mfpu=fpv5-d16
CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
@@ -71,7 +72,7 @@ CCWARN= -Wall
# as dynamic mode.
#
# Mixed mode creates a static + dynamic library and a statically linked luajit.
-BUILDMODE= mixed
+BUILDMODE= static
#
# Static mode creates a static library and a statically linked luajit.
#BUILDMODE= static
@@ -242,6 +243,9 @@ ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x86
else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -D__ARM_ARCH_7M__=1
+ endif
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
@@ -443,6 +447,9 @@ ifeq (x64,$(TARGET_LJARCH))
endif
else
ifeq (arm,$(TARGET_LJARCH))
+ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH)))
+ DASM_ARCH= armv7m
+ endif
ifeq (iOS,$(TARGET_SYS))
DASM_AFLAGS+= -D IOS
endif
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 9ee47ada..ca0ee47e 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -60,7 +60,11 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#if LJ_TARGET_X86ORX64
#include "../dynasm/dasm_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "../dynasm/dasm_armv7m.h"
+#else
#include "../dynasm/dasm_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "../dynasm/dasm_arm64.h"
#elif LJ_TARGET_PPC
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 7baa011f..1fc72a9d 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -114,6 +114,20 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
emit_asm_words(ctx, p, n-4);
ins = *(uint32_t *)(p+n-4);
#if LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
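+ /* The word holds both half-words in memory order (first half-word in the
+ low bits): 0xf800 checks the first half-word's prefix, while 0xd000 vs.
+ 0x9000 distinguishes BL from B.W in the second half-word. */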
+ if ((ins & 0xd000f800) == 0xd000f000) {
+ fprintf(ctx->fp, "\tbl %s\n", sym);
+ }
+ else if ((ins & 0xd000f800) == 0x9000f000) {
+ fprintf(ctx->fp, "\tb %s\n", sym);
+ }
+ else {
+ fprintf(stderr,
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
+ }
+#else
if ((ins & 0xff000000u) == 0xfa000000u) {
fprintf(ctx->fp, "\tblx %s\n", sym);
} else if ((ins & 0x0e000000u) == 0x0a000000u) {
@@ -125,6 +139,7 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
ins, sym);
exit(1);
}
+#endif
#elif LJ_TARGET_ARM64
if ((ins >> 26) == 0x25u) {
fprintf(ctx->fp, "\tbl %s\n", sym);
@@ -193,6 +208,16 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc
break;
}
#endif
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
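+ /* .thumb_func marks the symbol as Thumb code, so the linker sets bit 0 of
+ its address for interworking. */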
+ fprintf(ctx->fp,
+ "\n\t.globl %s\n"
+ "\t.thumb_func\n"
+ "\t.hidden %s\n"
+ "\t.type %s, " ELFASM_PX "%s\n"
+ "\t.size %s, %d\n"
+ "%s:\n",
+ name,name,name,isfunc ? "function" : "object",name,size,name);
+#else
fprintf(ctx->fp,
"\n\t.globl %s\n"
"\t.hidden %s\n"
@@ -200,6 +225,7 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc
"\t.size %s, %d\n"
"%s:\n",
name, name, name, isfunc ? "function" : "object", name, size, name);
+#endif
break;
case BUILD_coffasm:
fprintf(ctx->fp, "\n\t.globl %s\n", name);
@@ -242,8 +268,16 @@ void emit_asm(BuildCtx *ctx)
int i, rel;
fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ fprintf(ctx->fp, "\t.syntax unified\n");
+ fprintf(ctx->fp, "\t.thumb\n");
+#endif
fprintf(ctx->fp, "\t.text\n");
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ emit_asm_align(ctx, 2);
+#else
emit_asm_align(ctx, 4);
+#endif
#if LJ_TARGET_PS3
emit_asm_label(ctx, ctx->beginsym, ctx->codesz, 0);
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 20e60493..4cadd673 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -38,10 +38,17 @@
#define MAX_SIZE_T (~(size_t)0)
#define MALLOC_ALIGNMENT ((size_t)8U)
+#if LJ_TARGET_NUTTX
+#define DEFAULT_GRANULARITY ((size_t)32U * (size_t)1024U)
+#define DEFAULT_TRIM_THRESHOLD ((size_t)512U * (size_t)1024U)
+#define DEFAULT_MMAP_THRESHOLD ((size_t)32U * (size_t)1024U)
+#define MAX_RELEASE_CHECK_RATE 255
+#else
#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U)
#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U)
#define MAX_RELEASE_CHECK_RATE 255
+#endif
/* ------------------- size_t and alignment properties -------------------- */
@@ -90,9 +97,36 @@
#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
+#if LJ_TARGET_NUTTX
+#include <nuttx/config.h>
+#include <nuttx/mm/mm.h>
+
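+/*
+** Custom heap for NuttX: instead of mmap() from sys/mman.h, the allocator
+** is backed by a static buffer placed into a configurable linker section
+** via locate_data(CONFIG_LUAJIT_ALLOC_SECTION_NAME) and managed through
+** the NuttX mm heap API (mm_malloc/mm_free).
+*/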
+static struct mm_heap_s *g_alloc_heap;
+
+static void init_mmap(void)
+{
+ static uint8_t buffer[CONFIG_LUAJIT_ALLOC_SIZE]
+ locate_data(CONFIG_LUAJIT_ALLOC_SECTION_NAME);
+ g_alloc_heap = mm_initialize("alloc",
+ (void *)buffer,
+ CONFIG_LUAJIT_ALLOC_SIZE);
+}
+#define INIT_MMAP() init_mmap()
+
+#define CALL_MMAP(prng, size) mm_malloc(g_alloc_heap, (size_t)size)
+#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
+
+static int CALL_MUNMAP(void *ptr, size_t size)
+{
+ if (ptr == NULL) return -1;
+ mm_free(g_alloc_heap, ptr);
+ return 0;
+}
+#else
#include <sys/mman.h>
#define LJ_ALLOC_MMAP 1
+#endif
#if LJ_64
diff --git a/src/lj_arch.h b/src/lj_arch.h
index 882c99cb..92fbae5e 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -39,6 +39,7 @@
#define LUAJIT_OS_OSX 3
#define LUAJIT_OS_BSD 4
#define LUAJIT_OS_POSIX 5
+#define LUAJIT_OS_NUTTX 6
/* Number mode. */
#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
@@ -110,6 +111,8 @@
#define LJ_OS_NAME "BSD"
#elif LUAJIT_OS == LUAJIT_OS_POSIX
#define LJ_OS_NAME "POSIX"
+#elif LUAJIT_OS == LUAJIT_OS_NUTTX
+#define LJ_OS_NAME "NUTTX"
#else
#define LJ_OS_NAME "Other"
#endif
@@ -119,6 +122,7 @@
#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
+#define LJ_TARGET_NUTTX (LUAJIT_OS == LUAJIT_OS_NUTTX)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
#if TARGET_OS_IPHONE
@@ -214,8 +218,11 @@
#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
-
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#define LJ_ARCH_NAME "armv7m"
+#else
#define LJ_ARCH_NAME "arm"
+#endif
#define LJ_ARCH_BITS 32
#define LJ_ARCH_ENDIAN LUAJIT_LE
#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
@@ -227,8 +234,12 @@
#define LJ_ABI_EABI 1
#define LJ_TARGET_ARM 1
#define LJ_TARGET_EHRETREG 0
-#define LJ_TARGET_EHRAREG 14
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+/* No need to test the jump address range, because we use veneers. */
+//#define LJ_TARGET_JUMPRANGE 24 /* +-2^24 = +-16MB */
+#else
#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+#endif
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
@@ -236,7 +247,7 @@
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
-#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#define LJ_ARCH_VERSION 70
#elif __ARM_ARCH_6T2__
#define LJ_ARCH_VERSION 61
@@ -479,7 +490,7 @@
#if defined(__ARMEB__)
#error "No support for big-endian ARM"
#endif
-#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
+#if __ARM_ARCH_6M__ /*|| __ARM_ARCH_7M__ || __ARM_ARCH_7EM__*/
#error "No support for Cortex-M CPUs"
#endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 6f5e0c45..429aa161 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -178,7 +178,11 @@ IRFLDEF(FLOFS)
#if LJ_TARGET_X86ORX64
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_emit_armv7m.h"
+#else
#include "lj_emit_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "lj_emit_arm64.h"
#elif LJ_TARGET_PPC
@@ -1655,7 +1659,11 @@ static void asm_loop(ASMState *as)
#if LJ_TARGET_X86ORX64
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_asm_armv7m.h"
+#else
#include "lj_asm_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "lj_asm_arm64.h"
#elif LJ_TARGET_PPC
diff --git a/src/lj_asm_armv7m.h b/src/lj_asm_armv7m.h
new file mode 100644
index 00000000..1bdd4a8a
--- /dev/null
+++ b/src/lj_asm_armv7m.h
@@ -0,0 +1,2520 @@
+/*
+** ARM IR assembler (SSA IR -> machine code).
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+ Reg r = IR(ref)->r;
+ if (ra_noreg(r)) {
+ if (!ra_hashint(r) && !iscrossref(as, ref))
+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+ r = ra_allocref(as, ref, allow);
+ }
+ ra_noweak(as, r);
+ return r;
+}
+
+/* Allocate a scratch register pair. */
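+/* 64-bit values live in even/odd GPR pairs (e.g. for LDRD/STRD), so pick
+** an even register whose odd partner is also free, evicting if needed. */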
+static Reg ra_scratchpair(ASMState *as, RegSet allow)
+{
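+  /* Keep the ARM-mode convention of an even/odd pair (r, r+1): callers
+  ** address the pair as tmp and tmp+1 for the key/type words.
+  */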
+ RegSet pick1 = as->freeset & allow;
+ RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN;
+ Reg r;
+ if (pick2) {
+ r = rset_picktop(pick2);
+ }
+ else {
+ RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN;
+ if (pick) {
+ r = rset_picktop(pick);
+ ra_restore(as, regcost_ref(as->cost[r + 1]));
+ }
+ else {
+ pick = pick1 & (allow << 1) & RSET_GPRODD;
+ if (pick) {
+ r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick) - 1]));
+ }
+ else {
+ r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN);
+ ra_restore(as, regcost_ref(as->cost[r + 1]));
+ }
+ }
+ }
+ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
+ ra_modified(as, r);
+ ra_modified(as, r + 1);
+ RA_DBGX((as, "scratchpair $r $r", r, r + 1));
+ return r;
+}
+
+#if !LJ_SOFTFP
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+ Reg left = irl->r, right = irr->r;
+ if (ra_hasreg(left)) {
+ ra_noweak(as, left);
+ if (ra_noreg(right))
+ right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+ else
+ ra_noweak(as, right);
+ }
+ else if (ra_hasreg(right)) {
+ ra_noweak(as, right);
+ left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+ }
+ else if (ra_hashint(right)) {
+ right = ra_allocref(as, ir->op2, allow);
+ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+ }
+ else {
+ left = ra_allocref(as, ir->op1, allow);
+ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+ }
+ return left | (right << 8);
+}
+#endif
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Generate an exit stub group at the bottom of the reserved MCode memory. */
+static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+{
+ MCode *mxp = as->mcbot;
+ int i;
+ if (mxp + 8 * 4 + 4*EXITSTUBS_PER_GROUP >= as->mctop)
+ asm_mclimit(as);
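+  /* Group layout: two literal-pool veneers (LDR.W PC, [PC] plus a 32-bit
+  ** address) for lj_vm_exit_interp and lj_vm_exit_handler, the handler
+  ** call sequence, then one short branch per exit. The veneers give full
+  ** 32-bit reach, which plain B/BL cannot on Cortex-M.
+  */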
+ /* lj_vm_exit_interp_veneer */
+ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */
+ *mxp++ = (MCode)lj_vm_exit_interp;
+ /* lj_vm_exit_handler_veneer */
+ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */
+ *mxp++ = (MCode)lj_vm_exit_handler;
+ /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
+ *mxp++ = ARMI_STR | ARMI_LSX_U | ARMF_T(RID_LR) | ARMF_N(RID_SP);
+  *mxp++ = ARMI_BL | ARMC_BL((-4) << 1); /* lj_vm_exit_handler_veneer */
+ *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
+ *mxp++ = group*EXITSTUBS_PER_GROUP;
+ for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
+ *mxp++ = ARMI_B_T4 | ARMC_BL((-5 - i) << 1);
+ lj_mcode_sync(as->mcbot, mxp);
+ lj_mcode_commitbot(as->J, mxp);
+ as->mcbot = mxp;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ return mxp - EXITSTUBS_PER_GROUP;
+}
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+ ExitNo i;
+ if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
+ lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+ for (i = 0; i < (nexits + EXITSTUBS_PER_GROUP - 1) / EXITSTUBS_PER_GROUP; i++)
+ if (as->J->exitstubgroup[i] == NULL)
+ as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+}
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guardcc(ASMState *as, ARMCC cc)
+{
+ MCode *target = exitstub_addr(as->J, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = ARMI_BL | ARMC_BL((target - p - 1) << 1);
+ emit_branch(as, ARMF_CC(ARMI_B, cc ^ 1), p);
+ return;
+ }
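+  /* MCode is emitted backwards, so the IT below lands in front of the BL
+  ** and predicates it: Thumb-2 has no per-instruction condition field.
+  */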
+ emit_branchlink(as, ARMI_BL, target);
+ ARMI_IT(cc);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+ IRIns *ir = as->ir;
+ IRRef i = as->curins;
+ if (i > ref + CONFLICT_SEARCH_LIM)
+ return 0; /* Give up, ref is too far away. */
+ while (--i > ref)
+ if (ir[i].o == conflict)
+ return 0; /* Conflict found. */
+ return 1; /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+ !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+ return (int32_t)sizeof(GCtab);
+ return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, int lim)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r)) {
+ if (ir->o == IR_AREF) {
+ if (mayfuse(as, ref)) {
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ ofs += 8*IR(ir->op2)->i;
+ if (ofs > -lim && ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, refa, allow);
+ }
+ }
+ }
+ }
+ else if (ir->o == IR_HREFK) {
+ if (mayfuse(as, ref)) {
+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+ if (ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, ir->op1, allow);
+ }
+ }
+ }
+ else if (ir->o == IR_UREFC) {
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+      *ofsp = (ofs & 255); /* Mask out fewer bits to fit the LDRD offset. */
+ return ra_allock(as, (ofs & ~255), allow);
+ }
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = 0;
+ return RID_SP;
+ }
+ }
+ *ofsp = 0;
+ return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions. */
+static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow, uint32_t *rs)
+{
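+  /* Unlike ARM mode, T32 ALU encodings only take immediate-shifted
+  ** register operands. A register-specified shift is passed back via *rs
+  ** and the caller emits it as a separate shifting MOV.
+  */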
+ IRIns *ir = IR(ref);
+ if (ra_hasreg(ir->r)) {
+ ra_noweak(as, ir->r);
+ return ARMF_M(ir->r);
+ }
+ else if (irref_isk(ref)) {
+ uint32_t k = emit_isk12(ai, ir->i);
+ if (k)
+ return k;
+ }
+ else if (mayfuse(as, ref)) {
+ if (ir->o >= IR_BSHL && ir->o <= IR_BROR) {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL :
+ ir->o == IR_BSHR ? ARMSH_LSR :
+ ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR;
+ if (irref_isk(ir->op2)) {
+ return ARMF_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31));
+ }
+ else {
+ Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m));
+ *rs = ARMF_RSH(sh, s);
+ return ARMF_M(m);
+ }
+ }
+ else if (ir->o == IR_ADD && ir->op1 == ir->op2) {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ return ARMF_M(m) | ARMF_SH(ARMSH_LSL, 1);
+ }
+ }
+ return ARMF_M(ra_allocref(as, ref, allow));
+}
+
+/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
+static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
+ irref_isk(ir->op2) && IR(ir->op2)->i == 2)
+ return ir->op1;
+ return 0; /* No fusion. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as,
+ ARMIns ai,
+ Reg rd,
+ IRRef ref,
+ RegSet allow,
+ int32_t ofs)
+{
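+  /* The ai opcode masks below distinguish VFP VLDR/VSTR (0xed00 class,
+  ** offset < 1024) from the T32 integer load/store encodings (offset
+  ** < 4096, or < 256 for the lsox forms).
+  */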
+ IRIns *ir = IR(ref);
+ Reg base;
+ if (ra_noreg(ir->r) && canfuse(as, ir)) {
+ int32_t lim = (!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) ? 1024 :
+ (ai & 0x00000080) ? 4096 : 256;
+ if (ir->o == IR_ADD) {
+ int32_t ofs2;
+ if (irref_isk(ir->op2) &&
+ (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim &&
+ (!(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) || !(ofs2 & 3))) {
+ ofs = ofs2;
+ ref = ir->op1;
+ }
+ else if (ofs == 0 && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) {
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg rn, rm;
+ IRRef sref = asm_fuselsl2(as, rref);
+ if (sref) {
+ rref = sref;
+ ai |= ARMF_LSL(2);
+ }
+ else if ((sref = asm_fuselsl2(as, lref)) != 0) {
+ lref = rref;
+ rref = sref;
+ ai |= ARMF_LSL(2);
+ }
+ rn = ra_alloc1(as, lref, allow);
+ rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
+ emit_tnm(as, ai, rd, rn, rm);
+ return;
+ }
+ }
+ else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) {
+ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+ ofs += IR(ir->op2)->i;
+ ref = ir->op1;
+ }
+ else if (irref_isk(ir->op1)) {
+ ofs += IR(ir->op1)->i;
+ ref = ir->op2;
+ }
+ else {
+ /* NYI: Fuse ADD with constant. */
+ Reg rn = ra_alloc1(as, ir->op1, allow);
+ uint32_t rs = 0;
+ uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn), &rs);
+ if ((ai & 0x0000fe00) == 0x0000f800)
+ emit_lso(as, ai, rd, rd, ofs);
+ else
+ emit_lsox(as, ai, rd, rd, ofs);
+ emit_dn(as, ARMI_ADD ^ m, rd, rn);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
+ return;
+ }
+ if (ofs <= -lim || ofs >= lim) {
+ Reg rn = ra_alloc1(as, ref, allow);
+ Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+ emit_tnm(as, ai, rd, rn, rm);
+ return;
+ }
+ }
+ }
+ base = ra_alloc1(as, ref, allow);
+#if !LJ_SOFTFP
+ if ((ai & 0x0000ff00) == 0x0000ed00)
+ emit_vlso(as, ai, rd, base, ofs);
+ else
+#endif
+ if ((ai & 0x0000fe00) == 0x0000f800)
+ emit_lso(as, ai, rd, base, ofs);
+ else
+ emit_lsox(as, ai, rd, base, ofs);
+}
+
+#if !LJ_SOFTFP
+/* Fuse to multiply-add/sub instruction. */
+static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
+{
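+  /* Fuse a pending MUL into VMLA/VMLS. The accumulator must live in dest,
+  ** so the addend is copied there first when they differ (the VMOV below
+  ** executes before the fused op due to backwards emission).
+  */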
+ IRRef lref = ir->op1, rref = ir->op2;
+ IRIns *irm;
+ if (lref != rref &&
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+ ra_noreg(irm->r)) ||
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+ (rref = lref, ai = air, ra_noreg(irm->r))))) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+ Reg right, left = ra_alloc2(as,
+ irm,
+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
+ right = (left >> 8); left &= 255;
+ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15));
+ if (dest != add) emit_tm(as, ARMI_VMOV_D, (dest & 15), (add & 15));
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
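+  /* Argument passing follows the AAPCS: with a hard-float ABI, FP args go
+  ** in d0-d7/s0-s15 (back-filling odd s-registers); soft-float and vararg
+  ** calls use GPR pairs (r0-r3) and then the stack.
+  */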
+ uint32_t n, nargs = CCI_XNARGS(ci);
+ int32_t ofs = 0;
+#if LJ_SOFTFP
+ Reg gpr = REGARG_FIRSTGPR;
+#else
+ Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0;
+#endif
+ if ((void *)ci->func)
+ emit_call(as, (void *)ci->func);
+#if !LJ_SOFTFP
+ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+ as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+ gpr = REGARG_FIRSTGPR;
+#endif
+ for (n = 0; n < nargs; n++) {
+ /* Setup args. */
+ IRRef ref = args[n];
+ IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
+ if (ref && irt_isfp(ir->t)) {
+ RegSet of = as->freeset;
+ Reg src;
+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
+ if (irt_isnum(ir->t)) {
+ if (fpr <= REGARG_LASTFPR) {
+ ra_leftov(as, fpr, ref);
+ fpr++;
+ continue;
+ }
+ }
+ else if (fprodd) {
+ /* Ick. */
+ src = ra_alloc1(as, ref, RSET_FPR);
+ emit_tm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00000040);
+ fprodd = 0;
+ continue;
+ }
+ else if (fpr <= REGARG_LASTFPR) {
+ ra_leftov(as, fpr, ref);
+ fprodd = fpr++;
+ continue;
+ }
+ /* Workaround to protect argument GPRs from being used for remat. */
+ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1);
+ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1));
+ fprodd = 0;
+ goto stackfp;
+ }
+ /* Workaround to protect argument GPRs from being used for remat. */
+ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1);
+ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1));
+ if (irt_isnum(ir->t)) gpr = (gpr + 1) & ~1u;
+ if (gpr <= REGARG_LASTGPR) {
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
+ if (irt_isnum(ir->t)) {
+ lj_assertA(rset_test(as->freeset, gpr+1),
+ "reg %d not free", gpr+1); /* Ditto. */
+ emit_tnm(as, ARMI_VMOV_RR_D, gpr, gpr + 1, (src & 15));
+ gpr += 2;
+ }
+ else {
+ emit_tn(as, ARMI_VMOV_R_S, gpr, (src & 15));
+ gpr++;
+ }
+ }
+ else {
+stackfp:
+ if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
+ emit_spstore(as, ir, src, ofs);
+ ofs += irt_isnum(ir->t) ? 8 : 4;
+ }
+ }
+ else
+#endif
+ {
+ if (gpr <= REGARG_LASTGPR) {
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
+ if (ref) ra_leftov(as, gpr, ref);
+ gpr++;
+ }
+ else {
+ if (ref) {
+ Reg r = ra_alloc1(as, ref, RSET_GPR);
+ emit_spstore(as, ir, r, ofs);
+ }
+ ofs += 4;
+ }
+ }
+ }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ RegSet drop = RSET_SCRATCH;
+ int hiop = ((ir + 1)->o == IR_HIOP && !irt_isnil((ir + 1)->t));
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ if (hiop && ra_hasreg((ir + 1)->r))
+ rset_clear(drop, (ir + 1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64 | CCI_VARARG))) {
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+ if (irt_isnum(ir->t))
+ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest);
+ else
+ emit_tn(as, ARMI_VMOV_S_R, RID_RET, dest);
+ }
+ else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+ }
+ else if (hiop) {
+ ra_destpair(as, ir);
+ }
+ else {
+ ra_destreg(as, ir, RID_RET);
+ }
+ }
+ UNUSED(ci);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+ IRRef args[CCI_NARGS_MAX * 2];
+ CCallInfo ci;
+ IRRef func;
+ IRIns *irf;
+ ci.flags = asm_callx_flags(as, ir);
+ asm_collectargs(as, ir, &ci, args);
+ asm_setupresult(as, ir, &ci);
+ func = ir->op2; irf = IR(func);
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+ if (irref_isk(func)) {
+ /* Call to constant address. */
+ ci.func = (ASMFunction)(void *)(irf->i);
+ }
+ else {
+ /* Need a non-argument register for indirect calls. */
+ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12 + 1));
+ emit_m(as, ARMI_BLXr, freg);
+ ci.func = (ASMFunction)(void *)0;
+ }
+ asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+ void *pc = ir_kptr(IR(ir->op2));
+ int32_t delta = 1 + LJ_FR2 + bc_a(*((const BCIns *)pc - 1));
+ as->topslot -= (BCReg)delta;
+ if ((int32_t)as->topslot < 0) as->topslot = 0;
+ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
+ /* Need to force a spill on REF_BASE now to update the stack slot. */
+ emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+ emit_setgl(as, base, jit_base);
+ emit_addptr(as, base, -8*delta);
+ asm_guardcc(as, CC_NE);
+ emit_nm(as,
+ ARMI_CMP,
+ RID_TMP,
+ ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
+ emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  /* NYI: on ARMv6T2+ this could be a single bitfield insert:
+  ** emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp)
+  ** instead of the AND/ORR pair below.
+  */
+  emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+  emit_dn(as, ARMC_K12(ARMI_AND, 255), tmp, tmp);
+ emit_lso(as, ARMI_LDR, RID_TMP,
+ ra_allock(as, (addr & ~4095),
+ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+ (addr & 4095));
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+#if !LJ_SOFTFP
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
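+  /* Convert to int, back to double and compare (code reads bottom-up):
+  ** the CC_NE guard fires if the value was not an exact 32-bit integer.
+  */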
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_guardcc(as, CC_NE);
+ emit_t(as, ARMI_VMRS, 0);
+ emit_tm(as, ARMI_VCMP_D, (tmp & 15), (left & 15));
+ emit_tm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15));
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15));
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
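+  /* op2 holds the 2^52+2^51 bias; after the VADD the integer result sits
+  ** in the low word of the double, which is moved out with VMOV.
+  */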
+ RegSet allow = RSET_FPR;
+ Reg left = ra_alloc1(as, ir->op1, allow);
+ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+ Reg tmp = ra_scratch(as, rset_clear(allow, right));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
+}
+#endif
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
+ IRRef lref = ir->op1;
+ /* 64 bit integer conversions are handled by SPLIT. */
+ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
+#if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+ "IR %04d has FP type",
+ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) {
+ /* FP to FP conversion. */
+ emit_tm(as,
+ st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32,
+ (dest & 15),
+ (ra_alloc1(as, lref, RSET_FPR) & 15));
+ }
+ else {
+ /* Integer to FP conversion. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ ARMIns ai = irt_isfloat(ir->t) ?
+ (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) :
+ (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32);
+ emit_tm(as, ai, (dest & 15), (dest & 15));
+ emit_tn(as, ARMI_VMOV_S_R, left, (dest & 15));
+ }
+ }
+ else if (stfp) {
+ /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ }
+ else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ ARMIns ai;
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ ai = irt_isint(ir->t) ?
+ (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
+ (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
+ emit_tm(as, ai, (tmp & 15), (left & 15));
+ }
+ }
+ else
+#endif
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) {
+ /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((as->flags & JIT_F_ARMV6)) {
+ ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
+ st == IRT_U8 ? ARMI_UXTB :
+ st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;
+ emit_dm(as, ai, dest, left);
+ }
+ else if (st == IRT_U8) {
+ emit_dn(as, ARMC_K12(ARMI_AND, 255), dest, left);
+ }
+ else {
+ uint32_t shift = st == IRT_I8 ? 24 : 16;
+ ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;
+ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, RID_TMP);
+ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);
+ }
+ }
+ else {
+ /* Handle 32/32 bit no-op (cast). */
+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
+ }
+ }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+ IRRef args[2];
+ Reg rlo = 0, rhi = 0, tmp;
+ int destused = ra_used(ir);
+ int32_t ofs = 0;
+ ra_evictset(as, RSET_SCRATCH);
+#if LJ_SOFTFP
+ if (destused) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir + 1)->s) &&
+ (ir->s & 1) == 0 && ir->s + 1 == (ir + 1)->s) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ Reg r = (ir + i)->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ ra_modified(as, r);
+ emit_spload(as, ir + i, r, sps_scale((ir + i)->s));
+ }
+ }
+ ofs = sps_scale(ir->s);
+ destused = 0;
+ }
+ else {
+ rhi = ra_dest(as, ir + 1, RSET_GPR);
+ rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+ }
+ }
+ asm_guardcc(as, CC_EQ);
+ if (destused) {
+ emit_lso(as, ARMI_LDR, rhi, RID_SP, 4);
+ emit_lso(as, ARMI_LDR, rlo, RID_SP, 0);
+ }
+#else
+ UNUSED(rhi);
+ if (destused) {
+ if (ra_hasspill(ir->s)) {
+ ofs = sps_scale(ir->s);
+ destused = 0;
+ if (ra_hasreg(ir->r)) {
+ ra_free(as, ir->r);
+ ra_modified(as, ir->r);
+ emit_spload(as, ir, ir->r, ofs);
+ }
+ }
+ else {
+ rlo = ra_dest(as, ir, RSET_FPR);
+ }
+ }
+ asm_guardcc(as, CC_EQ);
+ if (destused)
+ emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0);
+#endif
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); /* Test return status. */
+ args[0] = ir->op1; /* GCstr *str */
+ args[1] = ASMREF_TMP1; /* TValue *n */
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ if (ofs == 0)
+ emit_dm(as, ARMI_MOV, tmp, RID_SP);
+ else
+ emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+{
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+ RID_SP, 0);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+ RID_SP, 4);
+#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
+#endif
+ } else if (irref_isk(ref)) {
+ /* Use the number constant itself as a TValue. */
+ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+ } else {
+#if LJ_SOFTFP
+ lj_assertA(0, "unsplit FP op");
+#else
+ /* Otherwise force a spill and use the spill slot. */
+ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+#endif
+ }
+ } else {
+ /* Otherwise use [sp] and [sp+4] to hold the TValue.
+ ** This assumes the following call has max. 4 args.
+ */
+ Reg type;
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_GPR);
+ emit_lso(as, ARMI_STR, src, RID_SP, 0);
+ }
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+ type = ra_alloc1(as, ref+1, RSET_GPR);
+ else
+ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+ emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ }
+ else {
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ }
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
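+  /* Array slots are 8-byte TValues: a constant index folds into a k12
+  ** ADD immediate, otherwise the index is added with LSL #3.
+  */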
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg idx, base;
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i);
+ if (k) {
+ base = ra_alloc1(as, refa, RSET_GPR);
+ emit_dn(as, ARMI_ADD ^ k, dest, base);
+ return;
+ }
+ }
+ base = ra_alloc1(as, ir->op1, RSET_GPR);
+ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, base, idx);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+** Node *n = hashkey(t, key);
+** do {
+** if (lj_obj_equal(&n->key, key)) return &n->val;
+** } while ((n = nextnode(n)));
+** return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+ RegSet allow = RSET_GPR;
+ int destused = ra_used(ir);
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP;
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
+ IRType1 kt = irkey->t;
+ int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt));
+ uint32_t khash;
+ MCLabel l_end, l_loop;
+ rset_clear(allow, tab);
+ if (!irref_isk(refkey) || irt_isstr(kt)) {
+#if LJ_SOFTFP
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ if (irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey + 1)->r)) {
+ keynumhi = (irkey + 1)->r;
+ keyhi = RID_TMP;
+ ra_noweak(as, keynumhi);
+ }
+ else {
+ keyhi = keynumhi = ra_allocref(as, refkey + 1, allow);
+ }
+ rset_clear(allow, keynumhi);
+ khi = 0;
+ }
+#else
+ if (irt_isnum(kt)) {
+ key = ra_scratch(as, allow);
+ rset_clear(allow, key);
+ keyhi = keynumhi = ra_scratch(as, allow);
+ rset_clear(allow, keyhi);
+ khi = 0;
+ }
+ else {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ }
+#endif
+ }
+ else if (irt_isnum(kt)) {
+ int32_t val = (int32_t)ir_knum(irkey)->u32.lo;
+ k = emit_isk12(ARMI_CMP, val);
+ if (!k) {
+ key = ra_allock(as, val, allow);
+ rset_clear(allow, key);
+ }
+ val = (int32_t)ir_knum(irkey)->u32.hi;
+ khi = emit_isk12(ARMI_CMP, val);
+ if (!khi) {
+ keyhi = ra_allock(as, val, allow);
+ rset_clear(allow, keyhi);
+ }
+ }
+ else if (!irt_ispri(kt)) {
+ k = emit_isk12(ARMI_CMP, irkey->i);
+ if (!k) {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ }
+ }
+ if (!irt_ispri(kt))
+ tmp = ra_scratchpair(as, allow);
+
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
+ l_end = emit_label(as);
+ as->invmcp = NULL;
+ if (merge == IR_NE)
+ asm_guardcc(as, CC_AL);
+ else if (destused)
+ emit_loada(as, dest, niltvg(J2G(as->J)));
+
+ /* Follow hash chain until the end. */
+ l_loop = --as->mcp;
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), dest);
+ emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next));
+
+ /* Type and value comparison. */
+ if (merge == IR_EQ)
+ asm_guardcc(as, CC_EQ);
+ else {
+ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+ }
+ if (!irt_ispri(kt)) {
+ emit_nm(as, ARMI_CMP ^ k, tmp, key);
+ ARMI_IT(CC_EQ);
+ emit_nm(as, ARMI_CMP ^ khi, tmp + 1, keyhi);
+ emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key));
+ }
+ else {
+ emit_n(as, ARMI_CMP ^ khi, tmp);
+ emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it));
+ }
+ *l_loop = ARMF_CC(ARMI_B, CC_NE) | ARMC_B((as->mcp - l_loop - 1) << 1);
+
+ /* Load main position relative to tab->node into dest. */
+ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ }
+ else {
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
+ if (irt_isstr(kt)) {
+ /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, ARMI_AND, tmp, tmp + 1, RID_TMP);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_lso(as, ARMI_LDR, tmp + 1, key, (int32_t)offsetof(GCstr, sid));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ }
+ else if (irref_isk(refkey)) {
+ emit_opk(as,
+ ARMI_AND,
+ tmp,
+ RID_TMP,
+ (int32_t)khash,
+ rset_exclude(rset_exclude(RSET_GPR, tab), dest));
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ }
+ else {
+ /* Must match with hash*() in lj_tab.c. */
+ if (ra_hasreg(keynumhi)) {
+ /* Canonicalize +-0.0 to 0.0. */
+ if (keyhi == RID_TMP) {
+ emit_dm(as, ARMI_MOV, keyhi, keynumhi);
+ ARMI_IT(CC_NE);
+ }
+ emit_d(as, ARMC_K12(ARMI_MOV, 0), keyhi);
+ ARMI_IT(CC_EQ);
+ }
+ emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP);
+ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT3), tmp, tmp, tmp + 1);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_dnm(as,
+ ARMI_EOR|ARMF_SH(ARMSH_ROR, 32 - ((HASH_ROT2 + HASH_ROT1) & 31)),
+ tmp,
+ tmp+1,
+ tmp);
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT1), tmp + 1, tmp + 1, tmp);
+ if (ra_hasreg(keynumhi)) {
+ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key);
+ emit_dnm(as, ARMI_ORR | ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */
+ emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi);
+#if !LJ_SOFTFP
+ emit_tnm(as,
+ ARMI_VMOV_RR_D,
+ key,
+ keynumhi,
+ (ra_alloc1(as, refkey, RSET_FPR) & 15));
+#endif
+ }
+ else {
+ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key);
+ emit_opk(as,
+ ARMI_ADD,
+ tmp,
+ key,
+ (int32_t)HASH_BIAS,
+ rset_exclude(rset_exclude(RSET_GPR, tab), key));
+ }
+ }
+ }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
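+  /* Key word and type word are compared back to back; the second compare
+  ** is IT(EQ)-predicated, so either mismatch trips the CC_NE guard.
+  */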
+ IRIns *kslot = IR(ir->op2);
+ IRIns *irkey = IR(kslot->op1);
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+ Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 4095) {
+ idx = dest;
+ rset_clear(allow, dest);
+ kofs = (int32_t)offsetof(Node, key);
+ }
+ else if (ra_hasreg(dest)) {
+ emit_opk(as, ARMI_ADD, dest, node, ofs, allow);
+ }
+ asm_guardcc(as, CC_NE);
+ if (!irt_ispri(irkey->t)) {
+ RegSet even = (as->freeset & allow);
+ even = even & (even >> 1) & RSET_GPREVEN;
+ if (even) {
+ key = ra_scratch(as, even);
+ if (rset_test(as->freeset, key + 1)) {
+ type = key + 1;
+ ra_modified(as, type);
+ }
+ }
+ else {
+ key = ra_scratch(as, allow);
+ }
+ rset_clear(allow, key);
+ }
+ rset_clear(allow, type);
+ if (irt_isnum(irkey->t)) {
+ emit_opk(as,
+ ARMI_CMP,
+ 0,
+ type,
+ (int32_t)ir_knum(irkey)->u32.hi,
+ allow);
+ ARMI_IT(CC_EQ);
+ emit_opk(as,
+ ARMI_CMP,
+ 0,
+ key,
+ (int32_t)ir_knum(irkey)->u32.lo,
+ allow);
+ }
+ else {
+ if (ra_hasreg(key)) {
+ emit_opk(as, ARMI_CMP, 0, key, irkey->i, allow);
+ ARMI_IT(CC_EQ);
+ }
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype(irkey->t)), type);
+ }
+ emit_lso(as, ARMI_LDR, type, idx, kofs + 4);
+ if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs);
+ if (ofs > 4095)
+ emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+ emit_lsptr(as, ARMI_LDR, dest, v);
+ }
+ else {
+ Reg uv = ra_scratch(as, RSET_GPR);
+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->o == IR_UREFC) {
+ asm_guardcc(as, CC_NE);
+ emit_n(as, ARMC_K12(ARMI_CMP, 1), RID_TMP);
+ emit_opk(as,
+ ARMI_ADD,
+ dest,
+ uv,
+ (int32_t)offsetof(GCupval, tv),
+ RSET_GPR);
+ emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+ }
+ else {
+ emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+ }
+ emit_lso(as,
+ ARMI_LDR,
+ uv,
+ func,
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+ }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+ UNUSED(as); UNUSED(ir);
+ lj_assertA(!ra_used(ir), "unfused FREF");
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ IRRef ref = ir->op2, refk = ir->op1;
+ Reg r;
+ if (irref_isk(ref)) {
+ IRRef tmp = refk; refk = ref; ref = tmp;
+ }
+ else if (!irref_isk(refk)) {
+ uint32_t k, m = ARMC_K12(0, sizeof(GCstr));
+ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+ IRIns *irr = IR(ir->op2);
+ if (ra_hasreg(irr->r)) {
+ ra_noweak(as, irr->r);
+ right = irr->r;
+ }
+ else if (mayfuse(as, irr->op2) &&
+ irr->o == IR_ADD && irref_isk(irr->op2) &&
+ (k = emit_isk12(ARMI_ADD,
+ (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) {
+ m = k;
+ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+ }
+ else {
+ right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
+ }
+ emit_dn(as, ARMI_ADD ^ m, dest, dest);
+ emit_dnm(as, ARMI_ADD, dest, left, right);
+ return;
+ }
+ r = ra_alloc1(as, ref, RSET_GPR);
+ emit_opk(as,
+ ARMI_ADD,
+ dest,
+ r,
+ sizeof(GCstr) + IR(refk)->i,
+ rset_exclude(RSET_GPR, r));
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
+{
+ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return ARMI_LDRSB;
+ case IRT_U8: return ARMI_LDRB;
+ case IRT_I16: return ARMI_LDRSH;
+ case IRT_U16: return ARMI_LDRH;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
+ default: return ARMI_LDR;
+ }
+}
+
+static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
+{
+ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return ARMI_STRB;
+ case IRT_I16: case IRT_U16: return ARMI_STRH;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
+ default: return ARMI_STR;
+ }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ ARMIns ai = asm_fxloadins(as, ir);
+ Reg idx;
+ int32_t ofs;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
+ ofs = 0;
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) {
+ /* Turn the t->array load into an add for colocated arrays. */
+ emit_dn(as, ARMC_K12(ARMI_ADD, ofs), dest, idx);
+ return;
+ }
+ }
+ ofs = field_ofs[ir->op2];
+ }
+ emit_lso(as, ai, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+ ARMIns ai = asm_fxstoreins(as, ir);
+ emit_lso(as, ai, src, idx, ofs);
+ }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as,
+ ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+{
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as,
+ ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ asm_fusexref(as,
+ asm_fxstoreins(as, ir),
+ src,
+ ir->op1,
+ rset_exclude(RSET_GPR, src),
+ ofs);
+ }
+}
+
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
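+  /* Type check (bottom-up): the tag at ofs+4 is tested via CMN with the
+  ** negated itype; numbers use CC_HS so any non-number tag exits.
+  */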
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, idx;
+ RegSet allow = RSET_GPR;
+ int32_t ofs = 0;
+ if (hiop && ra_used(ir + 1)) {
+ type = ra_dest(as, ir + 1, allow);
+ rset_clear(allow, type);
+ }
+ if (ra_used(ir)) {
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ }
+ idx = asm_fuseahuref(as,
+ ir->op1,
+ &ofs,
+ allow,
+ (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ if (!hiop || type == RID_NONE) {
+ rset_clear(allow, idx);
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+ rset_test((as->freeset & allow), dest + 1)) {
+ type = dest + 1;
+ ra_modified(as, type);
+ }
+ else {
+ type = RID_TMP;
+ }
+ }
+ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type);
+ if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+ if (t == IRT_NUM)
+ emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs);
+ else
+#endif
+ emit_lso(as, ARMI_LDR, dest, idx, ofs);
+ }
+ emit_lso(as, ARMI_LDR, type, idx, ofs + 4);
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+ if (ir->r != RID_SINK) {
+ RegSet allow = RSET_GPR;
+ Reg idx, src = RID_NONE, type = RID_NONE;
+ int32_t ofs = 0;
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ src = ra_alloc1(as, ir->op2, RSET_FPR);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024);
+ emit_vlso(as, ARMI_VSTR_D, src, idx, ofs);
+ }
+ else
+#endif
+ {
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ if (!irt_ispri(ir->t)) {
+ src = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, src);
+ }
+ if (hiop)
+ type = ra_alloc1(as, (ir + 1)->op2, allow);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);
+ if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
+ emit_lso(as, ARMI_STR, type, idx, ofs + 4);
+ }
+ }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+ int32_t ofs = 8*((int32_t)ir->op1 - 1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
+#if LJ_SOFTFP
+ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir + 1)) {
+ type = ra_dest(as, ir + 1, allow);
+ rset_clear(allow, type);
+ }
+#else
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
+ dest = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, dest);
+ t = IRT_NUM; /* Continue with a regular number type check. */
+ }
+ else
+#endif
+ if (ra_used(ir)) {
+ Reg tmp = RID_NONE;
+ if ((ir->op2 & IRSLOAD_CONVERT))
+ tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+ if ((ir->op2 & IRSLOAD_CONVERT)) {
+ if (t == IRT_INT) {
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
+ t = IRT_NUM; /* Check for original type. */
+ }
+ else {
+ emit_tm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
+ emit_tn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
+ t = IRT_INT; /* Check for original type. */
+ }
+ dest = tmp;
+ }
+ goto dotypecheck;
+ }
+ base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+ rset_clear(allow, base);
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ if (ra_noreg(type)) {
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+ rset_test((as->freeset & allow), dest + 1)) {
+ type = dest + 1;
+ ra_modified(as, type);
+ }
+ else {
+ type = RID_TMP;
+ }
+ }
+ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+ if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+ emit_n(as, ARMC_K12(ARMI_CMN, 1), type);
+ emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type);
+ } else {
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type);
+ }
+ }
+ if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+ if (t == IRT_NUM) {
+ if (ofs < 1024) {
+ emit_vlso(as, ARMI_VLDR_D, dest, base, ofs);
+ }
+ else {
+ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4);
+ emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0);
+ emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow);
+ return;
+ }
+ }
+ else
+#endif
+ emit_lso(as, ARMI_LDR, dest, base, ofs);
+ }
+ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4);
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+ CTState *cts = ctype_ctsG(J2G(as->J));
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ RegSet drop = RSET_SCRATCH;
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ ra_evictset(as, drop);
+ if (ra_used(ir))
+ ra_destreg(as, ir, RID_RET); /* GCcdata * */
+
+ /* Initialize immutable cdata object. */
+ if (ir->o == IR_CNEWI) {
+ int32_t ofs = sizeof(GCcdata);
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ if (sz == 8) {
+ ofs += 4; ir++;
+ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
+ }
+ for (;;) {
+ Reg r = ra_alloc1(as, ir->op2, allow);
+ emit_lso(as, ARMI_STR, r, RID_RET, ofs);
+ rset_clear(allow, r);
+ if (ofs == sizeof(GCcdata)) break;
+ ofs -= 4; ir--;
+ }
+ }
+ else if (ir->op2 != REF_NIL) {
+ /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
+ }
+
+ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+ {
+ uint32_t k = emit_isk12(ARMI_MOV, id);
+ Reg r = k ? RID_R1 : ra_allock(as, id, allow);
+ emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+ emit_lso(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+ emit_d(as, ARMC_K12(ARMI_MOV, ~LJ_TCDATA), RID_TMP);
+ if (k) emit_d(as, ARMI_MOV ^ k, RID_R1);
+ }
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
+ asm_gencall(as, ci, args);
+ ra_allockreg(as,
+ (int32_t)(sz + sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+}
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+ Reg gr = ra_allock(as,
+ i32ptr(J2G(as->J)),
+ rset_exclude(rset_exclude(RSET_GPR, tab), link));
+ Reg mark = RID_TMP;
+ MCLabel l_end = emit_label(as);
+ emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+ emit_lso(as,
+ ARMI_STR,
+ tab,
+ gr,
+ (int32_t)offsetof(global_State, gc.grayagain));
+ emit_dn(as, ARMC_K12(ARMI_BIC, LJ_GC_BLACK), mark, mark);
+ emit_lso(as,
+ ARMI_LDR,
+ link,
+ gr,
+ (int32_t)offsetof(global_State, gc.grayagain));
+ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), mark);
+ emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+ IRRef args[2];
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ir->op1; /* TValue *tv */
+ asm_gencall(as, ci, args);
+  /* ARM mode would patch the preceding instruction's condition field to
+  ** CC_NE; Thumb-2 encodings have no such field, so always branch.
+  */
+  emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+ ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1));
+ obj = IR(ir->op1)->r;
+ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), tmp);
+ ARMI_IT(CC_NE);
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_WHITES), RID_TMP);
+ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+ emit_lso(as,
+ ARMI_LDRB,
+ tmp,
+ obj,
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+ emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+#if !LJ_SOFTFP
+static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
+ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15));
+}
+
+static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+ emit_tm(as, ai, (dest & 15), (left & 15));
+}
+
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+ /* The modified regs must match with the *.dasc implementation. */
+ RegSet drop = RID2RSET(RID_R0) | RID2RSET(RID_R1) | RID2RSET(RID_R2) |
+ RID2RSET(RID_R3) | RID2RSET(RID_R12);
+ RegSet of;
+ Reg dest, src;
+ ra_evictset(as, drop);
+ dest = ra_dest(as, ir, RSET_FPR);
+ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+ emit_call(as,
+ id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+ id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+ (void *)lj_vm_trunc_sf);
+ /* Workaround to protect argument GPRs from being used for remat. */
+ of = as->freeset;
+ as->freeset &= ~RSET_RANGE(RID_R0, RID_R1 + 1);
+ as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+ src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1 + 1));
+ emit_tnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+ asm_fpunary(as, ir, ARMI_VSQRT_D);
+ else
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
+}
+#endif
+
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
+{
+ IRIns *ir;
+ if (irref_isk(rref))
+ return 0; /* Don't swap constants to the left. */
+ if (irref_isk(lref))
+ return 1; /* But swap constants to the right. */
+ ir = IR(rref);
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
+ return 0; /* Don't swap fusable operands to the left. */
+ ir = IR(lref);
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
+ return 1; /* But swap fusable operands to the right. */
+ return 0; /* Otherwise don't swap. */
+}
+
+static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m, rs = 0;
+ if (asm_swapops(as, lref, rref)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC)
+ ai ^= (ARMI_SUB ^ ARMI_RSB);
+ }
+ left = ra_hintalloc(as, lref, dest, RSET_GPR);
+ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left), &rs);
+ if (irt_isguard(ir->t)) {
+ /* For IR_ADDOV etc. */
+ asm_guardcc(as, CC_VS);
+ ai |= ARMI_S;
+ }
+ emit_dn(as, ai ^ m, dest, left);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
+}
+
+/* Try to drop cmp r, #0. */
+static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai)
+{
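+  /* If flagmcp marks a pending cmp #0 on this result, set the S flag on
+  ** this op and drop the compare. Only EQ/NE survive unchanged; GE/LT are
+  ** remapped to PL/MI since only N and Z are meaningful afterwards.
+  */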
+ if (as->flagmcp == as->mcp) {
+ uint32_t cc = ((as->mcp[1] >> 20) & 0xf);
+ as->flagmcp = NULL;
+ if (cc <= CC_NE) {
+ as->mcp++;
+ ai |= ARMI_S;
+ }
+ else if (cc == CC_GE) {
+ * ++as->mcp ^= ((CC_GE ^ CC_PL) << 20);
+ ai |= ARMI_S;
+ }
+ else if (cc == CC_LT) {
+ * ++as->mcp ^= ((CC_LT ^ CC_MI) << 20);
+ ai |= ARMI_S;
+ } /* else: other conds don't work in general. */
+ }
+ return ai;
+}
+
+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ asm_intop(as, ir, asm_drop_cmp0(as, ai));
+}
+
+static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ emit_dn(as, ARMC_K12(ai, 0), dest, left);
+}
+
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ Reg tmp = RID_NONE;
+ /* ARMv5 restriction: dest != left and dest_hi != left. */
+ if (dest == left && left != right) { left = right; right = dest; }
+ if (irt_isguard(ir->t)) {
+ /* IR_MULOV */
+ if (!(as->flags & JIT_F_ARMV6) && dest == left)
+ tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
+ asm_guardcc(as, CC_NE);
+ emit_nm(as, ARMI_TEQ | ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
+ emit_dnm(as, ARMI_SMULL | ARMF_T(dest), RID_TMP, left, right);
+ }
+ else {
+ if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP;
+ emit_dnm(as, ARMI_MUL, dest, left, right);
+ }
+ /* Only need this for the dest == left == right case. */
+ if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D))
+ asm_fparith(as, ir, ARMI_VADD_D);
+ return;
+ }
+#endif
+ asm_intop_s(as, ir, ARMI_ADD);
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D))
+ asm_fparith(as, ir, ARMI_VSUB_D);
+ return;
+ }
+#endif
+ asm_intop_s(as, ir, ARMI_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ asm_fparith(as, ir, ARMI_VMUL_D);
+ return;
+ }
+#endif
+ asm_intmul(as, ir);
+}
+
+#define asm_addov(as, ir) asm_add(as, ir)
+#define asm_subov(as, ir) asm_sub(as, ir)
+#define asm_mulov(as, ir) asm_mul(as, ir)
+
+#if !LJ_SOFTFP
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
+#endif
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ asm_fpunary(as, ir, ARMI_VNEG_D);
+ return;
+ }
+#endif
+ asm_intneg(as, ir, ARMI_RSB);
+}
+
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
+{
+ ai = asm_drop_cmp0(as, ai);
+ if (ir->op2 == 0) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t rs = 0;
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR, &rs);
+ emit_d(as, ai ^ m, dest);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
+ }
+ else {
+ /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+ asm_intop(as, ir, ai);
+ }
+}
+
+#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ if ((as->flags & JIT_F_ARMV6)) {
+ emit_dnm(as, ARMI_REV, dest, left, left);
+ }
+ else {
+ Reg tmp2 = dest;
+ if (tmp2 == left)
+ tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left));
+ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP);
+ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_ROR, 8), tmp2, left);
+ emit_dn(as, ARMC_K12(ARMI_BIC, 256 * 8 | 127), RID_TMP, RID_TMP);
+ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left);
+ }
+}
+
+#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
+
+static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
+{
+ if (irref_isk(ir->op2)) {
+ /* Constant shifts. */
+ /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */
+ /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ int32_t shift = (IR(ir->op2)->i & 31);
+ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, left);
+ }
+ else {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | ARMF_RSH(sh, right), dest, left);
+ }
+}
+
+#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
+{
+ uint32_t kcmp = 0, kmov = 0;
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ Reg right = 0;
+ if (irref_isk(ir->op2)) {
+ kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i);
+ if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i);
+ }
+ if (!kmov) {
+ kcmp = 0;
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ }
+ if (kmov || dest != right) {
+ emit_dm(as, ARMI_MOV ^ kmov, dest, right);
+ ARMI_IT(cc);
+ cc ^= 1; /* Must use opposite conditions for paired moves. */
+ }
+ else {
+ cc ^= (CC_LT ^ CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */
+ }
+ if (dest != left) {
+ emit_dm(as, ARMI_MOV, dest, left);
+ ARMI_IT(cc);
+ }
+ emit_nm(as, ARMI_CMP ^ kcmp, left, right);
+}
+
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
+ IRRef args[4];
+ args[0] = ir->op1; args[1] = (ir + 1)->op1;
+ args[2] = ir->op2; args[3] = (ir + 1)->op2;
+ /* __aeabi_cdcmple preserves r0-r3. */
+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+ if (ra_hasreg((ir + 1)->r)) rset_clear(drop, (ir + 1)->r);
+ if (!rset_test(as->freeset, RID_R2) &&
+ regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2);
+ if (!rset_test(as->freeset, RID_R3) &&
+ regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3);
+ ra_evictset(as, drop);
+ ra_destpair(as, ir);
+ emit_dm(as, ARMI_MOV, RID_RETHI, RID_R3);
+ ARMI_IT(cc);
+ emit_dm(as, ARMI_MOV, RID_RETLO, RID_R2);
+ ARMI_IT(cc);
+ emit_call(as, (void *)ci->func);
+ for (r = RID_R0; r <= RID_R3; r++)
+ ra_leftov(as, r, args[r - RID_R0]);
+}
+#else
+static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)
+{
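+  /* Bottom-up: VCMP sets the FPSCR flags, VMRS transfers them, then two
+  ** IT-predicated VMOVs select the min/max operand into dest.
+  */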
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = ((left >> 8) & 15); left &= 15;
+ if (dest != left) {
+ emit_tm(as, ARMI_VMOV_D, dest, left);
+ ARMI_IT(cc ^ 1);
+ }
+ if (dest != right) {
+ emit_tm(as, ARMI_VMOV_D, dest, right);
+ ARMI_IT(cc);
+ }
+ emit_t(as, ARMI_VMRS, 0);
+ emit_tm(as, ARMI_VCMP_D, left, right);
+}
+#endif
+
+static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
+{
+#if LJ_SOFTFP
+ UNUSED(fcc);
+#else
+ if (irt_isnum(ir->t))
+ asm_fpmin_max(as, ir, fcc);
+ else
+#endif
+ asm_intmin_max(as, ir, cc);
+}
+
+#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
+#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Map of comparisons to flags. ORDER IR. */
+static const uint8_t asm_compmap[IR_ABC + 1] = {
+ /* op FP swp int cc FP cc */
+ /* LT */ CC_GE + (CC_HS << 4),
+ /* GE x */ CC_LT + (CC_HI << 4),
+ /* LE */ CC_GT + (CC_HI << 4),
+ /* GT x */ CC_LE + (CC_HS << 4),
+ /* ULT x */ CC_HS + (CC_LS << 4),
+ /* UGE */ CC_LO + (CC_LO << 4),
+ /* ULE x */ CC_HI + (CC_LO << 4),
+ /* UGT */ CC_LS + (CC_LS << 4),
+ /* EQ */ CC_NE + (CC_NE << 4),
+ /* NE */ CC_EQ + (CC_EQ << 4),
+ /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
+};
+
+#if LJ_SOFTFP
+/* FP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
+ IRRef args[4];
+ int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1);
+ args[swp ^ 0] = ir->op1; args[swp ^ 1] = (ir + 1)->op1;
+ args[swp ^ 2] = ir->op2; args[swp ^ 3] = (ir + 1)->op2;
+ /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */
+ for (r = RID_R0; r <= RID_R3; r++)
+ if (!rset_test(as->freeset, r) &&
+ regcost_ref(as->cost[r]) == args[r - RID_R0]) rset_clear(drop, r);
+ ra_evictset(as, drop);
+ asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+ emit_call(as, (void *)ci->func);
+ for (r = RID_R0; r <= RID_R3; r++)
+ ra_leftov(as, r, args[r - RID_R0]);
+}
+#else
+/* FP comparisons. */
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+ Reg left, right;
+ ARMIns ai;
+ int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
+ if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
+ left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15);
+ right = 0;
+ ai = ARMI_VCMPZ_D;
+ }
+ else {
+ left = ra_alloc2(as, ir, RSET_FPR);
+ if (swp) {
+ right = (left & 15); left = ((left >> 8) & 15);
+ }
+ else {
+ right = ((left >> 8) & 15); left &= 15;
+ }
+ ai = ARMI_VCMP_D;
+ }
+ asm_guardcc(as, (asm_compmap[ir->o] >> 4));
+ emit_t(as, ARMI_VMRS, 0);
+ emit_tm(as, ai, left, right);
+}
+#endif
+
+/* Integer comparisons. */
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+ ARMCC cc = (asm_compmap[ir->o] & 15);
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg left;
+ uint32_t m, rs = 0;
+ int cmpprev0 = 0;
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
+ "bad comparison data type %d", irt_type(ir->t));
+ if (asm_swapops(as, lref, rref)) {
+ Reg tmp = lref; lref = rref; rref = tmp;
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
+ else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
+ }
+ if (irref_isk(rref) && IR(rref)->i == 0) {
+ IRIns *irl = IR(lref);
+ cmpprev0 = (irl + 1 == ir);
+ /* Combine comp(BAND(left, right), 0) into tst left, right. */
+ if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
+ IRRef blref = irl->op1, brref = irl->op2;
+ uint32_t m2 = 0;
+ Reg bleft;
+ if (asm_swapops(as, blref, brref)) {
+ Reg tmp = blref; blref = brref; brref = tmp;
+ }
+ if (irref_isk(brref)) {
+ m2 = emit_isk12(ARMI_AND, IR(brref)->i);
+ if ((m2 & (ARMI_AND ^ ARMI_BIC)))
+ goto notst; /* Not beneficial if we miss a constant operand. */
+ }
+ if (cc == CC_GE) cc = CC_PL;
+ else if (cc == CC_LT) cc = CC_MI;
+ else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */
+ bleft = ra_alloc1(as, blref, RSET_GPR);
+ if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft), &rs);
+ asm_guardcc(as, cc);
+ emit_n(as, ARMI_TST ^ m2, bleft);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m2 >> 16), (m2 >> 16));
+ return;
+ }
+ }
+notst:
+ left = ra_alloc1(as, lref, RSET_GPR);
+ m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left), &rs);
+ asm_guardcc(as, cc);
+ emit_n(as, ARMI_CMP ^ m, left);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
+ /* Signed comparison with zero and referencing previous ins? */
+ if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE))
+ as->flagmcp = as->mcp; /* Allow elimination of the compare. */
+}
+
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t))
+ asm_fpcomp(as, ir);
+ else
+#endif
+ asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir) asm_comp(as, ir)
+
+#if LJ_HASFFI
+/* 64 bit integer comparisons. */
+static void asm_int64comp(ASMState *as, IRIns *ir)
+{
+ int signedcomp = (ir->o <= IR_GT);
+ ARMCC cclo, cchi;
+ Reg leftlo, lefthi;
+ uint32_t mlo, mhi, rslo = 0, rshi = 0;
+ RegSet allow = RSET_GPR, oldfree;
+
+ /* Always use unsigned comparison for loword. */
+ cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15;
+ leftlo = ra_alloc1(as, ir->op1, allow);
+ oldfree = as->freeset;
+ mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo), &rslo);
+ allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */
+
+ /* Use signed or unsigned comparison for hiword. */
+ cchi = asm_compmap[ir->o] & 15;
+ lefthi = ra_alloc1(as, (ir + 1)->op1, allow);
+ mhi = asm_fuseopm(as, ARMI_CMP, (ir + 1)->op2, rset_clear(allow, lefthi), &rshi);
+
+ /* All register allocations must be performed _before_ this point. */
+ if (signedcomp) {
+ MCLabel l_around = emit_label(as);
+ asm_guardcc(as, cclo);
+ emit_n(as, ARMI_CMP ^ mlo, leftlo);
+ if (rslo)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16));
+ emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around);
+ if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */
+ asm_guardcc(as, cchi);
+ }
+ else {
+ asm_guardcc(as, cclo);
+ emit_n(as, ARMI_CMP ^ mlo, leftlo);
+ if (rslo)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16));
+ ARMI_IT(CC_EQ);
+ }
+ emit_n(as, ARMI_CMP ^ mhi, lefthi);
+ if (rshi)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rshi, (mhi >> 16), (mhi >> 16));
+}
+#endif
+
+/* -- Split register ops -------------------------------------------------- */
+
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir - 1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
+ if ((ir - 1)->o <= IR_NE) {
+ /* 64 bit integer or FP comparisons. ORDER IR. */
+ as->curins--; /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+ if (!irt_isint(ir->t)) {
+ asm_sfpcomp(as, ir - 1);
+ return;
+ }
+#endif
+#if LJ_HASFFI
+ asm_int64comp(as, ir - 1);
+#endif
+ return;
+#if LJ_SOFTFP
+ }
+ else if ((ir - 1)->o == IR_MIN || (ir - 1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+ asm_sfpmin_max(as, ir - 1, (ir - 1)->o == IR_MIN ? CC_PL : CC_LE);
+ return;
+#elif LJ_HASFFI
+ }
+ else if ((ir - 1)->o == IR_CONV) {
+ as->curins--; /* Always skip the CONV. */
+ if (usehi || uselo)
+ asm_conv64(as, ir);
+ return;
+#endif
+ }
+ else if ((ir - 1)->o == IR_XSTORE) {
+ if ((ir - 1)->r != RID_SINK)
+ asm_xstore_(as, ir, 4);
+ return;
+ }
+#endif
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir - 1)->o) {
+#if LJ_HASFFI
+ case IR_ADD:
+ as->curins--;
+ asm_intop(as, ir, ARMI_ADC);
+ asm_intop(as, ir - 1, ARMI_ADD | ARMI_S);
+ break;
+ case IR_SUB:
+ as->curins--;
+ asm_intop(as, ir, ARMI_SBC);
+ asm_intop(as, ir - 1, ARMI_SUB | ARMI_S);
+ break;
+ case IR_NEG:
+ as->curins--;
+ {
+ /* asm_intnegr */
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ emit_dn(as, ARMC_K12(ARMI_SBC, 0), left, dest);
+ }
+ asm_intneg(as, ir - 1, ARMI_RSB | ARMI_S);
+ break;
+ case IR_CNEWI:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
+#endif
+#if LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_STRTO:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
+ break;
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
+#endif
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_n(as, ARMC_K12(ARMI_TST, HOOK_PROFILE), RID_TMP);
+ emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as,
+ BCReg topslot,
+ IRIns *irp,
+ RegSet allow,
+ ExitNo exitno)
+{
+ Reg pbase;
+ uint32_t k;
+ if (irp) {
+ if (!ra_hasspill(irp->s)) {
+ pbase = irp->r;
+ lj_assertA(ra_hasreg(pbase), "base reg lost");
+ }
+ else if (allow) {
+ pbase = rset_pickbot(allow);
+ }
+ else {
+ pbase = RID_RET;
+ emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
+ }
+ }
+ else {
+ pbase = RID_BASE;
+ }
+ emit_branchlink(as, ARMI_BL, exitstub_addr(as->J, exitno));
+ ARMI_IT(CC_LS);
+ k = emit_isk12(0, (int32_t)(8*topslot));
+ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
+ emit_n(as, ARMI_CMP ^ k, RID_TMP);
+ emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
+ emit_lso(as,
+ ARMI_LDR,
+ RID_TMP,
+ RID_TMP,
+ (int32_t)offsetof(lua_State, maxstack));
+ if (irp) {
+ /* Must not spill arbitrary registers in head of side trace. */
+ int32_t i = i32ptr(&J2G(as->J)->cur_L);
+ if (ra_hasspill(irp->s))
+ emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
+ emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
+ if (ra_hasspill(irp->s) && !allow)
+ emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
+ emit_loadi(as, RID_TMP, (i & ~4095));
+ }
+ else {
+ emit_getgl(as, RID_TMP, cur_L);
+ }
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap) - 1];
+ MSize n, nent = snap->nent;
+ /* Store the value of all modified slots to the Lua stack. */
+ for (n = 0; n < nent; n++) {
+ SnapEntry sn = map[n];
+ BCReg s = snap_slot(sn);
+ int32_t ofs = 8*((int32_t)s - 1);
+ IRRef ref = snap_ref(sn);
+ IRIns *ir = IR(ref);
+ if ((sn & SNAP_NORESTORE))
+ continue;
+ if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg tmp;
+ /* LJ_SOFTFP: must be a number constant. */
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as,
+ (int32_t)ir_knum(ir)->u32.lo,
+ rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
+ if (rset_test(as->freeset, tmp + 1)) odd = RID2RSET(tmp + 1);
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs + 4);
+#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
+#endif
+ }
+ else {
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
+ Reg type;
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+ "restore of IR type %d", irt_type(ir->t));
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
+ emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
+ if (rset_test(as->freeset, src + 1)) odd = RID2RSET(src + 1);
+ }
+ if ((sn & (SNAP_CONT | SNAP_FRAME))) {
+ if (s == 0) continue; /* Do not overwrite link to previous frame. */
+ type = ra_allock(as, (int32_t)(*flinks--), odd);
+#if LJ_SOFTFP
+ }
+ else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref + 1, rset_exclude(RSET_GPRODD, RID_BASE));
+#endif
+ }
+ else if ((sn & SNAP_KEYINDEX)) {
+ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
+ }
+ else {
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
+ }
+ emit_lso(as, ARMI_STR, type, RID_BASE, ofs + 4);
+ }
+ checkmclim(as);
+ }
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Marker to prevent patching the GC check exit. */
+#define ARM_NOPATCH_GC_CHECK (ARMC_K12(ARMI_BIC, 0))
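+/* (A BIC with zero immediate: harmless if executed, and recognized by
+** lj_asm_patchexit() so the GC-check exit branch is never redirected.) */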
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+ IRRef args[2];
+ MCLabel l_end;
+ Reg tmp1, tmp2;
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
+ * --as->mcp = ARM_NOPATCH_GC_CHECK;
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+ asm_gencall(as, ci, args);
+ tmp1 = ra_releasetmp(as, ASMREF_TMP1);
+ tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+ emit_loadi(as, tmp2, as->gcsteps);
+ /* Jump around GC step if GC total < GC threshold. */
+ emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);
+ emit_nm(as, ARMI_CMP, RID_TMP, tmp2);
+ emit_lso(as,
+ ARMI_LDR,
+ tmp2,
+ tmp1,
+ (int32_t)offsetof(global_State, gc.threshold));
+ emit_lso(as,
+ ARMI_LDR,
+ RID_TMP,
+ tmp1,
+ (int32_t)offsetof(global_State, gc.total));
+ ra_allockreg(as, i32ptr(J2G(as->J)), tmp1);
+ as->gcsteps = 0;
+ checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+ MCode *p = as->mctop;
+ MCode *target = as->mcp;
+ if (as->loopinv) {
+ /* Inverted loop branch? */
+ /* asm_guardcc already inverted the bcc and patched the final bl. */
+ p[-2] |= ARMC_B((uint32_t)((target - p + 1) << 1));
+ }
+ else {
+ p[-1] = ARMI_B_T4 | ARMC_BL((uint32_t)((target - p) << 1));
+ }
+}
+
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Reload L register from g->cur_L. */
+static void asm_head_lreg(ASMState *as)
+{
+ IRIns *ir = IR(ASMREF_L);
+ if (ra_used(ir)) {
+ Reg r = ra_dest(as, ir, RSET_GPR);
+ emit_getgl(as, r, cur_L);
+ ra_evictk(as);
+ }
+}
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+ IRIns *ir;
+ asm_head_lreg(as);
+ ir = IR(REF_BASE);
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+ ra_spill(as, ir);
+ ra_destreg(as, ir, RID_BASE);
+}
+
+/* Coalesce BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+ IRIns *ir;
+ asm_head_lreg(as);
+ ir = IR(REF_BASE);
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+ ra_spill(as, ir);
+ if (ra_hasspill(irp->s)) {
+ rset_clear(allow, ra_dest(as, ir, allow));
+ }
+ else {
+ Reg r = irp->r;
+ lj_assertA(ra_hasreg(r), "base reg lost");
+ rset_clear(allow, r);
+ if (r != ir->r && !rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+ ra_destreg(as, ir, r);
+ }
+ return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+ MCode *p = as->mctop;
+ MCode *target;
+ int32_t spadj = as->T->spadjust;
+ if (spadj == 0) {
+ as->mctop = --p;
+ }
+ else {
+ /* Patch stack adjustment. */
+ uint32_t k = emit_isk12(ARMI_ADD, spadj);
+ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
+ p[-2] = (ARMI_ADD ^ k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ /* Patch exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)as->J->exitstubgroup[0] - 8; /* lj_vm_exit_interp */
+ p[-1] = ARMI_B_T4 | ARMC_BL((target - p) << 1);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+ MCode *p = as->mctop - 1; /* Leave room for exit branch. */
+ if (as->loopref) {
+ as->invmcp = as->mcp = p;
+ }
+ else {
+ as->mcp = p - 1; /* Leave room for stack pointer adjustment. */
+ as->invmcp = NULL;
+ }
+ *p = 0; /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ IRRef args[CCI_NARGS_MAX * 2];
+ uint32_t i, nargs = CCI_XNARGS(ci);
+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
+ asm_collectargs(as, ir, ci, args);
+ for (i = 0; i < nargs; i++) {
+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
+ if (irt_isnum(IR(args[i])->t)) {
+ if (nfpr > 0) nfpr--;
+ else fprodd = 0, nslots = (nslots + 3) & ~1;
+ }
+ else {
+ if (fprodd) fprodd--;
+ else if (nfpr > 0) fprodd = 1, nfpr--;
+ else nslots++;
+ }
+ }
+ else if (irt_isnum(IR(args[i])->t)) {
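+ /* AAPCS: a 64-bit argument takes an even/odd GPR pair, else two stack slots. */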
+ ngpr &= ~1;
+ if (ngpr > 0) ngpr -= 2;
+ else nslots += 2;
+ }
+ else {
+ if (ngpr > 0) ngpr--;
+ else nslots++;
+ }
+ }
+ else {
+ if (ngpr > 0) ngpr--;
+ else nslots++;
+ }
+ }
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
+ as->evenspill = nslots;
+ return REGSP_HINT(RID_RET);
+}
+
+static void asm_setup_target(ASMState *as)
+{
+ /* May need extra exit for asm_stack_check on side traces. */
+ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+ MCode *p = T->mcode;
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
+ MCode *cstart = NULL, *cend = p;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+ MCode *px = exitstub_addr(J, exitno) - 1;
+ for (; p < pe; p++) {
+ /* Look for bl_cc exitstub, replace with b_cc target. */
+ uint32_t ins = *p;
+ if ((ins & 0xd000f800u) == 0xd000f000u &&
+ (((ARMC_BL_READ(ins) >> 1) ^ (px - p)) & 0x007fffffu) == 0 &&
+ p[-1] != ARM_NOPATCH_GC_CHECK) {
+ *p = ARMI_B_T4 | ARMC_BL((uint32_t)(((target - p) - 1) << 1));
+ cend = p + 1;
+ if (!cstart) cstart = p;
+ }
+ }
+ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
+ lj_mcode_sync(cstart, cend);
+ lj_mcode_patch(J, mcarea, 1);
+}
+
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 43e44305..2e6b6fd4 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -151,7 +151,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
MSize slot;
- /* This must match with the saveregs macro in buildvm_arm.dasc. */
+ /* This must match the saveregs macro in buildvm_arm.dasc. */ /* jturnsek */
*p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC);
*p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR);
*p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD;
@@ -295,7 +295,7 @@ static void callback_mcode_new(CTState *cts)
DWORD oprot;
LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
}
-#elif LJ_TARGET_POSIX
+#elif LJ_TARGET_POSIX && !LJ_TARGET_NUTTX
mprotect(p, sz, (PROT_READ|PROT_EXEC));
#endif
}
diff --git a/src/lj_clib.c b/src/lj_clib.c
index f0ef6edd..0fc6419b 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -50,6 +50,8 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L)
#define CLIB_SOEXT "%s.dylib"
#elif LJ_TARGET_CYGWIN
#define CLIB_SOEXT "%s.dll"
+#elif LJ_TARGET_NUTTX
+#define CLIB_SOEXT "%s"
#else
#define CLIB_SOEXT "%s.so"
#endif
@@ -428,7 +430,11 @@ void lj_clib_unload(CLibrary *cl)
void lj_clib_default(lua_State *L, GCtab *mt)
{
CLibrary *cl = clib_new(L, mt);
+#if LJ_TARGET_NUTTX
+ cl->handle = clib_loadlib(L, "c", 0);
+#else
cl->handle = CLIB_DEFHANDLE;
+#endif
}
#endif
diff --git a/src/lj_def.h b/src/lj_def.h
index b61297aa..03f60c3f 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -87,6 +87,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_EXITSTUBGR 16 /* Max. # of exit stub groups. */
/* Various macros. */
+#undef UNUSED /* NuttX defines a conflicting UNUSED macro; use our own. */
#ifndef UNUSED
#define UNUSED(x) ((void)(x)) /* to avoid warnings */
#endif
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 52762eea..909d4d5a 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -87,25 +87,23 @@ typedef uint16_t HotCount;
/* Global state, main thread and extra fields are allocated together. */
typedef struct GG_State {
- lua_State L; /* Main thread. */
- global_State g; /* Global state. */
-#if LJ_TARGET_ARM && !LJ_TARGET_NX
- /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
- uint8_t align1[(16-sizeof(global_State))&15];
-#endif
+ lua_State L; /* Main thread. */
#if LJ_TARGET_MIPS
- ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */
+ ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */
#endif
#if LJ_HASJIT
- jit_State J; /* JIT state. */
- HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
+ jit_State J; /* JIT state. */
+ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
#if LJ_TARGET_ARM && !LJ_TARGET_NX
/* Ditto for J. */
- uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
+ uint8_t align1[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
#endif
#endif
- ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */
- BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */
+ global_State g; /* Global state. */ /* jturnsek: moved here to avoid excessive negative DISPATCH-relative offsets when LJ_HASJIT. */
+ /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
+ uint8_t align2[(16-sizeof(global_State))&15];
+ ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */
+ BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */
} GG_State;
#define GG_OFS(field) ((int)offsetof(GG_State, field))
diff --git a/src/lj_emit_armv7m.h b/src/lj_emit_armv7m.h
new file mode 100644
index 00000000..5381df8b
--- /dev/null
+++ b/src/lj_emit_armv7m.h
@@ -0,0 +1,474 @@
+/*
+** ARMv7-M instruction emitter.
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Constant encoding --------------------------------------------------- */
+
+#define INVAI_MASK 0xfbe0
+
+static uint32_t emit_invai[16] = {
+ /* AND, TST */ ((ARMI_AND ^ 0x1a00) ^ (ARMI_BIC ^ 0x1a00)) & INVAI_MASK,
+ /* BIC */ ((ARMI_BIC ^ 0x1a00) ^ (ARMI_AND ^ 0x1a00)) & INVAI_MASK,
+ /* MOV, ORR */ ((ARMI_MOV ^ 0x1a00) ^ (ARMI_MVN ^ 0x1a00)) & INVAI_MASK,
+ /* MVN, ORN */ ((ARMI_MVN ^ 0x1a00) ^ (ARMI_MOV ^ 0x1a00)) & INVAI_MASK,
+ /* EOR, TEQ */ 0,
+ 0,
+ 0,
+ 0,
+ /* ADD, CMN */ ((ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00)) & INVAI_MASK,
+ 0,
+ /* ADC */ ((ARMI_ADC ^ 0x1a00) ^ (ARMI_SBC ^ 0x1a00)) & INVAI_MASK,
+ /* SBC */ ((ARMI_SBC ^ 0x1a00) ^ (ARMI_ADC ^ 0x1a00)) & INVAI_MASK,
+ 0,
+ /* SUB, CMP */ ((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK,
+ /* RSB */ 0,
+ 0
+};
+
+/* Encode constant in K12 format for data processing instructions. */
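+/* Accepted Thumb-2 modified immediates, exactly as tested below:
+**   0x000000ab -> i:imm3 = 0000   0x00ab00ab -> i:imm3 = 0001
+**   0xab00ab00 -> i:imm3 = 0010   0xabababab -> i:imm3 = 0011
+** or any 1bcdefgh byte rotated into the upper bits. Anything else is
+** retried with the inverse instruction from emit_invai[]. */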
+static unsigned int emit_isk12(ARMIns ai, signed int n)
+{
+ unsigned int invai, i, m = (unsigned int)n;
+ /* K12: 1bcdefgh value, rotated in steps of one bit. */
+ if (m <= 255) {
+ /* i:imm3 = 0000 */
+ return ARMC_K12(0, m);
+ }
+ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0001 */
+ return ARMC_K12(0, 0x100 | (m & 0xff));
+ }
+ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+ /* i:imm3 = 0010 */
+ return ARMC_K12(0, 0x200 | (m >> 8 & 0xff));
+ }
+ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0011 */
+ return ARMC_K12(0, 0x300 | (m & 0xff));
+ }
+ else {
+ for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) {
+ if (m <= 255) {
+ if ((m & 0x80) && (i >= 128 * 8))
+ return ARMC_K12(0, i | (m & 0x7f));
+ else
+ continue;
+ }
+ }
+ }
+
+ /* Otherwise try negation/complement with the inverse instruction. */
+ invai = emit_invai[(ai >> 5) & 0xf];
+ if (!invai) return 0; /* Failed. No inverse instruction. */
+ m = ~(unsigned int)n;
+ if (invai == (((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK) ||
+ invai == (((ARMI_CMP ^ 0x1a00) ^ (ARMI_CMN ^ 0x1a00)) & INVAI_MASK)) m++;
+ if (m <= 255) {
+ /* i:imm3 = 0000 */
+ return ARMC_K12(invai, m);
+ }
+ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0001 */
+ return ARMC_K12(invai, 0x100 | (m & 0xff));
+ }
+ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+ /* i:imm3 = 0010 */
+ return ARMC_K12(invai, 0x200 | (m >> 8 & 0xff));
+ }
+ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0011 */
+ return ARMC_K12(invai, 0x300 | (m & 0xff));
+ }
+ else {
+ for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) {
+ if (m <= 255) {
+ if ((m & 0x80) && (i >= 128 * 8))
+ return ARMC_K12(invai, i | (m & 0x7f));
+ else
+ continue;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* -- Emit basic instructions --------------------------------------------- */
+
+static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
+{
+ * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_tnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
+{
+ * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
+{
+ * --as->mcp = ai | ARMF_D(rd) | ARMF_M(rm);
+}
+
+static void emit_tm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
+{
+ * --as->mcp = ai | ARMF_T(rd) | ARMF_M(rm);
+}
+
+static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
+{
+ * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn);
+}
+
+static void emit_tn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
+{
+ * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn);
+}
+
+static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm)
+{
+ * --as->mcp = ai | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_d(ASMState *as, ARMIns ai, Reg rd)
+{
+ * --as->mcp = ai | ARMF_D(rd);
+}
+
+static void emit_t(ASMState *as, ARMIns ai, Reg rd)
+{
+ * --as->mcp = ai | ARMF_T(rd);
+}
+
+static void emit_n(ASMState *as, ARMIns ai, Reg rn)
+{
+ * --as->mcp = ai | ARMF_N(rn);
+}
+
+static void emit_m(ASMState *as, ARMIns ai, Reg rm)
+{
+ * --as->mcp = ai | ARMF_M(rm);
+}
+
+static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+ lj_assertA(ofs >= -1020 && ofs <= 1020,
+ "load/store offset %d out of range", ofs);
+ if (ofs < 0) ofs = -ofs;
+ else ai |= ARMI_LSX_U;
+ * --as->mcp = ai | ARMI_LSX_P | ARMF_T(rd) | ARMF_D(rd + 1) | ARMF_N(rn) |
+ (((ofs >> 2) & 0xff) << 16); /* imm multiples of 4 */
+}
+
+static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+ lj_assertA(ofs >= -255 && ofs <= 4095,
+ "load/store offset %d out of range", ofs);
+ /* Combine LDR/STR pairs to LDRD/STRD. */
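+ /* Only when the previous store pairs the adjacent even/odd register at
+ ** the neighbouring 4-aligned offset (<= 252) and we are not at a loop
+ ** boundary (as->mcloop). */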
+ if (*as->mcp == (ai | ARMI_LS_1 | ARMI_LS_P | ARMI_LS_U | ARMF_T(rd ^ 1) | ARMF_N(rn) | (((ofs ^ 4) & 0xff) << 16)) &&
+ (ai & ~(ARMI_LDR ^ ARMI_STR)) == ARMI_STR && rd != rn &&
+ (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >> 2)) & 1) &&
+ as->mcp != as->mcloop) {
+ as->mcp++;
+ emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd & ~1, rn, ofs & ~4);
+ return;
+ }
+ if (ofs > 255) {
+ * --as->mcp = ai | ARMI_LS_I | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xfff) << 16);
+ return;
+ }
+ if (ofs < 0) ofs = -ofs;
+ else ai |= ARMI_LS_U;
+ * --as->mcp = ai | ARMI_LS_1 | ARMI_LS_P | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xff) << 16);
+}
+
+#if !LJ_SOFTFP
+static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+ lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs & 3) == 0,
+ "load/store offset %d out of range", ofs);
+ if (ofs < 0) ofs = -ofs;
+ else ai |= ARMI_LSX_U;
+ * --as->mcp = ai | ARMF_T(rd & 15) | ARMF_N(rn) | ((ofs >> 2) << 16);
+}
+#endif
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer spills of BASE/L. */
+#define emit_canremat(ref) ((ref) < ASMREF_L)
+
+/* Try to find a one step delta relative to another constant. */
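+/* E.g. if a live register already holds i-4, emit "add d, r, #4" instead
+** of materializing the constant from scratch. */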
+static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
+{
+ RegSet work = ~as->freeset & RSET_GPR;
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+ lj_assertA(r != d, "dest reg not free");
+ if (emit_canremat(ref)) {
+ int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+ uint32_t k = emit_isk12(ARMI_ADD, delta);
+ if (k) {
+ if (k == ARMI_K12)
+ emit_dm(as, ARMI_MOV, d, r);
+ else
+ emit_dn(as, ARMI_ADD ^ k, d, r);
+ return 1;
+ }
+ }
+ rset_clear(work, r);
+ }
+ return 0; /* Failed. */
+}
+
+/* Try to find a two step delta relative to another constant. */
+static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
+{
+ RegSet work = ~as->freeset & RSET_GPR;
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (emit_canremat(ref)) {
+ int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
+ if (other) {
+ int32_t delta = i - other;
+ uint32_t sh, inv = 0, k2, k;
+ if (delta < 0) { delta = -delta; inv = (ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00); }
+ sh = lj_ffs(delta) & ~1;
+ k2 = emit_isk12(0, delta & (255 << sh));
+ k = emit_isk12(0, delta & ~(255 << sh));
+ if (k) {
+ emit_dn(as, ARMI_ADD ^ k2 ^ inv, rd, rd);
+ emit_dn(as, ARMI_ADD ^ k ^ inv, rd, r);
+ return 1;
+ }
+ }
+ }
+ rset_clear(work, r);
+ }
+ return 0; /* Failed. */
+}
+
+/* Load a 32 bit constant into a GPR. */
+static void emit_loadi(ASMState *as, Reg rd, int32_t i)
+{
+ uint32_t k = emit_isk12(ARMI_MOV, i);
+ lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
+ "dest reg %d not free", rd);
+ if (k) {
+ /* Standard K12 constant. */
+ emit_d(as, ARMI_MOV ^ k, rd);
+ }
+ else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
+ /* 16 bit loword constant for ARMv6T2. */
+ emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd);
+ }
+ else if (emit_kdelta1(as, rd, i)) {
+ /* One step delta relative to another constant. */
+ }
+ else if ((as->flags & JIT_F_ARMV6T2)) {
+ /* 32 bit hiword/loword constant for ARMv6T2. */
+ emit_d(as, ARMI_MOVT | (((i >> 16) & 0xff) << 16) | (((i >> 16) & 0x700) << 20) | (((i >> 16) & 0x800) >> 1) | (((i >> 16) & 0xf000) >> 12), rd);
+ emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd);
+ }
+ else if (emit_kdelta2(as, rd, i)) {
+ /* Two step delta relative to another constant. */
+ }
+ else {
+ /* Otherwise construct the constant with up to 4 instructions. */
+ /* NYI: use mvn+bic, use pc-relative loads. */
+ for (;;) {
+ uint32_t sh = lj_ffs(i) & ~1;
+ int32_t m = i & (255 << sh);
+ i &= ~(255 << sh);
+ if (i == 0) {
+ emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
+ break;
+ }
+ emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
+ }
+ }
+}
+
+#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
+
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+
+/* Get/set from constant pointer. */
+static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
+{
+ int32_t i = i32ptr(p);
+ emit_lso(as,
+ ai,
+ r,
+ ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)),
+ (i & 4095));
+}
+
+#if !LJ_SOFTFP
+/* Load a number constant into an FPR. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+ cTValue *tv = ir_knum(ir);
+ int32_t i;
+ if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
+ uint32_t hi = tv->u32.hi;
+ uint32_t b = ((hi >> 22) & 0x1ff);
+ if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) {
+ * --as->mcp = ARMI_VMOVI_D | ARMF_T(r & 15) |
+ ((((tv->u32.hi >> 12) & 0x00080000) |
+ ((tv->u32.hi >> 4) & 0x00070000)) >> 16) |
+ (((tv->u32.hi >> 16) & 0x0000000f) << 16);
+ return;
+ }
+ }
+ i = i32ptr(tv);
+ emit_vlso(as,
+ ARMI_VLDR_D,
+ r,
+ ra_allock(as, (i & ~1020), RSET_GPR),
+ (i & 1020));
+}
+#endif
+
+/* Get/set global_State fields. */
+#define emit_getgl(as, r, field) \
+ emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
+#define emit_setgl(as, r, field) \
+ emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field)
+
+/* Trace number is determined from pc of exit instruction. */
+#define emit_setvmstate(as, i) UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC. */
+#define emit_label(as) ((as)->mcp)
+
+static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = (target - p) << 1;
+ lj_assertA(((delta + 0x0080000) >> 20) == 0, "branch target out of range");
+ * --p = ai | ARMC_B((uint32_t)delta & 0x00fffffu);
+ as->mcp = p;
+}
+
+static void emit_branchlink(ASMState *as, ARMIns ai, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = (target - p) << 1;
+ * --p = ai | ARMC_BL((uint32_t)delta & 0x0ffffffu);
+ as->mcp = p;
+}
+
+static void emit_jmp(ASMState *as, MCode *target)
+{
+ MCode *p = as->mcp;
+ ptrdiff_t delta = (target - p) << 1;
+ lj_assertA(((delta + 0x0800000) >> 24) == 0, "jump target out of range");
+ * --p = ARMI_B_T4 | ARMC_BL((uint32_t)delta & 0x00ffffffu);
+ as->mcp = p;
+}
+
+static void emit_call(ASMState *as, void *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = ((char *)target - (char *)p) - 4;
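+ /* Direct BL if the halfword offset fits in signed 21 bits (+-2 MB). */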
+ if ((((delta >> 1) + 0x00100000) >> 21) == 0) {
+ /* Only Thumb code is allowed */
+ *p = ARMI_BL | ARMC_BL((uint32_t)(delta >> 1));
+ }
+ else {
+ /* Target out of range: need indirect call. But don't use R0-R3. */
+ Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12 + 1));
+ *p = ARMI_BLXr | ARMF_M2(r);
+ }
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+#if LJ_SOFTFP
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+ if (dst >= RID_MAX_GPR) {
+ emit_tm(as,
+ irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
+ (dst & 15),
+ (src & 15));
+ return;
+ }
+#endif
+ if (as->mcp != as->mcloop) {
+ /* Swap early registers for loads/stores. */
+ MCode ins = *as->mcp, swp = (src ^ dst);
+ if ((ins & 0x0fc0ff80) == 0x0000f800) {
+ if (!((ins ^ dst) & 0x0000000f))
+ *as->mcp = ins ^ swp; /* Swap N in load/store. */
+ if (!(ins & 0x00000010) && !((ins ^ (dst << 28)) & 0xf0000000))
+ *as->mcp = ins ^ (swp << 28); /* Swap D in store. */
+ }
+ }
+ emit_dm(as, ARMI_MOV, dst, src);
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+#if LJ_SOFTFP
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+ if (r >= RID_MAX_GPR)
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
+ else
+#endif
+ emit_lso(as, ARMI_LDR, r, base, ofs);
+}
+
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+#if LJ_SOFTFP
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+ if (r >= RID_MAX_GPR)
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
+ else
+#endif
+ emit_lso(as, ARMI_STR, r, base, ofs);
+}
+
+/* Emit an arithmetic/logic operation with a constant operand. */
+static void emit_opk(ASMState *as,
+ ARMIns ai,
+ Reg dest,
+ Reg src,
+ int32_t i,
+ RegSet allow)
+{
+ uint32_t k = emit_isk12(ai, i);
+ if (k)
+ emit_dn(as, ai ^ k, dest, src);
+ else
+ emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+ if (ofs)
+ emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r));
+}
+
+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
+
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 32b3861a..10644724 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -107,7 +107,7 @@
#define JIT_P_sizemcode_DEFAULT 64
#else
/* Could go as low as 4K, but the mmap() overhead would be rather high. */
-#define JIT_P_sizemcode_DEFAULT 32
+#define JIT_P_sizemcode_DEFAULT 8
#endif
/* Optimization parameters and their defaults. Length is a char in octal! */
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 163aada4..7ea1fe2f 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -45,6 +45,8 @@ void lj_mcode_sync(void *start, void *end)
sys_icache_invalidate(start, (char *)end-(char *)start);
#elif LJ_TARGET_PPC
lj_vm_cachesync(start, end);
+#elif LJ_TARGET_NUTTX
+ up_invalidate_icache_all();
#elif defined(__GNUC__) || defined(__clang__)
__clear_cache(start, end);
#else
@@ -86,6 +88,50 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
}
+#elif LJ_TARGET_NUTTX
+
+#include <nuttx/config.h>
+#include <nuttx/mm/mm.h>
+
+static bool initialized = false;
+static struct mm_heap_s *g_mcode_heap;
+
+#define MCPROT_RW 0
+#define MCPROT_RX 0
+#define MCPROT_RWX 0
+
+static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+{
+ UNUSED(prot);
+
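+ /* No page-protection control here (mcode_setprot is a no-op), so trace
+ ** machine code is carved from a static buffer placed in a dedicated
+ ** linker section via locate_data(). */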
+ if (!initialized) {
+ static uint8_t buffer[CONFIG_LUAJIT_MCODE_SIZE]
+ locate_data(CONFIG_LUAJIT_MCODE_SECTION_NAME);
+ g_mcode_heap = mm_initialize("mcode",
+ (void *)buffer,
+ CONFIG_LUAJIT_MCODE_SIZE);
+ initialized = true;
+ }
+
+ void *p = mm_malloc(g_mcode_heap, sz);
+ if (p == NULL) {
+ if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
+ }
+ return p;
+}
+
+static void mcode_free(jit_State *J, void *p, size_t sz)
+{
+ UNUSED(J); UNUSED(sz);
+ mm_free(g_mcode_heap, p);
+}
+
+static int mcode_setprot(void *p, size_t sz, int prot)
+{
+ UNUSED(p); UNUSED(sz); UNUSED(prot);
+ return 0;
+}
+
#elif LJ_TARGET_POSIX
#include <sys/mman.h>
diff --git a/src/lj_target.h b/src/lj_target.h
index 19716928..8cee29ea 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -137,7 +137,11 @@ typedef uint32_t RegCost;
#if LJ_TARGET_X86ORX64
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_target_armv7m.h"
+#else
#include "lj_target_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "lj_target_arm64.h"
#elif LJ_TARGET_PPC
diff --git a/src/lj_target_armv7m.h b/src/lj_target_armv7m.h
new file mode 100755
index 00000000..5dc6d488
--- /dev/null
+++ b/src/lj_target_armv7m.h
@@ -0,0 +1,315 @@
+/*
+** Definitions for ARMv7-M CPUs.
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_ARMV7M_H
+#define _LJ_TARGET_ARMV7M_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+ _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC)
+#if LJ_SOFTFP
+#define FPRDEF(_)
+#else
+#define FPRDEF(_) \
+ _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
+ _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15)
+#endif
+#define VRIDDEF(_)
+
+#define RIDENUM(name) RID_##name,
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+ RID_TMP = RID_LR,
+
+ /* Calling conventions. */
+ RID_RET = RID_R0,
+ RID_RETLO = RID_R0,
+ RID_RETHI = RID_R1,
+#if LJ_SOFTFP
+ RID_FPRET = RID_R0,
+#else
+ RID_FPRET = RID_D0,
+#endif
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_R9, /* Interpreter BASE. */
+ RID_LPC = RID_R6, /* Interpreter PC. */
+ RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */
+ RID_LREG = RID_R8, /* Interpreter L. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_R0,
+ RID_MAX_GPR = RID_PC + 1,
+ RID_MIN_FPR = RID_MAX_GPR,
+#if LJ_SOFTFP
+ RID_MAX_FPR = RID_MIN_FPR,
+#else
+ RID_MAX_FPR = RID_D15 + 1,
+#endif
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF RID_NUM_GPR
+#define RID_MIN_KREF RID_R0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except sp, lr and pc. */
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1))
+#define RSET_GPREVEN \
+ (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
+ RID2RSET(RID_R8)|RID2RSET(RID_R10))
+#define RSET_GPRODD \
+ (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
+ RID2RSET(RID_R9)|RID2RSET(RID_R11))
+#if LJ_SOFTFP
+#define RSET_FPR 0
+#else
+#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
+#endif
+#define RSET_ALL (RSET_GPR|RSET_FPR)
+#define RSET_INIT RSET_ALL
+
+/* ABI-specific register sets. lr is an implicit scratch register. */
+#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
+#ifdef __APPLE__
+#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
+#else
+#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_
+#endif
+#if LJ_SOFTFP
+#define RSET_SCRATCH_FPR 0
+#else
+#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1))
+#endif
+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR RID_R0
+#define REGARG_LASTGPR RID_R3
+#define REGARG_NUMGPR 4
+#if LJ_ABI_SOFTFP
+#define REGARG_FIRSTFPR 0
+#define REGARG_LASTFPR 0
+#define REGARG_NUMFPR 0
+#else
+#define REGARG_FIRSTFPR RID_D0
+#define REGARG_LASTFPR RID_D7
+#define REGARG_NUMFPR 8
+#endif
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+*/
+#define SPS_FIXED 2
+#define SPS_FIRST 2
+
+#define SPOFS_TMP 0
+
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+#if !LJ_SOFTFP
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+#endif
+ int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ int32_t spill[256]; /* Spill slots. */
+} ExitState;
+
+/* PC after instruction that caused an exit. Used to find the trace number. */
+#define EXITSTATE_PCREG RID_PC
+/* Highest exit + 1 indicates stack check. */
+#define EXITSTATE_CHECKEXIT 1
+
+#define EXITSTUB_SPACING 4
+#define EXITSTUBS_PER_GROUP 32
+
+/* -- Instructions -------------------------------------------------------- */
+
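+/* Each 32-bit Thumb-2 instruction is stored as one MCode word with its two
+** halfwords swapped (the second halfword in the upper 16 bits), which is
+** why the field shifts and opcode constants below look reversed. */
+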
+/* Instruction fields. */
+#define ARMF_CC(ai, cc) ((ai) | ((cc) << 6))
+#define ARMF_N(r) ((r)<<0)
+#define ARMF_T(r) ((r)<<28)
+#define ARMF_D(r) ((r)<<24)
+#define ARMF_M(r) ((r)<<16)
+#define ARMF_M2(r) ((r)<<19) /* BLXr */
+#define ARMF_SH(sh, n) (((sh)<<20)|(((n)&0x3)<<22)|((((n)>>2)&0x7)<<28))
+#define ARMF_LSL(n) (((n)&0x3)<<20)
+#define ARMF_RSH(sh, r) (0xf0000000|((sh)<<5)|ARMF_M(r))
+
+/* Instruction compositing */
+#define ARMC_K12(arg1, arg2) (((arg1)^ARMI_K12)| \
+ (((arg2)&0xff)<<16)| \
+ (((arg2)&0x700)<<20)| \
+ (((arg2)&0x800)>>1))
+#define ARMC_B(arg) ((((arg)&0x7ff)<<16)| \
+ (((arg)&0x1f800)>>11)| \
+ (((arg)&0x20000)<<12)| \
+ (((arg)&0x40000)<<9)| \
+ (((arg)&0x80000)>>9))
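+/* B.W/BL (T4) scramble the branch offset: J1 = ~(I1 ^ S), J2 = ~(I2 ^ S),
+** with S the sign bit; ARMC_BL composes this form, ARMC_BL_READ decodes it. */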
+#define ARMC_BL(arg) ((((arg)&0x7ff)<<16)| \
+ (((arg)&0x1ff800)>>11)| \
+ (((~(((arg)&0x200000)>>21)&0x1)^((((arg)&0x800000)>>23)&0x1))<<27)| \
+ (((~(((arg)&0x400000)>>22)&0x1)^((((arg)&0x800000)>>23)&0x1))<<29)| \
+ (((((arg)&0x800000)>>23)&0x1)<<10))
+#define ARMC_BL_READ(ins) (((((ins)&0x07ff0000u)>>16))| \
+ (((ins)&0x000003ffu)<<11)| \
+ (((~((((ins)&0x08000000u)>>27)^(((ins)&0x00000400u)>>10)))&0x1)<<21)| \
+ (((~((((ins)&0x20000000u)>>29)^(((ins)&0x00000400u)>>10)))&0x1)<<22)| \
+ ((((ins)&0x00000400u)>>10)<<23))
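+/* ARMI_IT prepends one word holding a NOP (low halfword) and an IT <cc>
+** with a one-instruction mask (high halfword). Since code is emitted
+** backwards, invoke it *after* emitting the conditional instruction. */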
+#define ARMI_IT(cc) *--as->mcp = (0xbf08bf00u|(((cc)&0xf)<<20))
+
+
+typedef enum ARMIns {
+ ARMI_CCAL = 0x000003c0,
+ ARMI_S = 0x00000010,
+ ARMI_K12 = 0x00001a00,
+
+ ARMI_LS_W = 0x01000000,
+ ARMI_LS_U = 0x02000000,
+ ARMI_LS_P = 0x04000000,
+ ARMI_LS_1 = 0x08000000,
+ ARMI_LS_I = 0x00000080,
+ ARMI_LSX_W = 0x00000020,
+ ARMI_LSX_U = 0x00000080,
+ ARMI_LSX_P = 0x00000100,
+
+ ARMI_AND = 0x0000ea00,
+ ARMI_EOR = 0x0000ea80,
+ ARMI_SUB = 0x0000eba0,
+ ARMI_RSB = 0x0000ebc0,
+ ARMI_ADD = 0x0000eb00,
+ ARMI_ADC = 0x0000eb40,
+ ARMI_SBC = 0x0000eb60,
+ /* No ARMI_RSC: RSC has no Thumb-2 encoding. */
+ ARMI_TST = 0x0f00ea10,
+ ARMI_TEQ = 0x0f00ea90,
+ ARMI_CMP = 0x0f00ebb0,
+ ARMI_CMN = 0x0f00eb10,
+ ARMI_ORR = 0x0000ea40,
+ ARMI_MOV = 0x0000ea4f,
+ ARMI_BIC = 0x0000ea20,
+ ARMI_MVN = 0x0000ea6f,
+ ARMI_NOP = 0xbf00bf00,
+
+ ARMI_MUL = 0xf000fb00,
+ ARMI_SMULL = 0x0000fb80,
+
+ ARMI_LDR = 0x0000f850,
+ ARMI_LDRB = 0x0000f810,
+ ARMI_LDRH = 0x0000f830,
+ ARMI_LDRSB = 0x0000f910,
+ ARMI_LDRSH = 0x0000f930,
+ ARMI_LDRD = 0x0000e850,
+ ARMI_STR = 0x0000f840,
+ ARMI_STRB = 0x0000f800,
+ ARMI_STRH = 0x0000f820,
+ ARMI_STRD = 0x0000e840,
+ ARMI_PUSH = 0x0000e92d,
+
+ ARMI_B = 0x8000f000,
+ ARMI_B_T4 = 0x9000f000,
+ ARMI_BL = 0xd000f000,
+ ARMI_BLXr = 0x4780bf00,
+
+ /* ARMv6 */
+ ARMI_REV = 0xf080fa90,
+ ARMI_SXTB = 0xf080fa4f,
+ ARMI_SXTH = 0xf080fa0f,
+ ARMI_UXTB = 0xf080fa5f,
+ ARMI_UXTH = 0xf080fa1f,
+
+ /* ARMv6T2 */
+ ARMI_MOVW = 0x0000f240,
+ ARMI_MOVT = 0x0000f2c0,
+
+ /* VFP */
+ ARMI_VMOV_D = 0x0b40eeb0,
+ ARMI_VMOV_S = 0x0a40eeb0,
+ ARMI_VMOVI_D = 0x0b00eeb0,
+
+ ARMI_VMOV_R_S = 0x0a10ee10,
+ ARMI_VMOV_S_R = 0x0a10ee00,
+ ARMI_VMOV_RR_D = 0x0b10ec50,
+ ARMI_VMOV_D_RR = 0x0b10ec40,
+
+ ARMI_VADD_D = 0x0b00ee30,
+ ARMI_VSUB_D = 0x0b40ee30,
+ ARMI_VMUL_D = 0x0b00ee20,
+ ARMI_VMLA_D = 0x0b00ee00,
+ ARMI_VMLS_D = 0x0b40ee00,
+ ARMI_VNMLS_D = 0x0b00ee10,
+ ARMI_VDIV_D = 0x0b00ee80,
+
+ ARMI_VABS_D = 0x0bc0eeb0,
+ ARMI_VNEG_D = 0x0b40eeb1,
+ ARMI_VSQRT_D = 0x0bc0eeb1,
+
+ ARMI_VCMP_D = 0x0b40eeb4,
+ ARMI_VCMPZ_D = 0x0b40eeb5,
+
+ ARMI_VMRS = 0xfa10eef1,
+
+ ARMI_VCVT_S32_F32 = 0x0ac0eebd,
+ ARMI_VCVT_S32_F64 = 0x0bc0eebd,
+ ARMI_VCVT_U32_F32 = 0x0ac0eebc,
+ ARMI_VCVT_U32_F64 = 0x0bc0eebc,
+ ARMI_VCVT_F32_S32 = 0x0ac0eeb8,
+ ARMI_VCVT_F64_S32 = 0x0bc0eeb8,
+ ARMI_VCVT_F32_U32 = 0x0a40eeb8,
+ ARMI_VCVT_F64_U32 = 0x0b40eeb8,
+ ARMI_VCVT_F32_F64 = 0x0bc0eeb7,
+ ARMI_VCVT_F64_F32 = 0x0ac0eeb7,
+
+ ARMI_VLDR_S = 0x0a00ed10,
+ ARMI_VLDR_D = 0x0b00ed10,
+ ARMI_VSTR_S = 0x0a00ed00,
+ ARMI_VSTR_D = 0x0b00ed00,
+} ARMIns;
+
+typedef enum ARMShift {
+ ARMSH_LSL,
+ ARMSH_LSR,
+ ARMSH_ASR,
+ ARMSH_ROR
+} ARMShift;
+
+/* ARM condition codes. */
+typedef enum ARMCC {
+ CC_EQ,
+ CC_NE,
+ CC_CS,
+ CC_CC,
+ CC_MI,
+ CC_PL,
+ CC_VS,
+ CC_VC,
+ CC_HI,
+ CC_LS,
+ CC_GE,
+ CC_LT,
+ CC_GT,
+ CC_LE,
+ CC_AL,
+ CC_HS = CC_CS,
+ CC_LO = CC_CC
+} ARMCC;
+
+#endif
diff --git a/src/vm_armv7m.dasc b/src/vm_armv7m.dasc
new file mode 100755
index 00000000..13266007
--- /dev/null
+++ b/src/vm_armv7m.dasc
@@ -0,0 +1,4901 @@
+|// Low-level VM code for ARMv7-M CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch armv7m
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|// The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
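+|// Thumb-2 LDRD/STRD only take an immediate offset, so the macros below
+|// synthesize register-indexed doubleword accesses from add/ldm and
+|// add/ldrd sequences (conditional variants wrapped in IT blocks).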
+|.macro ldrd_i, rt, rt2, rn, rm
+| add rt, rn, rm
+| ldm rt, {rt, rt2}
+|.endmacro
+|.macro ldrd_iw, rt, rt2, rn, rm
+| add rn, rn, rm
+| ldrd rt, rt2, [rn]
+|.endmacro
+|
+|.macro ldrdlo_i, rt, rt2, rn, rm
+| itt lo
+| addlo rt, rn, rm
+| ldmlo rt, {rt, rt2}
+|.endmacro
+|.macro ldrdlo_iw, rt, rt2, rn, rm
+| itt lo
+| addlo rn, rn, rm
+| ldrdlo rt, rt2, [rn]
+|.endmacro
+|
+|.macro strd_i, rt, rt2, rn, rm
+| add rn, rn, rm
+| strd rt, rt2, [rn]
+| sub rn, rn, rm
+|.endmacro
+|
+|.macro strdne_i, rt, rt2, rn, rm
+| ittt ne
+| addne rn, rn, rm
+| strdne rt, rt2, [rn]
+| subne rn, rn, rm
+|.endmacro
+|.macro strdls_i, rt, rt2, rn, rm
+| ittt ls
+| addls rn, rn, rm
+| strdls rt, rt2, [rn]
+| subls rn, rn, rm
+|.endmacro
+|.macro strdhi_i, rt, rt2, rn, rm
+| ittt hi
+| addhi rn, rn, rm
+| strdhi rt, rt2, [rn]
+| subhi rn, rn, rm
+|.endmacro
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding.
+|.define KBASE, r5 // Constants of current Lua function.
+|.define PC, r6 // Next PC.
+|.define DISPATCH,r7 // Opcode dispatch table.
+|.define LREG, r8 // Register holding lua_State (also in SAVE_L).
+|
+|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+.
+|.define BASE, r9 // Base of current Lua stack frame.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA, r10 // Callee-save.
+|.define RC, r11 // Callee-save.
+|.define RB, r12
+|.define OP, r12 // Overlaps RB, must not be lr.
+|.define INS, lr
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, r0
+|.define CARG2, r1
+|.define CARG3, r2
+|.define CARG4, r3
+|
+|.define CRET1, r0
+|.define CRET2, r1
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define SAVE_R4, [sp, #28]
+|.define CFRAME_SPACE, #28
+|.define SAVE_ERRF, [sp, #24]
+|.define SAVE_NRES, [sp, #20]
+|.define SAVE_CFRAME, [sp, #16]
+|.define SAVE_L, [sp, #12]
+|.define SAVE_PC, [sp, #8]
+|.define SAVE_MULTRES, [sp, #4]
+|.define ARG5, [sp]
+|
+|.define TMPDhi, [sp, #4]
+|.define TMPDlo, [sp]
+|.define TMPD, [sp]
+|.define TMPDp, sp
+|
+|.if FPU
+|.macro saveregs
+| push {r5, r6, r7, r8, r9, r10, r11, lr}
+| vpush {d8-d15}
+| sub sp, sp, CFRAME_SPACE+4
+| str r4, SAVE_R4
+|.endmacro
+|.macro restoreregs_ret
+| ldr r4, SAVE_R4
+| add sp, sp, CFRAME_SPACE+4
+| vpop {d8-d15}
+| pop {r5, r6, r7, r8, r9, r10, r11, pc}
+|.endmacro
+|.else
+|.macro saveregs
+| push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+| sub sp, sp, CFRAME_SPACE
+|.endmacro
+|.macro restoreregs_ret
+| add sp, sp, CFRAME_SPACE
+| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+|.endmacro
+|.endif
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; bkpt #0; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC, #-8
+|.define FRAME_PC, #-4
+|
+|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro
+|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro
+|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro
+|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro
+|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro
+|
+|// Instruction fetch.
+|.macro ins_NEXT1
+| ldrb OP, [PC]
+|.endmacro
+|.macro ins_NEXT2
+| ldr INS, [PC], #4
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT3
+| ldr OP, [DISPATCH, OP, lsl #2]
+| decode_RA8 RA, INS
+| decode_RD RC, INS
+| bx OP
+|.endmacro
+|.macro ins_NEXT
+| ins_NEXT1
+| ins_NEXT2
+| ins_NEXT3
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+| .define ins_next1, ins_NEXT1
+| .define ins_next2, ins_NEXT2
+| .define ins_next3, ins_NEXT3
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| .macro ins_next
+| b ->ins_next
+| .endmacro
+| .macro ins_next1
+| .endmacro
+| .macro ins_next2
+| .endmacro
+| .macro ins_next3
+| b ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Avoid register name substitution for field name.
+#define field_pc pc
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| ldr PC, LFUNC:CARG3->field_pc
+| ldrb OP, [PC] // STALL: load PC. early PC.
+| ldr INS, [PC], #4
+| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP.
+| decode_RA8 RA, INS
+| add RA, RA, BASE
+| bx OP
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+| str PC, [BASE, FRAME_PC]
+| ins_callt // STALL: locked PC.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to test operand types.
+|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro
+|.macro checktpeq, reg, tp; it eq; cmneq reg, #-tp; .endmacro
+|.macro checktpne, reg, tp; it ne; cmnne reg, #-tp; .endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
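+|// Hot counters live in a 64-entry uint16 table in front of DISPATCH,
+|// indexed by ((PC >> 1) & 126); GG_DISP2HOT is negative, hence the
+|// sub with #-GG_DISP2HOT.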
+|.macro hotcheck, delta
+| lsr CARG1, PC, #1
+| and CARG1, CARG1, #126
+| sub CARG1, CARG1, #-GG_DISP2HOT
+| ldrh CARG2, [DISPATCH, CARG1]
+| subs CARG2, CARG2, #delta
+| strh CARG2, [DISPATCH, CARG1]
+|.endmacro
+|
+|.macro hotloop
+| hotcheck HOTCOUNT_LOOP
+| blo ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+| hotcheck HOTCOUNT_CALL
+| blo ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg; push {r12}; sub r12, DISPATCH, #-DISPATCH_GL(vmstate); str reg, [r12]; pop {r12}; .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp
+| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain)
+| ldr tmp, [tmp]
+| str tmp, tab->gclist
+| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain)
+| bic mark, mark, #LJ_GC_BLACK // black2gray(tab)
+| str tab, [tmp]
+| strb mark, tab->marked
+|.endmacro
+|
+|.macro .IOS, a, b
+|.if IOS
+| a, b
+|.endif
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | // See vm_return. Also: RB = previous base.
+ | tst PC, #FRAME_P
+ | beq ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
+ | mvn CARG2, #~LJ_TTRUE
+ | mov BASE, RB
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | str CARG2, [RA, FRAME_PC] // Prepend true to results.
+ | sub RA, RA, #8
+ |
+ |->vm_returnc:
+ | adds RC, RC, #8 // RC = (nresults+1)*8.
+ | mov CRET1, #LUA_YIELD
+ | beq ->vm_unwind_c_eh
+ | str RC, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | beq ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+ | // CARG1 = PC & FRAME_TYPE
+ | bic RB, PC, #FRAME_TYPEP
+ | cmp CARG1, #FRAME_C
+ | sub RB, BASE, RB // RB = previous base.
+ | bne ->vm_returnp
+ |
+ | str RB, L->base
+ | ldr KBASE, SAVE_NRES
+ | mv_vmstate CARG4, C
+ | sub BASE, BASE, #8
+ | subs CARG3, RC, #8
+ | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8
+ | st_vmstate CARG4
+ | beq >2
+ |1:
+ | subs CARG3, CARG3, #8
+ | ldrd CARG1, CARG2, [RA], #8
+ | strd CARG1, CARG2, [BASE], #8
+ | bne <1
+ |2:
+ | cmp KBASE, RC // More/less results wanted?
+ | bne >6
+ |3:
+ | str BASE, L->top // Store new top.
+ |
+ |->vm_leave_cp:
+ | ldr RC, SAVE_CFRAME // Restore previous C frame.
+ | mov CRET1, #0 // Ok return status for vm_pcall.
+ | str RC, L->cframe
+ |
+ |->vm_leave_unw:
+ | restoreregs_ret
+ |
+ |6:
+ | blt >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | ldr CARG3, L->maxstack
+ | mvn CARG2, #~LJ_TNIL
+ | cmp BASE, CARG3
+ | bhs >8
+ | str CARG2, [BASE, #4]
+ | add RC, RC, #8
+ | add BASE, BASE, #8
+ | b <2
+ |
+ |7: // Less results wanted.
+ | sub CARG1, RC, KBASE
+ | cmp KBASE, #0 // LUA_MULTRET+1 case?
+ | it ne
+ | subne BASE, BASE, CARG1 // Either keep top or shrink it.
+ | b <3
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | str BASE, L->top // Save current top held in BASE (yes).
+ | lsr CARG2, KBASE, #3
+ | mov CARG1, L
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->top // Need the (realloced) L->top in BASE.
+ | b <2
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | mov sp, CARG1
+ | mov CRET1, CARG2
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | mv_vmstate CARG4, C
+ | ldr GL:CARG3, L->glref
+ | str CARG4, GL:CARG3->vmstate
+ | b ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated.
+ | mov sp, CARG1
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | mov MASKR8, #255
+ | mov RC, #16 // 2 results: false + error message.
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
+ | ldr BASE, L->base
+ | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
+ | mvn CARG1, #~LJ_TFALSE
+ | sub RA, BASE, #8 // Results start at BASE-8.
+ | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
+ | add DISPATCH, DISPATCH, #GG_G2DISP
+ | mv_vmstate CARG2, INTERP
+ | str CARG1, [BASE, #-4] // Prepend false to error message.
+ | st_vmstate CARG2
+ | b ->vm_returnc
+ |
+ |->vm_unwind_ext: // Complete external unwind.
+#if !LJ_NO_UNWIND
+ | push {r0, r1, r2, lr}
+ | bl extern _Unwind_Complete
+ | ldr r0, [sp]
+ | bl extern _Unwind_DeleteException
+ | pop {r0, r1, r2, lr}
+ | mov r0, r1
+ | bx r2
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | // CARG1 = L
+ | mov CARG2, #LUA_MINSTACK
+ | b >2
+ |
+ |->vm_growstack_l: // Grow stack for Lua function.
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+ | add RC, BASE, RC
+ | sub RA, RA, BASE
+ | mov CARG1, L
+ | str BASE, L->base
+ | add PC, PC, #4 // Must point after first instruction.
+ | str RC, L->top
+ | lsr CARG2, RA, #3
+ |2:
+ | // L->base = new base, L->top = top
+ | str PC, SAVE_PC
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->base
+ | ldr RC, L->top
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, RC, BASE
+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | mov L, CARG1
+ | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table.
+ | mov BASE, CARG2
+ | add DISPATCH, DISPATCH, #GG_G2DISP
+ | str L, SAVE_L
+ | mov PC, #FRAME_CP
+ | str CARG3, SAVE_NRES
+ | add CARG2, sp, #CFRAME_RESUME
+ | ldrb CARG1, L->status
+ | str CARG3, SAVE_ERRF
+ | str L, SAVE_PC // Any value outside of bytecode is ok.
+ | str CARG3, SAVE_CFRAME
+ | cmp CARG1, #0
+ | str CARG2, L->cframe
+ | beq >3
+ |
+ | // Resume after yield (like a return).
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | mov RA, BASE
+ | ldr BASE, L->base
+ | ldr CARG1, L->top
+ | mov MASKR8, #255
+ | strb CARG3, L->status
+ | sub RC, CARG1, BASE
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
+ | mv_vmstate CARG2, INTERP
+ | add RC, RC, #8
+ | ands CARG1, PC, #FRAME_TYPE
+ | st_vmstate CARG2
+ | str RC, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | mov PC, #FRAME_CP
+ | str CARG4, SAVE_ERRF
+ | b >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | mov PC, #FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | ldr RC, L:CARG1->cframe
+ | str CARG3, SAVE_NRES
+ | mov L, CARG1
+ | str CARG1, SAVE_L
+ | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
+ | mov BASE, CARG2
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | str RC, SAVE_CFRAME
+ | add DISPATCH, DISPATCH, #GG_G2DISP
+ | str sp, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | ldr RB, L->base // RB = old base (for vmeta_call).
+ | ldr CARG1, L->top
+ | mov MASKR8, #255
+ | add PC, PC, BASE
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
+ | sub PC, PC, RB // PC = frame delta + frame type
+ | mv_vmstate CARG2, INTERP
+ | sub NARGS8:RC, CARG1, BASE
+ | st_vmstate CARG2
+ |
+ |->vm_call_dispatch:
+ | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+ | ldrd CARG3, CARG4, [BASE, FRAME_FUNC]
+ | checkfunc CARG4, ->vmeta_call
+ |
+ |->vm_call_dispatch_f:
+ | ins_call
+ | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | mov L, CARG1
+ | ldr RA, L:CARG1->stack
+ | str CARG1, SAVE_L
+ | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
+ | ldr RB, L->top
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | ldr RC, L->cframe
+ | add DISPATCH, DISPATCH, #GG_G2DISP
+ | sub RA, RA, RB // Compute -savestack(L, L->top).
+ | mov RB, #0
+ | str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | str RB, SAVE_ERRF // No error function.
+ | str RC, SAVE_CFRAME
+ | str sp, L->cframe // Add our C frame to cframe chain.
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | movs BASE, CRET1
+ | mov PC, #FRAME_CP
+ | bne <3 // Else continue with the call.
+ | b ->vm_leave_cp // No base? Just remove C frame.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+ | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+ | ldr CARG1, [BASE, #-16] // Get continuation.
+ | mov CARG4, BASE
+ | mov BASE, RB // Restore caller BASE.
+ |.if FFI
+ | cmp CARG1, #1
+ |.endif
+ | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC].
+ | mvn INS, #~LJ_TNIL
+ | add CARG2, RA, RC
+ | str INS, [CARG2, #-4] // Ensure one valid arg.
+ |.if FFI
+ | bls >1
+ |.endif
+ | ldr CARG3, LFUNC:CARG3->field_pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | // BASE = base, RA = resultptr, CARG4 = meta base
+ | bx CARG1
+ |
+ |.if FFI
+ |1:
+ | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: tailcall from C function.
+ | sub CARG4, CARG4, #16
+ | sub RC, CARG4, BASE
+ | b ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // RA = resultptr, CARG4 = meta base
+ | ldr INS, [PC, #-4]
+ | sub CARG2, CARG4, #16
+ | ldrd CARG3, CARG4, [RA]
+ | str BASE, L->base
+ | decode_RB8 RC, INS
+ | decode_RA8 RA, INS
+ | add CARG1, BASE, RC
+ | subs CARG1, CARG2, CARG1
+ | itt ne
+ | strdne CARG3, CARG4, [CARG2]
+ | movne CARG3, CARG1
+ | bne ->BC_CAT_Z
+ | strd_i CARG3, CARG4, BASE, RA
+ | b ->cont_nop
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets1:
+ | add CARG2, BASE, RB
+ | b >2
+ |
+ |->vmeta_tgets:
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+ | mvn CARG4, #~LJ_TTAB
+ | str TAB:RB, [CARG2]
+ | str CARG4, [CARG2, #4]
+ |2:
+ | mvn CARG4, #~LJ_TSTR
+ | str STR:RC, TMPDlo
+ | str CARG4, TMPDhi
+ | mov CARG3, TMPDp
+ | b >1
+ |
+ |->vmeta_tgetb: // RC = index
+ | decode_RB8 RB, INS
+ | str RC, TMPDlo
+ | mvn CARG4, #~LJ_TISNUM
+ | add CARG2, BASE, RB
+ | str CARG4, TMPDhi
+ | mov CARG3, TMPDp
+ | b >1
+ |
+ |->vmeta_tgetv:
+ | add CARG2, BASE, RB
+ | add CARG3, BASE, RC
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #0
+ | beq >3
+ | ldrd CARG3, CARG4, [CRET1]
+ | ins_next1
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | rsb CARG1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #16 // 2 args for func(t, k).
+ | str PC, [BASE, #-12] // [cont|PC]
+ | add PC, CARG1, BASE
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | .IOS mov RC, BASE
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | .IOS mov BASE, RC
+ | cmp CRET1, #0
+ | ite ne
+ | ldrdne CARG1, CARG2, [CRET1]
+ | mvneq CARG2, #~LJ_TNIL
+ | b ->BC_TGETR_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets1:
+ | add CARG2, BASE, RB
+ | b >2
+ |
+ |->vmeta_tsets:
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+ | mvn CARG4, #~LJ_TTAB
+ | str TAB:RB, [CARG2]
+ | str CARG4, [CARG2, #4]
+ |2:
+ | mvn CARG4, #~LJ_TSTR
+ | str STR:RC, TMPDlo
+ | str CARG4, TMPDhi
+ | mov CARG3, TMPDp
+ | b >1
+ |
+ |->vmeta_tsetb: // RC = index
+ | decode_RB8 RB, INS
+ | str RC, TMPDlo
+ | mvn CARG4, #~LJ_TISNUM
+ | add CARG2, BASE, RB
+ | str CARG4, TMPDhi
+ | mov CARG3, TMPDp
+ | b >1
+ |
+ |->vmeta_tsetv:
+ | add CARG2, BASE, RB
+ | add CARG3, BASE, RC
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #0
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >3
+ | ins_next1
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | strd CARG3, CARG4, [CRET1]
+ | ins_next2
+ | ins_next3
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | rsb CARG1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
+ | strd CARG3, CARG4, [BASE, #16] // Copy value to third argument.
+ | str PC, [BASE, #-12] // [cont|PC]
+ | add PC, CARG1, BASE
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | str BASE, L->base
+ | .IOS mov RC, BASE
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ | .IOS mov BASE, RC
+ | b ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | mov CARG1, L
+ | sub PC, PC, #4
+ | mov CARG2, RA
+ | str BASE, L->base
+ | mov CARG3, RC
+ | str PC, SAVE_PC
+ | decode_OP CARG4, INS
+ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // Returns 0/1 or TValue * (metamethod).
+ |3:
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #1
+ | bhi ->vmeta_binop
+ |4:
+ | ldrh RB, [PC, #2]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | it hs
+ | subhs PC, RB, #0x20000
+ |->cont_nop:
+ | ins_next
+ |
+ |->cont_ra: // RA = resultptr
+ | ldr INS, [PC, #-4]
+ | ldrd CARG1, CARG2, [RA]
+ | decode_RA8 CARG3, INS
+ | strd_i CARG1, CARG2, BASE, CARG3
+ | b ->cont_nop
+ |
+ |->cont_condt: // RA = resultptr
+ | ldr CARG2, [RA, #4]
+ | mvn CARG1, #~LJ_TTRUE
+ | cmp CARG1, CARG2 // Branch if result is true.
+ | b <4
+ |
+ |->cont_condf: // RA = resultptr
+ | ldr CARG2, [RA, #4]
+ | checktp CARG2, LJ_TFALSE // Branch if result is false.
+ | b <4
+ |
+ |->vmeta_equal:
+ | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | mov CARG2, INS
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | lsr CARG2, RA, #3
+ | mov CARG3, RC
+ | str PC, SAVE_PC
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | .IOS ldr BASE, L->base
+ | b ->cont_nop
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vn:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | add CARG3, BASE, RB
+ | add CARG4, KBASE, RC
+ | b >1
+ |
+ |->vmeta_arith_nv:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | add CARG4, BASE, RB
+ | add CARG3, KBASE, RC
+ | b >1
+ |
+ |->vmeta_unm:
+ | ldr INS, [PC, #-8]
+ | sub PC, PC, #4
+ | add CARG3, BASE, RC
+ | add CARG4, BASE, RC
+ | b >1
+ |
+ |->vmeta_arith_vv:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | add CARG3, BASE, RB
+ | add CARG4, BASE, RC
+ |1:
+ | decode_OP OP, INS
+ | add CARG2, BASE, RA
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | str OP, ARG5
+ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #0
+ | beq ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+ | sub CARG2, CRET1, BASE
+ | str PC, [CRET1, #-12] // [cont|PC]
+ | add PC, CARG2, #FRAME_CONT
+ | mov BASE, CRET1
+ | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
+ | b ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | add CARG2, BASE, RC
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_len // (lua_State *L, TValue *o)
+ | // Returns NULL (retry) or TValue * (metamethod base).
+ | .IOS ldr BASE, L->base
+#if LJ_52
+ | cmp CRET1, #0
+ | bne ->vmeta_binop // Binop call for compatibility.
+ | ldr TAB:CARG1, [BASE, RC]
+ | b ->BC_LEN_Z
+#else
+ | b ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // RB = old base, BASE = new base, RC = nargs*8
+ | mov CARG1, L
+ |  str RB, L->base // This is the caller's base!
+ | sub CARG2, BASE, #8
+ | str PC, SAVE_PC
+ | add CARG3, BASE, NARGS8:RC
+ | .IOS mov RA, BASE
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | .IOS mov BASE, RA
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | ins_call
+ |
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
+ | // BASE = old base, RA = new base, RC = nargs*8
+ | mov CARG1, L
+ | str BASE, L->base
+ | sub CARG2, RA, #8
+ | str PC, SAVE_PC
+ | add CARG3, RA, NARGS8:RC
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | .IOS ldr BASE, L->base
+ | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here.
+ | ldr PC, [BASE, FRAME_PC]
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | b ->BC_CALLT2_Z
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, RA
+ | str PC, SAVE_PC
+ | bl extern lj_meta_for // (lua_State *L, TValue *base)
+ | .IOS ldr BASE, L->base
+ |.if JIT
+ | ldrb OP, [PC, #-4]
+ |.endif
+ | ldr INS, [PC, #-4]
+ |.if JIT
+ | cmp OP, #BC_JFORI
+ |.endif
+ | decode_RA8 RA, INS
+ | decode_RD RC, INS
+ |.if JIT
+ | beq =>BC_JFORI
+ |.endif
+ | b =>BC_FORI
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | ldrd CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | ldrd CARG1, CARG2, [BASE]
+ | ldrd CARG3, CARG4, [BASE, #8]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc_1 name
+ | checktp CARG2, LJ_TISNUM
+ | bhs ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | checktp CARG2, LJ_TISNUM
+ | it lo
+ | cmnlo CARG4, #-LJ_TISNUM
+ | bhs ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_d, name
+ | .ffunc name
+ | ldr CARG2, [BASE, #4]
+ | cmp NARGS8:RC, #8
+ | vldr d0, [BASE]
+ | blo ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | bhs ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_dd, name
+ | .ffunc name
+ | ldr CARG2, [BASE, #4]
+ | ldr CARG4, [BASE, #12]
+ | cmp NARGS8:RC, #16
+ | vldr d0, [BASE]
+ | vldr d1, [BASE, #8]
+ | blo ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | it lo
+ | cmnlo CARG4, #-LJ_TISNUM
+ | bhs ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+ |.macro ffgccheck
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total)
+ | ldr CARG1, [CARG1]
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(gc.threshold)
+ | ldr CARG2, [CARG2]
+ | cmp CARG1, CARG2
+ | it ge
+ | blge ->fff_gcstep
+ |.endmacro
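+ |
+ |// A C sketch of what ffgccheck does; it is the inlined form of the
+ |// lj_gc_check() macro from lj_gc.h:
+ |//
+ |//   if (G(L)->gc.total >= G(L)->gc.threshold)
+ |//     lj_gc_step(L);   /* here: via the ->fff_gcstep wrapper below */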
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | checktp CARG2, LJ_TTRUE
+ | bhi ->fff_fallback
+ | ldr PC, [BASE, FRAME_PC]
+ | strd CARG1, CARG2, [BASE, #-8]
+ | mov RB, BASE
+ | subs RA, NARGS8:RC, #8
+ | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
+ | beq ->fff_res // Done if exactly 1 argument.
+ |1:
+ | ldrd CARG1, CARG2, [RB, #8]
+ | subs RA, RA, #8
+ | strd CARG1, CARG2, [RB], #8
+ | bne <1
+ | b ->fff_res
+ |
+ |.ffunc type
+ | ldr CARG2, [BASE, #4]
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | it lo
+ | mvnlo CARG2, #~LJ_TISNUM
+ | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1
+ | lsl CARG4, CARG4, #3
+ | ldrd_i CARG1, CARG2, CFUNC:CARG3, CARG4
+ | b ->fff_restv
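+ |
+ |// Note: no per-type branching here. The type-name strings are preloaded
+ |// as upvalues of this fast function (cf. the LJLIB_PUSH list in
+ |// lib_base.c), and the rsb above turns the inverted type tag into an
+ |// upvalue index for the ldrd.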
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | checktp CARG2, LJ_TTAB
+ | it ne
+ | cmnne CARG2, #-LJ_TUDATA
+ | bne >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | ldr TAB:RB, TAB:CARG1->metatable
+ |2:
+ | mvn CARG2, #~LJ_TNIL
+ | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])]
+ | cmp TAB:RB, #0
+ | beq ->fff_restv
+ | ldr CARG3, TAB:RB->hmask
+ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:RB->node
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | ldrd CARG3, CARG4, NODE:INS->key // STALL: early NODE:INS.
+ | ldrd CARG1, CARG2, NODE:INS->val
+ | ldr NODE:INS, NODE:INS->next
+ | checktp CARG4, LJ_TSTR
+ | it eq
+ | cmpeq CARG3, STR:RC
+ | beq >5
+ | cmp NODE:INS, #0
+ | bne <3
+ |4:
+ | mov CARG1, RB // Use metatable as default result.
+ | mvn CARG2, #~LJ_TTAB
+ | b ->fff_restv
+ |5:
+ | checktp CARG2, LJ_TNIL
+ | bne ->fff_restv
+ | b <4
+ |
+ |6:
+ | checktp CARG2, LJ_TISNUM
+ | ite hs
+ | mvnhs CARG2, CARG2
+ | movlo CARG2, #~LJ_TISNUM
+ | add CARG4, DISPATCH, CARG2, lsl #2
+ | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+ | b <2
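+ |
+ |// The loop at 3: is a hash-chain walk for the "__metatable" string.
+ |// A rough C equivalent (names from lj_tab.h; a Node is three 8-byte
+ |// slots, hence the idx*3*8 scaling):
+ |//
+ |//   Node *n = &t->node[mmname->sid & t->hmask];
+ |//   do {
+ |//     if (tvisstr(&n->key) && strV(&n->key) == mmname)
+ |//       return tvisnil(&n->val) ? mt : &n->val;  /* found __metatable */
+ |//     n = nextnode(n);
+ |//   } while (n);
+ |//   return mt;  /* key absent: the metatable itself is the result */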
+ |
+ |.ffunc_2 setmetatable
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp CARG2, LJ_TTAB
+ | it eq
+ | ldreq TAB:RB, TAB:CARG1->metatable
+ | checktpeq CARG4, LJ_TTAB
+ | it eq
+ | ldrbeq CARG4, TAB:CARG1->marked
+ | it eq
+ | cmpeq TAB:RB, #0
+ | bne ->fff_fallback
+ | tst CARG4, #LJ_GC_BLACK // isblack(table)
+ | str TAB:CARG3, TAB:CARG1->metatable
+ | beq ->fff_restv
+ | barrierback TAB:CARG1, CARG4, CARG3
+ | b ->fff_restv
+ |
+ |.ffunc rawget
+ | ldrd CARG3, CARG4, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | mov CARG2, CARG3
+ | checktab CARG4, ->fff_fallback
+ | mov CARG1, L
+ | add CARG3, BASE, #8
+ | .IOS mov RA, BASE
+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // Returns cTValue *.
+ | .IOS mov BASE, RA
+ | ldrd CARG1, CARG2, [CRET1]
+ | b ->fff_restv
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | ldrd CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #8
+ | bne ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | bls ->fff_restv
+ | b ->fff_fallback
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | checktp CARG2, LJ_TSTR
+ | // A __tostring method in the string base metatable is ignored.
+ | beq ->fff_restv
+ | // Handle numbers inline, unless a number base metatable is present.
+ | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])]
+ | str BASE, L->base
+ | checktp CARG2, LJ_TISNUM
+ | it ls
+ | cmpls CARG4, #0
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | bhi ->fff_fallback
+ | ffgccheck
+ | mov CARG1, L
+ | mov CARG2, BASE
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // Returns GCstr *.
+ | ldr BASE, L->base
+ | mvn CARG2, #~LJ_TSTR
+ | b ->fff_restv
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | mvn CARG4, #~LJ_TNIL
+ | checktab CARG2, ->fff_fallback
+ | strd_i CARG3, CARG4, BASE, NARGS8:RC // Set missing 2nd arg to nil.
+ | ldr PC, [BASE, FRAME_PC]
+ | add CARG2, BASE, #8
+ | sub CARG3, BASE, #8
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // Returns 1=found, 0=end, -1=error.
+ | .IOS ldr BASE, L->base
+ | cmp CRET1, #0
+ | mov RC, #(2+1)*8
+ | bgt ->fff_res // Found key/value.
+ | bmi ->fff_fallback // Invalid key.
+ | // End of traversal: return nil.
+ | mvn CRET2, #~LJ_TNIL
+ | b ->fff_restv
+ |
+ |.ffunc_1 pairs
+ | checktab CARG2, ->fff_fallback
+#if LJ_52
+ | ldr TAB:RB, TAB:CARG1->metatable
+#endif
+ | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cmp TAB:RB, #0
+ | bne ->fff_fallback
+#endif
+ | mvn CARG2, #~LJ_TNIL
+ | mov RC, #(3+1)*8
+ | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8]
+ | str CARG2, [BASE, #12]
+ | b ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | checktp CARG2, LJ_TTAB
+ | checktpeq CARG4, LJ_TISNUM
+ | bne ->fff_fallback
+ | ldr RB, TAB:CARG1->asize
+ | ldr RC, TAB:CARG1->array
+ | add CARG3, CARG3, #1
+ | ldr PC, [BASE, FRAME_PC]
+ | cmp CARG3, RB
+ | add RC, RC, CARG3, lsl #3
+ | strd CARG3, CARG4, [BASE, #-8]
+ | it lo
+ | ldrdlo CARG1, CARG2, [RC]
+ | mov RC, #(0+1)*8
+ | bhs >2 // Not in array part?
+ |1:
+ | checktp CARG2, LJ_TNIL
+ | itt ne
+ | movne RC, #(2+1)*8
+ | strdne CARG1, CARG2, [BASE]
+ | b ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | ldr RB, TAB:CARG1->hmask
+ | mov CARG2, CARG3
+ | cmp RB, #0
+ | beq ->fff_res
+ | .IOS mov RA, BASE
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | .IOS mov BASE, RA
+ | cmp CRET1, #0
+ | beq ->fff_res
+ | ldrd CARG1, CARG2, [CRET1]
+ | b <1
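+ |
+ |// One ipairs step, behaviourally (a sketch; cf. lj_tab.c):
+ |//
+ |//   i = intV(key) + 1;
+ |//   if ((uint32_t)i < t->asize)
+ |//     v = arrayslot(t, i);          /* fast path above */
+ |//   else if (t->hmask == 0)
+ |//     return 0;                     /* no hash part: end of traversal */
+ |//   else
+ |//     v = lj_tab_getinth(t, i);     /* C helper, as called above */
+ |//   return (v && !tvisnil(v)) ? 2 : 0;   /* results: (i, *v) or none */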
+ |
+ |.ffunc_1 ipairs
+ | checktab CARG2, ->fff_fallback
+#if LJ_52
+ | ldr TAB:RB, TAB:CARG1->metatable
+#endif
+ | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cmp TAB:RB, #0
+ | bne ->fff_fallback
+#endif
+ | mov CARG1, #0
+ | mvn CARG2, #~LJ_TISNUM
+ | mov RC, #(3+1)*8
+ | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8]
+ | strd CARG1, CARG2, [BASE, #8]
+ | b ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc pcall
+ | sub RA, DISPATCH, #-DISPATCH_GL(hookmask)
+ | ldrb RA, [RA]
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ | tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
+ | mov RB, BASE
+ | add BASE, BASE, #8
+ | ite eq
+ | moveq PC, #8+FRAME_PCALL
+ | movne PC, #8+FRAME_PCALLH
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | b ->vm_call_dispatch
+ |
+ |.ffunc_2 xpcall
+ | sub RA, DISPATCH, #-DISPATCH_GL(hookmask)
+ | ldrb RA, [RA]
+ | checkfunc CARG4, ->fff_fallback // Traceback must be a function.
+ | mov RB, BASE
+ | strd CARG1, CARG2, [BASE, #8] // Swap function and traceback.
+ | strd CARG3, CARG4, [BASE]
+ | tst RA, #HOOK_ACTIVE // Remember active hook before pcall.
+ | add BASE, BASE, #16
+ | ite eq
+ | moveq PC, #16+FRAME_PCALL
+ | movne PC, #16+FRAME_PCALLH
+ | sub NARGS8:RC, NARGS8:RC, #16
+ | b ->vm_call_dispatch
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | checktp CARG2, LJ_TTHREAD
+ | bne ->fff_fallback
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+ |.endif
+ | ldr PC, [BASE, FRAME_PC]
+ | str BASE, L->base
+ | ldr CARG2, L:CARG1->top
+ | ldrb RA, L:CARG1->status
+ | ldr RB, L:CARG1->base
+ | add CARG3, CARG2, NARGS8:RC
+ | add CARG4, CARG2, RA
+ | str PC, SAVE_PC
+ | cmp CARG4, RB
+ | beq ->fff_fallback
+ | ldr CARG4, L:CARG1->maxstack
+ | ldr RB, L:CARG1->cframe
+ | cmp RA, #LUA_YIELD
+ | it ls
+ | cmpls CARG3, CARG4
+ | it ls
+ | cmpls RB, #0
+ | bhi ->fff_fallback
+ |1:
+ |.if resume
+ | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
+ | add BASE, BASE, #8
+ | sub NARGS8:RC, NARGS8:RC, #8
+ |.endif
+ | str CARG3, L:CARG1->top
+ | str BASE, L->top
+ |2: // Move args to coroutine.
+ | ldrd_i CARG3, CARG4, BASE, RB
+ | cmp RB, NARGS8:RC
+ | strdne_i CARG3, CARG4, CARG2, RB
+ | add RB, RB, #8
+ | bne <2
+ |
+ | mov CARG3, #0
+ | mov L:RA, L:CARG1
+ | mov CARG4, #0
+ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ | // Returns thread status.
+ |4:
+ | ldr CARG3, L:RA->base
+ | mv_vmstate CARG2, INTERP
+ | ldr CARG4, L:RA->top
+ | cmp CRET1, #LUA_YIELD
+ | ldr BASE, L->base
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | st_vmstate CARG2
+ | bhi >8
+ | subs RC, CARG4, CARG3
+ | ldr CARG1, L->maxstack
+ | add CARG2, BASE, RC
+ | beq >6 // No results?
+ | cmp CARG2, CARG1
+ | mov RB, #0
+ | bhi >9 // Need to grow stack?
+ |
+ | sub CARG4, RC, #8
+ | str CARG3, L:RA->top // Clear coroutine stack.
+ |5: // Move results from coroutine.
+ | ldrd_i CARG1, CARG2, CARG3, RB
+ | cmp RB, CARG4
+ | strd_i CARG1, CARG2, BASE, RB
+ | add RB, RB, #8
+ | bne <5
+ |6:
+ |.if resume
+ | mvn CARG3, #~LJ_TTRUE
+ | add RC, RC, #16
+ |7:
+ | str CARG3, [BASE, #-4] // Prepend true/false to results.
+ | sub RA, BASE, #8
+ |.else
+ | mov RA, BASE
+ | add RC, RC, #8
+ |.endif
+ | ands CARG1, PC, #FRAME_TYPE
+ | str PC, SAVE_PC
+ | str RC, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | ldrd CARG1, CARG2, [CARG4, #-8]!
+ | mvn CARG3, #~LJ_TFALSE
+ | mov RC, #(2+1)*8
+ | str CARG4, L:RA->top // Remove error from coroutine stack.
+ | strd CARG1, CARG2, [BASE] // Copy error message.
+ | b <7
+ |.else
+ | mov CARG1, L
+ | mov CARG2, L:RA
+ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Never returns.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov CARG1, L
+ | lsr CARG2, RC, #3
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | mov CRET1, #0
+ | b <4
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | ldr CARG1, L->cframe
+ | add CARG2, BASE, NARGS8:RC
+ | str BASE, L->base
+ | tst CARG1, #CFRAME_RESUME
+ | str CARG2, L->top
+ | mov CRET1, #LUA_YIELD
+ | mov CARG3, #0
+ | beq ->fff_fallback
+ | str CARG3, L->cframe
+ | strb CRET1, L->status
+ | b ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.macro math_round, func
+ | .ffunc_1 math_ .. func
+ | checktp CARG2, LJ_TISNUM
+ | beq ->fff_restv
+ | bhi ->fff_fallback
+ | // Round FP value and normalize result.
+ | lsl CARG3, CARG2, #1
+ | adds RB, CARG3, #0x00200000
+ | bpl >2 // |x| < 1?
+ | mvn CARG4, #0x3e0
+ | subs RB, CARG4, RB, asr #21
+ | lsl CARG4, CARG2, #11
+ | lsl CARG3, CARG1, #11
+ | orr CARG4, CARG4, #0x80000000
+ | rsb INS, RB, #32
+ | orr CARG4, CARG4, CARG1, lsr #21
+ | bls >3 // |x| >= 2^31?
+ | lsl CARG1, CARG4, INS
+ | orr CARG3, CARG3, CARG1
+ | lsr CARG1, CARG4, RB
+ |.if "func" == "floor"
+ | tst CARG3, CARG2, asr #31
+ | it ne
+ | addne CARG1, CARG1, #1
+ |.else
+ | bics CARG3, CARG3, CARG2, asr #31
+ | it ne
+ | addsne CARG1, CARG1, #1
+ | it vs
+ | ldrdvs CARG1, CARG2, >9
+ | bvs ->fff_restv
+ |.endif
+ | cmp CARG2, #0
+ | it lt
+ | rsblt CARG1, CARG1, #0
+ |1:
+ | mvn CARG2, #~LJ_TISNUM
+ | b ->fff_restv
+ |
+ |2: // |x| < 1
+ | bcs ->fff_restv // |x| is not finite.
+ | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo
+ |.if "func" == "floor"
+ | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1
+ | ite eq
+ | moveq CARG1, #0
+ | mvnne CARG1, #0
+ |.else
+ | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1
+ | ite eq
+ | moveq CARG1, #0
+ | movne CARG1, #1
+ |.endif
+ | mvn CARG2, #~LJ_TISNUM
+ | b ->fff_restv
+ |
+ |3: // |x| >= 2^31. Check for x == -(2^31).
+ | it eq
+ | cmpeq CARG4, #0x80000000
+ |.if "func" == "floor"
+ | it eq
+ | cmpeq CARG3, #0
+ |.endif
+ | bne >4
+ | cmp CARG2, #0
+ | it mi
+ | movmi CARG1, #0x80000000
+ | bmi <1
+ |4:
+ | bl ->vm_..func.._sf
+ | b ->fff_restv
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.align 8
+ |9:
+ |  .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swapped halfwords!
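+ |
+ |// Behavioural sketch of math_round (not the bit-level code): round,
+ |// then box an integer TValue iff the result fits in int32; ceil
+ |// overflowing to +2^31 returns the number constant at 9: instead.
+ |//
+ |//   double r = floor(x);   /* or ceil(x) */
+ |//   if (r >= -2147483648.0 && r <= 2147483647.0)
+ |//     setintV(res, (int32_t)r);
+ |//   else
+ |//     setnumV(res, r);     /* |r| >= 2^31: keep it a number */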
+ |
+ |.ffunc_1 math_abs
+ | checktp CARG2, LJ_TISNUM
+ | bhi ->fff_fallback
+ | it ne
+ | bicne CARG2, CARG2, #0x80000000
+ | bne ->fff_restv
+ | cmp CARG1, #0
+ | it lt
+ | rsbslt CARG1, CARG1, #0
+ | it vs
+ | ldrdvs CARG1, CARG2, <9
+ | // Fallthrough.
+ |
+ |->fff_restv:
+ | // CARG1, CARG2 = TValue result.
+ | ldr PC, [BASE, FRAME_PC]
+ | strd CARG1, CARG2, [BASE, #-8]
+ |->fff_res1:
+ | // PC = return.
+ | mov RC, #(1+1)*8
+ |->fff_res:
+ | // RC = (nresults+1)*8, PC = return.
+ | ands CARG1, PC, #FRAME_TYPE
+ | it eq
+ | ldreq INS, [PC, #-4]
+ | str RC, SAVE_MULTRES
+ | sub RA, BASE, #8
+ | bne ->vm_return
+ | decode_RB8 RB, INS
+ |5:
+ | cmp RB, RC // More results expected?
+ | bhi >6
+ | decode_RA8 CARG1, INS
+ | ins_next1
+ | ins_next2
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | sub BASE, RA, CARG1
+ | ins_next3
+ |
+ |6: // Fill up results with nil.
+ | add CARG2, RA, RC
+ | mvn CARG1, #~LJ_TNIL
+ | add RC, RC, #8
+ | str CARG1, [CARG2, #-4]
+ | b <5
+ |
+ |.macro math_extern, func
+ |.if HFABI
+ | .ffunc_d math_ .. func
+ |.else
+ | .ffunc_n math_ .. func
+ |.endif
+ | .IOS mov RA, BASE
+ | bl extern func
+ | .IOS mov BASE, RA
+ |.if HFABI
+ | b ->fff_resd
+ |.else
+ | b ->fff_restv
+ |.endif
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ |.if HFABI
+ | .ffunc_dd math_ .. func
+ |.else
+ | .ffunc_nn math_ .. func
+ |.endif
+ | .IOS mov RA, BASE
+ | bl extern func
+ | .IOS mov BASE, RA
+ |.if HFABI
+ | b ->fff_resd
+ |.else
+ | b ->fff_restv
+ |.endif
+ |.endmacro
+ |
+ |.if FPU
+ | .ffunc_d math_sqrt
+ | vsqrt.f64 d0, d0
+ |->fff_resd:
+ | ldr PC, [BASE, FRAME_PC]
+ | vstr d0, [BASE, #-8]
+ | b ->fff_res1
+ |.else
+ | math_extern sqrt
+ |.endif
+ |
+ |.ffunc math_log
+ |.if HFABI
+ | ldr CARG2, [BASE, #4]
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
+ | vldr d0, [BASE]
+ | bne ->fff_fallback
+ |.else
+ | ldrd CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
+ | bne ->fff_fallback
+ |.endif
+ | checktp CARG2, LJ_TISNUM
+ | bhs ->fff_fallback
+ | .IOS mov RA, BASE
+ | bl extern log
+ | .IOS mov BASE, RA
+ |.if HFABI
+ | b ->fff_resd
+ |.else
+ | b ->fff_restv
+ |.endif
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.if HFABI
+ | .ffunc math_ldexp
+ | ldr CARG4, [BASE, #4]
+ | ldrd CARG1, CARG2, [BASE, #8]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | vldr d0, [BASE]
+ | checktp CARG4, LJ_TISNUM
+ | bhs ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | bne ->fff_fallback
+ | .IOS mov RA, BASE
+ | bl extern ldexp // (double x, int exp)
+ | .IOS mov BASE, RA
+ | b ->fff_resd
+ |.else
+ |.ffunc_2 math_ldexp
+ | checktp CARG2, LJ_TISNUM
+ | bhs ->fff_fallback
+ | checktp CARG4, LJ_TISNUM
+ | bne ->fff_fallback
+ | .IOS mov RA, BASE
+ | bl extern ldexp // (double x, int exp)
+ | .IOS mov BASE, RA
+ | b ->fff_restv
+ |.endif
+ |
+ |.if HFABI
+ |.ffunc_d math_frexp
+ | mov CARG1, sp
+ | .IOS mov RA, BASE
+ | bl extern frexp
+ | .IOS mov BASE, RA
+ | ldr CARG3, [sp]
+ | mvn CARG4, #~LJ_TISNUM
+ | ldr PC, [BASE, FRAME_PC]
+ | vstr d0, [BASE, #-8]
+ | mov RC, #(2+1)*8
+ | strd CARG3, CARG4, [BASE]
+ | b ->fff_res
+ |.else
+ |.ffunc_n math_frexp
+ | mov CARG3, sp
+ | .IOS mov RA, BASE
+ | bl extern frexp
+ | .IOS mov BASE, RA
+ | ldr CARG3, [sp]
+ | mvn CARG4, #~LJ_TISNUM
+ | ldr PC, [BASE, FRAME_PC]
+ | strd CARG1, CARG2, [BASE, #-8]
+ | mov RC, #(2+1)*8
+ | strd CARG3, CARG4, [BASE]
+ | b ->fff_res
+ |.endif
+ |
+ |.if HFABI
+ |.ffunc_d math_modf
+ | sub CARG1, BASE, #8
+ | ldr PC, [BASE, FRAME_PC]
+ | .IOS mov RA, BASE
+ | bl extern modf
+ | .IOS mov BASE, RA
+ | mov RC, #(2+1)*8
+ | vstr d0, [BASE]
+ | b ->fff_res
+ |.else
+ |.ffunc_n math_modf
+ | sub CARG3, BASE, #8
+ | ldr PC, [BASE, FRAME_PC]
+ | .IOS mov RA, BASE
+ | bl extern modf
+ | .IOS mov BASE, RA
+ | mov RC, #(2+1)*8
+ | strd CARG1, CARG2, [BASE]
+ | b ->fff_res
+ |.endif
+ |
+ |.macro math_minmax, name, cond, fcond
+ |.if FPU
+ | .ffunc_1 name
+ | add RB, BASE, RC
+ | checktp CARG2, LJ_TISNUM
+ | add RA, BASE, #8
+ | bne >4
+ |1: // Handle integers.
+ | ldrd CARG3, CARG4, [RA]
+ | cmp RA, RB
+ | bhs ->fff_restv
+ | checktp CARG4, LJ_TISNUM
+ | bne >3
+ | cmp CARG1, CARG3
+ | add RA, RA, #8
+ | it cond
+ | mov..cond CARG1, CARG3
+ | b <1
+ |3: // Convert intermediate result to number and continue below.
+ | vmov s4, CARG1
+ | bhi ->fff_fallback
+ | vldr d1, [RA]
+ | vcvt.f64.s32 d0, s4
+ | b >6
+ |
+ |4:
+ | vldr d0, [BASE]
+ | bhi ->fff_fallback
+ |5: // Handle numbers.
+ | ldrd CARG3, CARG4, [RA]
+ | vldr d1, [RA]
+ | cmp RA, RB
+ | bhs ->fff_resd
+ | checktp CARG4, LJ_TISNUM
+ | bhs >7
+ |6:
+ | vcmp.f64 d0, d1
+ | vmrs
+ | add RA, RA, #8
+ | it fcond
+ | vmov..fcond.f64 d0, d1
+ | b <5
+ |7: // Convert integer to number and continue above.
+ | vmov s4, CARG3
+ | bhi ->fff_fallback
+ | vcvt.f64.s32 d1, s4
+ | b <6
+ |
+ |.else
+ |
+ | .ffunc_1 name
+ | checktp CARG2, LJ_TISNUM
+ | mov RA, #8
+ | bne >4
+ |1: // Handle integers.
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | cmp RA, RC
+ | bhs ->fff_restv
+ | checktp CARG4, LJ_TISNUM
+ | bne >3
+ | cmp CARG1, CARG3
+ | add RA, RA, #8
+ | it cond
+ | mov..cond CARG1, CARG3
+ | b <1
+ |3: // Convert intermediate result to number and continue below.
+ | bhi ->fff_fallback
+ | bl extern __aeabi_i2d
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | b >6
+ |
+ |4:
+ | bhi ->fff_fallback
+ |5: // Handle numbers.
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | cmp RA, RC
+ | bhs ->fff_restv
+ | checktp CARG4, LJ_TISNUM
+ | bhs >7
+ |6:
+ | bl extern __aeabi_cdcmple
+ | add RA, RA, #8
+ |  itt fcond
+ |  mov..fcond CARG1, CARG3
+ |  mov..fcond CARG2, CARG4
+ | b <5
+ |7: // Convert integer to number and continue above.
+ | bhi ->fff_fallback
+ | strd CARG1, CARG2, TMPD
+ | mov CARG1, CARG3
+ | bl extern __aeabi_i2d
+ | ldrd CARG3, CARG4, TMPD
+ | b <6
+ |.endif
+ |.endmacro
+ |
+ | math_minmax math_min, gt, pl
+ | math_minmax math_max, lt, le
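+ |
+ |// math.min/math.max scan the arguments left to right, keeping the
+ |// current best and promoting to doubles once a non-integer shows up.
+ |// Sketch (lt is < for min, > for max, per the cond/fcond arguments):
+ |//
+ |//   TValue m = arg[0];
+ |//   for (i = 1; i < nargs; i++)
+ |//     if (lt(arg[i], m)) m = arg[i];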
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | ldrd CARG1, CARG2, [BASE]
+ | ldr PC, [BASE, FRAME_PC]
+ | cmp NARGS8:RC, #8
+ | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument.
+ | bne ->fff_fallback
+ | ldr CARG3, STR:CARG1->len
+ | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end).
+ | mvn CARG2, #~LJ_TISNUM
+ | cmp CARG3, #0
+ | ite eq
+ | moveq RC, #(0+1)*8
+ | movne RC, #(1+1)*8
+ | strd CARG1, CARG2, [BASE, #-8]
+ | b ->fff_res
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | ldrd CARG1, CARG2, [BASE]
+ | ldr PC, [BASE, FRAME_PC]
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
+ | checktpeq CARG2, LJ_TISNUM
+ | it eq
+ | bicseq CARG4, CARG1, #255
+ | mov CARG3, #1
+ | bne ->fff_fallback
+ | str CARG1, TMPD
+ | mov CARG2, TMPDp // Points to stack. Little-endian.
+ |->fff_newstr:
+ | // CARG2 = str, CARG3 = len.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+ | // Returns GCstr *.
+ | ldr BASE, L->base
+ | mvn CARG2, #~LJ_TSTR
+ | b ->fff_restv
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | ldrd CARG1, CARG2, [BASE]
+ | ldrd CARG3, CARG4, [BASE, #16]
+ | cmp NARGS8:RC, #16
+ | mvn RB, #0
+ | beq >1
+ | blo ->fff_fallback
+ | checktp CARG4, LJ_TISNUM
+ | mov RB, CARG3
+ | bne ->fff_fallback
+ |1:
+ | ldrd CARG3, CARG4, [BASE, #8]
+ | checktp CARG2, LJ_TSTR
+ | it eq
+ | ldreq CARG2, STR:CARG1->len
+ | checktpeq CARG4, LJ_TISNUM
+ | bne ->fff_fallback
+ | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end
+ | add CARG4, CARG2, #1
+ | cmp CARG3, #0 // if (start < 0) start += len+1
+ | it lt
+ | addlt CARG3, CARG3, CARG4
+ | cmp CARG3, #1 // if (start < 1) start = 1
+ | it lt
+ | movlt CARG3, #1
+ | cmp RB, #0 // if (end < 0) end += len+1
+ | it lt
+ | addlt RB, RB, CARG4
+ | bic RB, RB, RB, asr #31 // if (end < 0) end = 0
+ | cmp RB, CARG2 // if (end > len) end = len
+ | add CARG1, STR:CARG1, #sizeof(GCstr)-1
+ | it gt
+ | movgt RB, CARG2
+ | add CARG2, CARG1, CARG3
+ | subs CARG3, RB, CARG3 // len = end - start
+ | add CARG3, CARG3, #1 // len += 1
+ | bge ->fff_newstr
+ |->fff_emptystr:
+ | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty)
+ | mvn CARG2, #~LJ_TSTR
+ | b ->fff_restv
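+ |
+ |// The index clamping above, consolidated as C (string.sub semantics;
+ |// start and end are the 1-based Lua indices, len = str->len):
+ |//
+ |//   if (start < 0) start += len + 1;
+ |//   if (start < 1) start = 1;
+ |//   if (end < 0) end += len + 1;
+ |//   if (end < 0) end = 0;          /* the bic with asr #31 */
+ |//   if (end > len) end = len;
+ |//   n = end - start + 1;           /* end < start: empty string */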
+ |
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
+ | ffgccheck
+ | ldr CARG3, [BASE, #4]
+ | cmp NARGS8:RC, #8
+ | ldr STR:CARG2, [BASE]
+ | blo ->fff_fallback
+ | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
+ | checkstr CARG3, ->fff_fallback
+ | ldr CARG4, SBUF:CARG1->b
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, SBUF:CARG1->L
+ | str CARG4, SBUF:CARG1->w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |// FP number to bit conversion for soft-float. Clobbers r0-r3.
+ |->vm_tobit_fb:
+ | bhi ->fff_fallback
+ |->vm_tobit:
+ | lsl RB, CARG2, #1
+ | adds RB, RB, #0x00200000
+ |  itt pl
+ | movpl CARG1, #0 // |x| < 1?
+ | bxpl lr
+ | mvn CARG4, #0x3e0
+ | subs RB, CARG4, RB, asr #21
+ | bmi >1 // |x| >= 2^32?
+ | lsl CARG4, CARG2, #11
+ | orr CARG4, CARG4, #0x80000000
+ | orr CARG4, CARG4, CARG1, lsr #21
+ | cmp CARG2, #0
+ | lsr CARG1, CARG4, RB
+ | it lt
+ | rsblt CARG1, CARG1, #0
+ | bx lr
+ |1:
+ | add RB, RB, #21
+ | lsr CARG4, CARG1, RB
+ | rsb RB, RB, #20
+ | lsl CARG1, CARG2, #12
+ | cmp CARG2, #0
+ | lsl CARG1, CARG1, RB
+ | orr CARG1, CARG4, CARG1
+ | it lt
+ | rsblt CARG1, CARG1, #0
+ | bx lr
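+ |
+ |// For comparison, the portable C fallback (lj_num2bit in lj_obj.h)
+ |// gets the same int32 bits with a single FP add; the code above
+ |// instead shifts the mantissa by hand, since soft-float has no FPU:
+ |//
+ |//   TValue o;
+ |//   o.n = n + 6755399441055744.0;  /* 2^52 + 2^51 */
+ |//   return (int32_t)o.u32.lo;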
+ |
+ |.macro .ffunc_bit, name
+ | .ffunc_1 bit_..name
+ | checktp CARG2, LJ_TISNUM
+ | it ne
+ | blne ->vm_tobit_fb
+ |.endmacro
+ |
+ |.ffunc_bit tobit
+ | mvn CARG2, #~LJ_TISNUM
+ | b ->fff_restv
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name
+ | mov CARG3, CARG1
+ | mov RA, #8
+ |1:
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | cmp RA, NARGS8:RC
+ | add RA, RA, #8
+ | bge >2
+ | checktp CARG2, LJ_TISNUM
+ | it ne
+ | blne ->vm_tobit_fb
+ | ins CARG3, CARG3, CARG1
+ | b <1
+ |.endmacro
+ |
+ |.ffunc_bit_op band, and
+ |.ffunc_bit_op bor, orr
+ |.ffunc_bit_op bxor, eor
+ |
+ |2:
+ | mvn CARG4, #~LJ_TISNUM
+ | ldr PC, [BASE, FRAME_PC]
+ | strd CARG3, CARG4, [BASE, #-8]
+ | b ->fff_res1
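+ |
+ |// band/bor/bxor fold all arguments with one operator (a sketch):
+ |//
+ |//   uint32_t r = tobit(arg[0]);
+ |//   for (i = 1; i < nargs; i++)
+ |//     r = r OP tobit(arg[i]);   /* OP is &, | or ^ */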
+ |
+ |.ffunc_bit bswap
+ | eor CARG3, CARG1, CARG1, ror #16
+ | bic CARG3, CARG3, #0x00ff0000
+ | ror CARG1, CARG1, #8
+ | mvn CARG2, #~LJ_TISNUM
+ | eor CARG1, CARG1, CARG3, lsr #8
+ | b ->fff_restv
+ |
+ |.ffunc_bit bnot
+ | mvn CARG1, CARG1
+ | mvn CARG2, #~LJ_TISNUM
+ | b ->fff_restv
+ |
+ |.macro .ffunc_bit_sh, name, ins, shmod
+ | .ffunc bit_..name
+ | ldrd CARG1, CARG2, [BASE, #8]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | checktp CARG2, LJ_TISNUM
+ | it ne
+ | blne ->vm_tobit_fb
+ |.if shmod == 0
+ | and RA, CARG1, #31
+ |.else
+ | rsb RA, CARG1, #0
+ |.endif
+ | ldrd CARG1, CARG2, [BASE]
+ | checktp CARG2, LJ_TISNUM
+ | it ne
+ | blne ->vm_tobit_fb
+ | ins CARG1, CARG1, RA
+ | mvn CARG2, #~LJ_TISNUM
+ | b ->fff_restv
+ |.endmacro
+ |
+ |.ffunc_bit_sh lshift, lsl, 0
+ |.ffunc_bit_sh rshift, lsr, 0
+ |.ffunc_bit_sh arshift, asr, 0
+ |.ffunc_bit_sh rol, ror, 1
+ |.ffunc_bit_sh ror, ror, 0
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RC = nargs*8
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | ldr CARG2, L->maxstack
+ | add CARG1, BASE, NARGS8:RC
+ | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC.
+ | str CARG1, L->top
+ | ldr CARG3, CFUNC:CARG3->f
+ | str BASE, L->base
+ | add CARG1, CARG1, #8*LUA_MINSTACK
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | cmp CARG1, CARG2
+ | mov CARG1, L
+ | bhi >5 // Need to grow stack.
+ | blx CARG3 // (lua_State *L)
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | ldr BASE, L->base
+ | cmp CRET1, #0
+ | lsl RC, CRET1, #3
+ | sub RA, BASE, #8
+ | bgt ->fff_res // Returned nresults+1?
+ |1: // Returned 0 or -1: retry fast path.
+ | ldr CARG1, L->top
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, CARG1, BASE
+ | bne ->vm_call_tail // Returned -1?
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | ands CARG1, PC, #FRAME_TYPE
+ | bic CARG2, PC, #FRAME_TYPEP
+ | ittt eq
+ | ldreq INS, [PC, #-4]
+ | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8.
+ | addeq CARG2, CARG2, #8
+ | sub RB, BASE, CARG2
+ | b ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | mov CARG2, #LUA_MINSTACK
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->base
+ | cmp CARG1, CARG1 // Set zero-flag to force retry.
+ | b <1
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RC = nargs*8
+ | mov RA, lr
+ | str BASE, L->base
+ | add CARG2, BASE, NARGS8:RC
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | str CARG2, L->top
+ | mov CARG1, L
+ | bl extern lj_gc_step // (lua_State *L)
+ | ldr BASE, L->base
+ | mov lr, RA // Help return address predictor.
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+ | bx lr
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
+ | ldrb CARG1, [CARG1]
+ | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
+ | bne >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount)
+ | ldr CARG2, [CARG2]
+ | tst CARG1, #HOOK_ACTIVE
+ | bne >1
+ | sub CARG2, CARG2, #1
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookcount)
+ | it ne
+ | strne CARG2, [CARG1]
+ | b >1
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
+ | ldrb CARG1, [CARG1]
+ | tst CARG1, #HOOK_ACTIVE // Hook already active?
+ | beq >1
+ |5: // Re-dispatch to static ins.
+ | decode_OP OP, INS
+ | add OP, DISPATCH, OP, lsl #2
+ | ldr pc, [OP, #GG_DISP2STATIC]
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
+ | ldrb CARG1, [CARG1]
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount)
+ | ldr CARG2, [CARG2]
+ | tst CARG1, #HOOK_ACTIVE // Hook already active?
+ | bne <5
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+ | beq <5
+ | subs CARG2, CARG2, #1
+ | sub CARG3, DISPATCH, #-DISPATCH_GL(hookcount)
+ | str CARG2, [CARG3]
+ | beq >1
+ | tst CARG1, #LUA_MASKLINE
+ | beq <5
+ |1:
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | ldr BASE, L->base
+ |4: // Re-dispatch to static ins.
+ | ldrb OP, [PC, #-4]
+ | ldr INS, [PC, #-4]
+ | add OP, DISPATCH, OP, lsl #2
+ | ldr OP, [OP, #GG_DISP2STATIC]
+ | decode_RA8 RA, INS
+ | decode_RD RC, INS
+ | bx OP
+ |
+ |->cont_hook: // Continue from hook yield.
+ | ldr CARG1, [CARG4, #-24]
+ | add PC, PC, #4
+ | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins.
+ | b <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
+ | sub CARG1, DISPATCH, #-GG_DISP2J
+ | str PC, SAVE_PC
+ | ldr CARG3, LFUNC:CARG3->field_pc
+ | mov CARG2, PC
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
+ | str L, [RB]
+ | ldrb CARG3, [CARG3, #PC2PROTO(framesize)]
+ | str BASE, L->base
+ | add CARG3, BASE, CARG3, lsl #3
+ | str CARG3, L->top
+ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | b <3
+ |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | mov CARG2, PC
+ |.if JIT
+ | b >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | orr CARG2, PC, #1
+ |1:
+ |.endif
+ | add CARG4, BASE, RC
+ | str PC, SAVE_PC
+ | mov CARG1, L
+ | str BASE, L->base
+ | sub RA, RA, BASE
+ | str CARG4, L->top
+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // Returns ASMFunction.
+ | ldr BASE, L->base
+ | ldr CARG4, L->top
+ | mov CARG2, #0
+ | add RA, BASE, RA
+ | sub NARGS8:RC, CARG4, BASE
+ | str CARG2, SAVE_PC // Invalidate for subsequent line hook.
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | ldr INS, [PC, #-4]
+ | bx CRET1
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, CARG4 = meta base
+ | ldr RB, SAVE_MULTRES
+ | ldr INS, [PC, #-4]
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
+ | subs RB, RB, #8
+ | decode_RA8 RC, INS // Call base.
+ | beq >2
+ |1: // Move results down.
+ | ldrd CARG1, CARG2, [RA]
+ | add RA, RA, #8
+ | subs RB, RB, #8
+ | strd_i CARG1, CARG2, BASE, RC
+ | add RC, RC, #8
+ | bne <1
+ |2:
+ | decode_RA8 RA, INS
+ | decode_RB8 RB, INS
+ | add RA, RA, RB
+ |3:
+ | cmp RA, RC
+ | mvn CARG2, #~LJ_TNIL
+ | bhi >9 // More results wanted?
+ |
+ | ldrh RA, TRACE:CARG3->traceno
+ | ldrh RC, TRACE:CARG3->link
+ | cmp RC, RA
+ | beq ->cont_nop // Blacklisted.
+ | cmp RC, #0
+ | bne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | sub RB, DISPATCH, #-DISPATCH_J(exitno)
+ | str RA, [RB]
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
+ | str L, [RB]
+ | str BASE, L->base
+ | sub CARG1, DISPATCH, #-GG_DISP2J
+ | mov CARG2, PC
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | ldr BASE, L->base
+ | b ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | strd_i CARG1, CARG2, BASE, RC
+ | add RC, RC, #8
+ | b <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | ldr BASE, L->base
+ | sub PC, PC, #4
+ | b ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_exit_handler:
+ |.if JIT
+ | sub sp, sp, #12
+ | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}
+ | ldr CARG1, [sp, #64] // Load original value of lr.
+ | ldr DISPATCH, [lr, #-1] // Load DISPATCH.
+ | add CARG3, sp, #64 // Recompute original value of sp.
+ | mv_vmstate CARG4, EXIT
+ | str CARG3, [sp, #52] // Store sp in RID_SP
+ | st_vmstate CARG4
+ | ldr CARG4, [CARG1, #-5]! // Get exit instruction.
+ | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC.
+ | str CARG1, [sp, #60]
+ |.if FPU
+ | vpush {d0-d15}
+ |.endif
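+ |  // The three .long words below appear to be hand-encoded Thumb-2
+ |  // SBFX/BFI instructions (assumed unsupported by this DynASM port);
+ |  // they reassemble the signed halfword offset of the exit stub's
+ |  // branch instruction, already loaded into CARG4, so the exit
+ |  // number can be computed from the stub address.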
+ | .long 0xf3432180 //SBFX CARG2, CARG4, #10, #1
+ | .long 0xf36321d4 //BFI CARG2, CARG4, #11, #10
+ | lsr CARG4, CARG4, #16
+ | .long 0xf363010a //BFI CARG2, CARG4, #0, #11
+ | add CARG1, CARG1, CARG2, lsl #1
+ | ldr CARG2, [lr, #3] // Load exit stub group offset.
+ | sub CARG1, CARG1, lr
+ | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
+ | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
+ | sub RB, DISPATCH, #-DISPATCH_J(exitno)
+ | str CARG1, [RB]
+ | mov CARG4, #0
+ | str BASE, L->base
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
+ | str L, [RB]
+ | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
+ | sub CARG1, DISPATCH, #-GG_DISP2J
+ | mov CARG2, sp
+ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // Returns MULTRES (unscaled) or negated error code.
+ | ldr CARG2, L->cframe
+ | ldr BASE, L->base
+ | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated.
+ | mov sp, CARG2
+ | ldr PC, SAVE_PC // Get SAVE_PC.
+ | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
+ | b >1
+ |.endif
+ |->vm_exit_interp:
+ | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set.
+ |.if JIT
+ | ldr L, SAVE_L
+ |1:
+ | cmp CARG1, #0
+ | blt >9 // Check for error from exit.
+ | lsl RC, CARG1, #3
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | str RC, SAVE_MULTRES
+ | mov CARG3, #0
+ | str BASE, L->base
+ | ldr CARG2, LFUNC:CARG2->field_pc
+ | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
+ | mv_vmstate CARG4, INTERP
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | ldrb OP, [PC]
+ | mov MASKR8, #255
+ | ldr INS, [PC], #4
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
+ | st_vmstate CARG4
+ | cmp OP, #BC_FUNCC+2 // Fast function?
+ | bhs >4
+ |2:
+ | cmp OP, #BC_FUNCF // Function header?
+ | ldr OP, [DISPATCH, OP, lsl #2]
+ | decode_RA8 RA, INS
+ | iteee lo
+ | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
+ | subhs RC, RC, #8
+ | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
+ | ldrhs CARG3, [BASE, FRAME_FUNC]
+ | bx OP
+ |
+ |4: // Check frame below fast function.
+ | ldr CARG1, [BASE, FRAME_PC]
+ | ands CARG2, CARG1, #FRAME_TYPE
+ | bne <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | ldr CARG3, [CARG1, #-4]
+ | decode_RA8 CARG1, CARG3
+ | sub CARG2, BASE, CARG1
+ | ldr LFUNC:CARG3, [CARG2, #-16]
+ | ldr CARG3, LFUNC:CARG3->field_pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | b <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | mov CARG1, L
+ | bl extern lj_err_run // (lua_State *L)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called from JIT code.
+ |//
+ |// double lj_vm_floor/ceil/trunc(double x);
+ |.macro vm_round, func, hf
+ |.if hf == 1
+ | vmov CARG1, CARG2, d0
+ |.endif
+ | lsl CARG3, CARG2, #1
+ | adds RB, CARG3, #0x00200000
+ | bpl >2 // |x| < 1?
+ | mvn CARG4, #0x3cc
+ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
+ |  it lo
+ | bxlo lr // |x| >= 2^52: done.
+ | mvn CARG4, #1
+ | lsl CARG4, CARG4, RB
+ | bic CARG3, CARG1, CARG4 // ztest = lo & ~lomask
+ | and CARG1, CARG1, CARG4 // lo &= lomask
+ | subs RB, RB, #32
+ | mvn CARG4, #1
+ | itttt pl
+ | lslpl CARG4, CARG4, RB
+ | bicpl CARG4, CARG2, CARG4 // |x| <= 2^20: ztest |= hi & ~himask
+ | orrpl CARG3, CARG3, CARG4
+ | mvnpl CARG4, #1
+ | itt pl
+ | lslpl CARG4, CARG4, RB
+ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
+ | mvn CARG4, #1
+ |.if "func" == "floor"
+ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
+ |.else
+ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
+ |.endif
+ |.if hf == 1
+ | it eq
+ | vmoveq d0, CARG1, CARG2
+ |.endif
+ |  it eq
+ | bxeq lr // iszero: done.
+ | mvn CARG4, #1
+ | cmp RB, #0
+ | ite pl
+ | lslpl CARG3, CARG4, RB
+ | mvnmi CARG3, #0
+ | add RB, RB, #32
+ | lsl CARG4, CARG4, RB
+ | subs CARG1, CARG1, CARG4 // lo = lo-lomask
+ | mvn CARG4, #1
+ | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
+ |.if hf == 1
+ | vmov d0, CARG1, CARG2
+ |.endif
+ | bx lr
+ |
+ |2: // |x| < 1:
+ |  it cs
+ | bxcs lr // |x| is not finite.
+ | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo
+ |.if "func" == "floor"
+ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
+ |.else
+ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
+ |.endif
+ | mov CARG1, #0 // lo = 0
+ | and CARG2, CARG2, #0x80000000
+ | itt ne
+ | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
+ | orrne CARG2, CARG2, CARG4
+ |.if hf == 1
+ | vmov d0, CARG1, CARG2
+ |.endif
+ | bx lr
+ |.endmacro
+ |
+ |9:
+ |  .long 0x00003ff0 // hiword(+1.0) jturnsek: swapped halfwords!
+ |
+ |->vm_floor:
+ |.if HFABI
+ | vm_round floor, 1
+ |.endif
+ |->vm_floor_sf:
+ | vm_round floor, 0
+ |
+ |->vm_ceil:
+ |.if HFABI
+ | vm_round ceil, 1
+ |.endif
+ |->vm_ceil_sf:
+ | vm_round ceil, 0
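+ |
+ |// A self-contained C model of the mask trick in vm_round (a sketch;
+ |// the asm above does the same on split lo/hi words via lomask and
+ |// himask):
+ |//
+ |//   static double floor_bits(double x) {
+ |//     uint64_t u; memcpy(&u, &x, 8);
+ |//     int e = (int)((u >> 52) & 0x7ff) - 1023;  /* unbiased exponent */
+ |//     if (e >= 52) return x;                    /* no fraction, inf, nan */
+ |//     if (e < 0)                                /* |x| < 1 */
+ |//       return (u >> 63) ? ((u << 1) ? -1.0 : -0.0) : 0.0;
+ |//     uint64_t frac = ((uint64_t)1 << (52 - e)) - 1;
+ |//     uint64_t r = u & ~frac;                   /* truncate toward zero */
+ |//     if ((u >> 63) && (u & frac))              /* negative, bits dropped */
+ |//       r += (uint64_t)1 << (52 - e);           /* step away from zero */
+ |//     memcpy(&x, &r, 8);
+ |//     return x;
+ |//   }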
+ |
+ |.macro vm_trunc, hf
+ |.if JIT
+ |.if hf == 1
+ | vmov CARG1, CARG2, d0
+ |.endif
+ | lsl CARG3, CARG2, #1
+ | adds RB, CARG3, #0x00200000
+ | itt pl
+ | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
+ | movpl CARG1, #0
+ |.if hf == 1
+ | it pl
+ | vmovpl d0, CARG1, CARG2
+ |.endif
+ |  it pl
+ | bxpl lr
+ | mvn CARG4, #0x3cc
+ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
+ |  it lo
+ | bxlo lr // |x| >= 2^52: already done.
+ | mvn CARG4, #1
+ | lsl CARG4, CARG4, RB
+ | and CARG1, CARG1, CARG4 // lo &= lomask
+ | subs RB, RB, #32
+ | mvn CARG4, #1
+ | itt pl
+ |  lslpl CARG4, CARG4, RB
+ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
+ |.if hf == 1
+ | vmov d0, CARG1, CARG2
+ |.endif
+ | bx lr
+ |.endif
+ |.endmacro
+ |
+ |->vm_trunc:
+ |.if HFABI
+ | vm_trunc 1
+ |.endif
+ |->vm_trunc_sf:
+ | vm_trunc 0
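+ |
+ |// Trunc is the same model minus the rounding adjustment: clear the
+ |// fractional mantissa bits, or keep just the sign when |x| < 1:
+ |//
+ |//   if (e < 0)       u &= (uint64_t)1 << 63;   /* signed zero */
+ |//   else if (e < 52) u &= ~(((uint64_t)1 << (52 - e)) - 1);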
+ |
+ | // double lj_vm_mod(double dividend, double divisor);
+ |->vm_mod:
+ |.if FPU
+ | // Special calling convention. Also, RC (r11) is not preserved.
+ | vdiv.f64 d0, d6, d7
+ | mov RC, lr
+ | vmov CARG1, CARG2, d0
+ | bl ->vm_floor_sf
+ | vmov d0, CARG1, CARG2
+ | vmul.f64 d0, d0, d7
+ | mov lr, RC
+ | vsub.f64 d6, d6, d0
+ | bx lr
+ |.else
+ | push {r0, r1, r2, r3, r4, lr}
+ | bl extern __aeabi_ddiv
+ | bl ->vm_floor_sf
+ | ldrd CARG3, CARG4, [sp, #8]
+ | bl extern __aeabi_dmul
+ | ldrd CARG3, CARG4, [sp]
+ | eor CARG2, CARG2, #0x80000000
+ | bl extern __aeabi_dadd
+ | add sp, sp, #20
+ | pop {pc}
+ |.endif
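+ |
+ |// Both paths implement Lua's number modulo, i.e. in C:
+ |//
+ |//   double lj_vm_mod(double a, double b) {
+ |//     return a - floor(a / b) * b;
+ |//   }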
+ |
+ | // int lj_vm_modi(int dividend, int divisor);
+ |->vm_modi:
+ | ands RB, CARG1, #0x80000000
+ | it mi
+ | rsbmi CARG1, CARG1, #0 // a = |dividend|
+ | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor).
+ | cmp CARG2, #0
+ | it mi
+ | rsbmi CARG2, CARG2, #0 // b = |divisor|
+ | subs CARG4, CARG2, #1
+ | ite ne
+ | cmpne CARG1, CARG2
+ | moveq CARG1, #0 // if (b == 1 || a == b) a = 0
+ | it hi
+ | tsthi CARG2, CARG4
+ | it eq
+ | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1
+ | bls >1
+ | // Use repeated subtraction to get the remainder.
+ | clz CARG3, CARG1
+ | clz CARG4, CARG2
+ | sub CARG4, CARG4, CARG3
+ | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*12
+ | it ne
+ | .long 0xe8dff002 // tbbne [pc, CARG3] // Duff's device.
+ | .long 0xb8bec4ca, 0xa0a6acb2, 0x888e949a, 0x70767c82 // TBB table (part1)
+ | .long 0x585e646a, 0x40464c52, 0x282e343a, 0x10161c22 // TBB table (part2)
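+ |  // The embedded C loop below unrolls 32 conditional subtract steps;
+ |  // the TBB above jumps into the middle of the sequence so only the
+ |  // shift counts that can matter are executed.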
+ {
+ int i;
+ for (i = 31; i >= 0; i--) {
+ | cmp CARG1, CARG2, lsl #i
+ | it hs
+ | subhs CARG1, CARG1, CARG2, lsl #i
+ }
+ }
+ |1:
+ | cmp CARG1, #0
+ | it ne
+ | cmpne RB, #0
+ | it mi
+ | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b
+ | eors CARG2, CARG1, RB, lsl #1
+ | it mi
+ | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y
+ | bx lr
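+ |
+ |// Reference semantics for vm_modi (Lua's % on int32: the result
+ |// takes the sign of the divisor). A C sketch:
+ |//
+ |//   int32_t lj_vm_modi(int32_t a, int32_t b) {
+ |//     int32_t y = a % b;            /* C truncating remainder */
+ |//     if (y != 0 && ((a ^ b) < 0))  /* signs differ: floor-adjust */
+ |//       y += b;
+ |//     return y;
+ |//   }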
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_RES, CARG1
+ |.define NEXT_IDX, CARG2
+ |.define NEXT_TMP0, CARG3
+ |.define NEXT_TMP1, CARG4
+ |.define NEXT_LIM, r12
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL, [sp]
+ |.define NEXT_RES_KEY_I, [sp, #8]
+ |.define NEXT_RES_KEY_IT, [sp, #12]
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2.
+ |->vm_next:
+ |.if JIT
+ | ldr NEXT_TMP0, NEXT_TAB->array
+ | ldr NEXT_LIM, NEXT_TAB->asize
+ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3
+ |1: // Traverse array part.
+ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM
+ | bhs >5
+ | ldr NEXT_TMP1, [NEXT_TMP0, #4]
+ | str NEXT_IDX, NEXT_RES_KEY_I
+ | add NEXT_TMP0, NEXT_TMP0, #8
+ | add NEXT_IDX, NEXT_IDX, #1
+ | checktp NEXT_TMP1, LJ_TNIL
+ | beq <1 // Skip holes in array part.
+ | ldr NEXT_TMP0, [NEXT_TMP0, #-8]
+ | mov NEXT_RES, NEXT_RES_PTR
+ | strd NEXT_TMP0, NEXT_TMP1, NEXT_RES_VAL
+ | mvn NEXT_TMP0, #~LJ_TISNUM
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
+ | bx lr
+ |
+ |5: // Traverse hash part.
+ | ldr NEXT_TMP0, NEXT_TAB->hmask
+ | ldr NODE:NEXT_RES, NEXT_TAB->node
+ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1
+ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0
+ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3
+ |6:
+ | cmp NEXT_IDX, NEXT_LIM
+ | bhi >9
+ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it
+ | checktp NEXT_TMP1, LJ_TNIL
+ | add NEXT_IDX, NEXT_IDX, #1
+ | it ne
+ | bxne lr
+ | // Skip holes in hash part.
+ | add NEXT_RES, NEXT_RES, #sizeof(Node)
+ | b <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | mvn NEXT_TMP0, #0
+ | mov NEXT_RES, NEXT_RES_PTR
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
+ | bx lr
+ |.endif
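+
+/* Shape of the vm_next traversal above in C (simplified stand-in types,
+** not the real lj_obj.h layout; tvisnil stands for the nil-tag check).
+** Array slots cover indices 0..asize-1, hash nodes asize..asize+hmask:
+**
+**   typedef struct { uint32_t lo, it; } TV;
+**   typedef struct Node { TV val, key; struct Node *next; } Node;
+**   typedef struct { TV *array; uint32_t asize;
+**                    Node *node; uint32_t hmask; } Tab;
+**
+**   static TV *tab_next(Tab *t, uint32_t idx, uint32_t *nextidx)
+**   {
+**     for (; idx < t->asize; idx++)  // 1: skip holes in array part.
+**       if (!tvisnil(&t->array[idx]))
+**         { *nextidx = idx+1; return &t->array[idx]; }
+**     for (; idx <= t->asize + t->hmask; idx++) {  // 6: hash part.
+**       Node *n = &t->node[idx - t->asize];
+**       if (!tvisnil(&n->val)) { *nextidx = idx+1; return &n->val; }
+**     }
+**     return NULL;  // 9: end of iteration.
+**   }
+*/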
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions.
+ |// Saveregs already performed. Callback slot number in [sp], g in r12.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | ldr CTSTATE, GL:r12->ctype_state
+ | add DISPATCH, r12, #GG_G2DISP
+ |.if FPU
+ | str r4, SAVE_R4
+ | add r4, sp, CFRAME_SPACE+4+8*8
+ | vstmdb r4!, {d8-d15}
+ |.endif
+ |.if HFABI
+ | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8])
+ |.endif
+ | strd CARG3, CARG4, CTSTATE->cb.gpr[2]
+ | strd CARG1, CARG2, CTSTATE->cb.gpr[0]
+ |.if HFABI
+ | vstmdb r12!, {d0-d7}
+ |.endif
+ | ldr CARG4, [sp]
+ | add CARG3, sp, #CFRAME_SIZE
+ | mov CARG1, CTSTATE
+ | lsr CARG4, CARG4, #3
+ | str CARG3, CTSTATE->cb.stack
+ | mov CARG2, sp
+ | str CARG4, CTSTATE->cb.slot
+ | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // Returns lua_State *.
+ | ldr BASE, L:CRET1->base
+ | mv_vmstate CARG2, INTERP
+ | ldr RC, L:CRET1->top
+ | mov MASKR8, #255
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | mov L, CRET1
+ | sub RC, RC, BASE
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
+ | st_vmstate CARG2
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)]
+ | str BASE, L->base
+ | str CARG4, L->top
+ | str L, CTSTATE->L
+ | mov CARG1, CTSTATE
+ | mov CARG2, RA
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | ldrd CARG1, CARG2, CTSTATE->cb.gpr[0]
+ |.if HFABI
+ | vldr d0, CTSTATE->cb.fpr[0]
+ |.endif
+ | b ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, r4
+ | push {CCSTATE, r5, r11, lr}
+ | mov CCSTATE, CARG1
+ | ldr CARG1, CCSTATE:CARG1->spadj
+ | ldrb CARG2, CCSTATE->nsp
+ | add CARG3, CCSTATE, #offsetof(CCallState, stack)
+ |.if HFABI
+ | add RB, CCSTATE, #offsetof(CCallState, fpr[0])
+ |.endif
+ | mov r11, sp
+ | sub sp, sp, CARG1 // Readjust stack.
+ | subs CARG2, CARG2, #1
+ |.if HFABI
+ | vldm RB, {d0-d7}
+ |.endif
+ | ldr RB, CCSTATE->func
+ | bmi >2
+ |1: // Copy stack slots.
+ | ldr CARG4, [CARG3, CARG2, lsl #2]
+ | str CARG4, [sp, CARG2, lsl #2]
+ | subs CARG2, CARG2, #1
+ | bpl <1
+ |2:
+ | ldrd CARG1, CARG2, CCSTATE->gpr[0]
+ | ldrd CARG3, CARG4, CCSTATE->gpr[2]
+ | blx RB
+ | mov sp, r11
+ |.if HFABI
+ | add r12, CCSTATE, #offsetof(CCallState, fpr[4])
+ |.endif
+ | strd CRET1, CRET2, CCSTATE->gpr[0]
+ |.if HFABI
+ | vstmdb r12!, {d0-d3}
+ |.endif
+ | pop {CCSTATE, r5, r11, pc}
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1*8, RC = src2, JMP with RC = target
+ | lsl RC, RC, #3
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ | ldrh RB, [PC, #2]
+ | ldrd_iw CARG3, CARG4, RC, BASE
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TISNUM
+ | bne >3
+ | checktp CARG4, LJ_TISNUM
+ | bne >4
+ | cmp CARG1, CARG3
+ if (op == BC_ISLT) {
+ | it lt
+ | sublt PC, RB, #0x20000
+ } else if (op == BC_ISGE) {
+ | it ge
+ | subge PC, RB, #0x20000
+ } else if (op == BC_ISLE) {
+ | it le
+ | suble PC, RB, #0x20000
+ } else {
+ | it gt
+ | subgt PC, RB, #0x20000
+ }
+ |1:
+ | ins_next
+ |
+ |3: // CARG1, CARG2 is not an integer.
+ |.if FPU
+ | vldr d0, [RA]
+ | bhi ->vmeta_comp
+ | // d0 is a number.
+ | checktp CARG4, LJ_TISNUM
+ | vldr d1, [RC]
+ | blo >5
+ | bhi ->vmeta_comp
+ | // d0 is a number, CARG3 is an integer.
+ | vmov s4, CARG3
+ | vcvt.f64.s32 d1, s4
+ | b >5
+ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer.
+ | vldr d1, [RC]
+ | bhi ->vmeta_comp
+ | // CARG1 is an integer, d1 is a number.
+ | vmov s4, CARG1
+ | vcvt.f64.s32 d0, s4
+ |5: // d0 and d1 are numbers.
+ | vcmp.f64 d0, d1
+ | vmrs
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ if (op == BC_ISLT) {
+ | it lo
+ | sublo PC, RB, #0x20000
+ } else if (op == BC_ISGE) {
+ | it hs
+ | subhs PC, RB, #0x20000
+ } else if (op == BC_ISLE) {
+ | it ls
+ | subls PC, RB, #0x20000
+ } else {
+ | it hi
+ | subhi PC, RB, #0x20000
+ }
+ | b <1
+ |.else
+ | bhi ->vmeta_comp
+ | // CARG1, CARG2 is a number.
+ | checktp CARG4, LJ_TISNUM
+ | it lo
+ | movlo RA, RB // Save RB.
+ | blo >5
+ | bhi ->vmeta_comp
+ | // CARG1, CARG2 is a number, CARG3 is an integer.
+ | mov CARG1, CARG3
+ | mov RC, RA
+ | mov RA, RB // Save RB.
+ | bl extern __aeabi_i2d
+ | mov CARG3, CARG1
+ | mov CARG4, CARG2
+ | ldrd CARG1, CARG2, [RC] // Restore first operand.
+ | b >5
+ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer.
+ | bhi ->vmeta_comp
+ | // CARG1 is an integer, CARG3, CARG4 is a number.
+ | mov RA, RB // Save RB.
+ | bl extern __aeabi_i2d
+ | ldrd CARG3, CARG4, [RC] // Restore second operand.
+ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers.
+ | bl extern __aeabi_cdcmple
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ if (op == BC_ISLT) {
+ | it lo
+ | sublo PC, RA, #0x20000
+ } else if (op == BC_ISGE) {
+ | it hs
+ | subhs PC, RA, #0x20000
+ } else if (op == BC_ISLE) {
+ | it ls
+ | subls PC, RA, #0x20000
+ } else {
+ | it hi
+ | subhi PC, RA, #0x20000
+ }
+ | b <1
+ |.endif
+ break;
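+
+  /* Why GE/GT branch on "unordered" above: the parser emits ISGE/ISGT as
+  ** the negations of ISLT/ISLE, and a comparison involving NaN is always
+  ** false, so its negation must take the branch. Scalar C illustration
+  ** (not the VM code):
+  **
+  **   static int bc_islt(double a, double b) { return a < b; }     // NaN: 0
+  **   static int bc_isge(double a, double b) { return !(a < b); }  // NaN: 1
+  **   static int bc_isle(double a, double b) { return a <= b; }    // NaN: 0
+  **   static int bc_isgt(double a, double b) { return !(a <= b); } // NaN: 1
+  */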
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1*8, RC = src2, JMP with RC = target
+ | lsl RC, RC, #3
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ | ldrh RB, [PC, #2]
+ | ldrd_iw CARG3, CARG4, RC, BASE
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TISNUM
+ | it ls
+ | cmnls CARG4, #-LJ_TISNUM
+ if (vk) {
+ | bls ->BC_ISEQN_Z
+ } else {
+ | bls ->BC_ISNEN_Z
+ }
+ | // Either or both types are not numbers.
+ |.if FFI
+ | checktp CARG2, LJ_TCDATA
+ | checktpne CARG4, LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG2, CARG4 // Compare types.
+ | bne >2 // Not the same type?
+ | checktp CARG2, LJ_TISPRI
+ | bhs >1 // Same type and primitive type?
+ |
+ | // Same types and not a primitive type. Compare GCobj or pvalue.
+ | cmp CARG1, CARG3
+ if (vk) {
+ | bne >3 // Different GCobjs or pvalues?
+ |1: // Branch if same.
+ | sub PC, RB, #0x20000
+ |2: // Different.
+ | ins_next
+ |3:
+ | checktp CARG2, LJ_TISTABUD
+ | bhi <2 // Different objects and not table/ud?
+ } else {
+ | beq >1 // Same GCobjs or pvalues?
+ | checktp CARG2, LJ_TISTABUD
+ | bhi >2 // Different objects and not table/ud?
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ if (vk) {
+ | beq <2 // No metatable?
+ } else {
+ | beq >2 // No metatable?
+ }
+ | ldrb RA, TAB:RA->nomm
+ | mov CARG4, #1-vk // ne = 0 or 1.
+ | mov CARG2, CARG1
+ | tst RA, #1<<MM_eq
+ | beq ->vmeta_equal // 'no __eq' flag not set?
+ if (vk) {
+ | b <2
+ } else {
+ |2: // Branch if different.
+ | sub PC, RB, #0x20000
+ |1: // Same.
+ | ins_next
+ }
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src*8, RC = str_const (~), JMP with RC = target
+ | mvn RC, RC
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ldrh RB, [PC, #2]
+ | ldr STR:CARG3, [KBASE, RC, lsl #2]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TSTR
+ |.if FFI
+ | bne >7
+ | cmp CARG1, CARG3
+ |.else
+ | it eq
+ | cmpeq CARG1, CARG3
+ |.endif
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ |1:
+ } else {
+ |1:
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | ins_next
+ |
+ |.if FFI
+ |7:
+ | checktp CARG2, LJ_TCDATA
+ | bne <1
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RC = num_const (~), JMP with RC = target
+ | lsl RC, RC, #3
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ | ldrh RB, [PC, #2]
+ | ldrd_iw CARG3, CARG4, RC, KBASE
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | checktp CARG2, LJ_TISNUM
+ | bne >3
+ | checktp CARG4, LJ_TISNUM
+ | bne >4
+ | cmp CARG1, CARG3
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ |1:
+ } else {
+ |1:
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ |2:
+ | ins_next
+ |
+ |3: // CARG1, CARG2 is not an integer.
+ |.if FFI
+ | bhi >7
+ |.else
+ if (!vk) {
+ | it hi
+ | subhi PC, RB, #0x20000
+ }
+ | bhi <2
+ |.endif
+ |.if FPU
+ | checktp CARG4, LJ_TISNUM
+ | vmov s4, CARG3
+ | vldr d0, [RA]
+ | ite lo
+ | vldrlo d1, [RC]
+ | vcvths.f64.s32 d1, s4
+ | b >5
+ |4: // CARG1 is an integer, d1 is a number.
+ | vmov s4, CARG1
+ | vldr d1, [RC]
+ | vcvt.f64.s32 d0, s4
+ |5: // d0 and d1 are numbers.
+ | vcmp.f64 d0, d1
+ | vmrs
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ } else {
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | b <2
+ |.else
+ | // CARG1, CARG2 is a number.
+ | checktp CARG4, LJ_TISNUM
+ | it lo
+ | movlo RA, RB // Save RB.
+ | blo >5
+ | // CARG1, CARG2 is a number, CARG3 is an integer.
+ | mov CARG1, CARG3
+ | mov RC, RA
+ |4: // CARG1 is an integer, CARG3, CARG4 is a number.
+ | mov RA, RB // Save RB.
+ | bl extern __aeabi_i2d
+ | ldrd CARG3, CARG4, [RC] // Restore other operand.
+ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers.
+ | bl extern __aeabi_cdcmpeq
+ if (vk) {
+ | it eq
+ | subeq PC, RA, #0x20000
+ } else {
+ | it ne
+ | subne PC, RA, #0x20000
+ }
+ | b <2
+ |.endif
+ |
+ |.if FFI
+ |7:
+ | checktp CARG2, LJ_TCDATA
+ | bne <1
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src*8, RC = primitive_type (~), JMP with RC = target
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ldrh RB, [PC, #2]
+ | add PC, PC, #4
+ | mvn RC, RC
+ | add RB, PC, RB, lsl #2
+ |.if FFI
+ | checktp CARG2, LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG2, RC
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ } else {
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst*8 or unused, RC = src, JMP with RC = target
+ | add RC, BASE, RC, lsl #3
+ | ldrh RB, [PC, #2]
+ | ldrd CARG1, CARG2, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TTRUE
+ if (op == BC_ISTC || op == BC_IST) {
+ | it ls
+ | subls PC, RB, #0x20000
+ if (op == BC_ISTC) {
+ | it ls
+ | strdls_i CARG1, CARG2, BASE, RA
+ }
+ } else {
+ | it hi
+ | subhi PC, RB, #0x20000
+ if (op == BC_ISFC) {
+ | it hi
+ | strdhi_i CARG1, CARG2, BASE, RA
+ }
+ }
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src*8, RC = -type
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ins_next1
+ | cmn CARG2, RC
+ | ins_next2
+ | bne ->vmeta_istype
+ | ins_next3
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RC = -(TISNUM-1)
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ins_next1
+ | checktp CARG2, LJ_TISNUM
+ | ins_next2
+ | bhs ->vmeta_istype
+ | ins_next3
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ins_next1
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+ case BC_NOT:
+ | // RA = dst*8, RC = src
+ | add RC, BASE, RC, lsl #3
+ | ins_next1
+ | ldr CARG1, [RC, #4]
+ | add RA, BASE, RA
+ | ins_next2
+ | checktp CARG1, LJ_TTRUE
+ | ite ls
+ | mvnls CARG2, #~LJ_TFALSE
+ | mvnhi CARG2, #~LJ_TTRUE
+ | str CARG2, [RA, #4]
+ | ins_next3
+ break;
+ case BC_UNM:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | ins_next1
+ | ins_next2
+ | checktp CARG2, LJ_TISNUM
+ | bhi ->vmeta_unm
+ | it ne
+ | eorne CARG2, CARG2, #0x80000000
+ | bne >5
+ | it eq
+ | rsbseq CARG1, CARG1, #0
+ | it vs
+ | ldrdvs CARG1, CARG2, >9
+ |5:
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |
+ |.align 8
+ |9:
+ | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swapped halfwords!!!
+ break;
+ case BC_LEN:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | checkstr CARG2, >2
+ | ldr CARG1, STR:CARG1->len
+ |1:
+ | mvn CARG2, #~LJ_TISNUM
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |2:
+ | checktab CARG2, ->vmeta_len
+#if LJ_52
+ | ldr TAB:CARG3, TAB:CARG1->metatable
+ | cmp TAB:CARG3, #0
+ | bne >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | .IOS mov RC, BASE
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
+ | .IOS mov BASE, RC
+ | b <1
+#if LJ_52
+ |9:
+ | ldrb CARG4, TAB:CARG3->nomm
+ | tst CARG4, #1<<MM_len
+ | bne <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithcheck, cond, ncond, target
+ ||if (vk == 1) {
+ | cmn CARG4, #-LJ_TISNUM
+ | it cond
+ | cmn..cond CARG2, #-LJ_TISNUM
+ ||} else {
+ | cmn CARG2, #-LJ_TISNUM
+ | it cond
+ | cmn..cond CARG4, #-LJ_TISNUM
+ ||}
+ | b..ncond target
+ |.endmacro
+ |.macro ins_arithcheck_int, target
+ | ins_arithcheck eq, ne, target
+ |.endmacro
+ |.macro ins_arithcheck_num, target
+ | ins_arithcheck lo, hs, target
+ |.endmacro
+ |
+ |.macro ins_arithpre
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | .if FPU
+ | ldrd_iw CARG1, CARG2, RB, BASE
+ | ldrd_iw CARG3, CARG4, RC, KBASE
+ | .else
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, KBASE, RC
+ | .endif
+ || break;
+ ||case 1:
+ | .if FPU
+ | ldrd_iw CARG3, CARG4, RB, BASE
+ | ldrd_iw CARG1, CARG2, RC, KBASE
+ | .else
+ | ldrd_i CARG3, CARG4, BASE, RB
+ | ldrd_i CARG1, CARG2, KBASE, RC
+ | .endif
+ || break;
+ ||default:
+ | .if FPU
+ | ldrd_iw CARG1, CARG2, RB, BASE
+ | ldrd_iw CARG3, CARG4, RC, BASE
+ | .else
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | .endif
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithpre_fpu, reg1, reg2
+ |.if FPU
+ ||if (vk == 1) {
+ | vldr reg2, [RB]
+ | vldr reg1, [RC]
+ ||} else {
+ | vldr reg1, [RB]
+ | vldr reg2, [RC]
+ ||}
+ |.endif
+ |.endmacro
+ |
+ |.macro ins_arithpost_fpu, reg
+ | ins_next1
+ | add RA, BASE, RA
+ | ins_next2
+ | vstr reg, [RA]
+ | ins_next3
+ |.endmacro
+ |
+ |.macro ins_arithfallback, ins
+ ||switch (vk) {
+ ||case 0:
+ | ins ->vmeta_arith_vn
+ || break;
+ ||case 1:
+ | ins ->vmeta_arith_nv
+ || break;
+ ||default:
+ | ins ->vmeta_arith_vv
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins, fpins, fpcall
+ | ins_arithpre
+ |.if "intins" ~= "vm_modi" and not FPU
+ | ins_next1
+ |.endif
+ | ins_arithcheck_int >5
+ |.if "intins" == "smull"
+ | smull CARG1, RC, CARG3, CARG1
+ | cmp RC, CARG1, asr #31
+ | ins_arithfallback bne
+ |.elif "intins" == "vm_modi"
+ | movs CARG2, CARG3
+ | ins_arithfallback beq
+ | bl ->vm_modi
+ | mvn CARG2, #~LJ_TISNUM
+ |.else
+ | intins CARG1, CARG1, CARG3
+ | ins_arithfallback bvs
+ |.endif
+ |4:
+ |.if "intins" == "vm_modi" or FPU
+ | ins_next1
+ |.endif
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |5: // FP variant.
+ | ins_arithpre_fpu d6, d7
+ | ins_arithfallback ins_arithcheck_num
+ |.if FPU
+ |.if "intins" == "vm_modi"
+ | bl fpcall
+ |.else
+ | fpins d6, d6, d7
+ |.endif
+ | ins_arithpost_fpu d6
+ |.else
+ | bl fpcall
+ |.if "intins" ~= "vm_modi"
+ | ins_next1
+ |.endif
+ | b <4
+ |.endif
+ |.endmacro
+ |
+ |.macro ins_arithfp, fpins, fpcall
+ | ins_arithpre
+ |.if "fpins" ~= "extern" or HFABI
+ | ins_arithpre_fpu d0, d1
+ |.endif
+ | ins_arithfallback ins_arithcheck_num
+ |.if "fpins" == "extern"
+ | .IOS mov RC, BASE
+ | bl fpcall
+ | .IOS mov BASE, RC
+ |.elif FPU
+ | fpins d0, d0, d1
+ |.else
+ | bl fpcall
+ |.endif
+ |.if ("fpins" ~= "extern" or HFABI) and FPU
+ | ins_arithpost_fpu d0
+ |.else
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |.endif
+ |.endmacro
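+
+  /* ins_arithdn above implements the dual-number fast path: attempt the
+  ** integer op and fall back to the FP/metamethod path on a tag mismatch
+  ** or overflow. C sketch of the check behind "adds ... ins_arithfallback
+  ** bvs" (illustration only):
+  **
+  **   static int add_no_ovf(int32_t a, int32_t b, int32_t *r)
+  **   {
+  **     int64_t s = (int64_t)a + b;
+  **     if (s != (int32_t)s) return 0;  // Overflow: take the fallback.
+  **     *r = (int32_t)s;
+  **     return 1;
+  **   }
+  */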
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn adds, vadd.f64, extern __aeabi_dadd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn subs, vsub.f64, extern __aeabi_dsub
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithdn smull, vmul.f64, extern __aeabi_dmul
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp vdiv.f64, extern __aeabi_ddiv
+ break;
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
+ | ins_arithdn vm_modi, vm_mod, ->vm_mod
+ break;
+ case BC_POW:
+ | // NYI: (partial) integer arithmetic.
+ | ins_arithfp extern, extern pow
+ break;
+
+ case BC_CAT:
+ | decode_RB8 RC, INS
+ | decode_RC8 RB, INS
+ | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!)
+ | sub CARG3, RB, RC
+ | str BASE, L->base
+ | add CARG2, BASE, RB
+ |->BC_CAT_Z:
+ | // RA = dst*8, RC = src_start*8, CARG2 = top-1
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | lsr CARG3, CARG3, #3
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | ldr BASE, L->base
+ | cmp CRET1, #0
+ | bne ->vmeta_binop
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | ins_next1
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA // Copy result to RA.
+ | ins_next3
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst*8, RC = str_const (~)
+ | mvn RC, RC
+ | ins_next1
+ | ldr CARG1, [KBASE, RC, lsl #2]
+ | mvn CARG2, #~LJ_TSTR
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst*8, RC = cdata_const (~)
+ | mvn RC, RC
+ | ins_next1
+ | ldr CARG1, [KBASE, RC, lsl #2]
+ | mvn CARG2, #~LJ_TCDATA
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst*8, (RC = int16_literal)
+ | mov CARG1, INS, asr #16 // Refetch sign-extended reg.
+ | mvn CARG2, #~LJ_TISNUM
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+ case BC_KNUM:
+ | // RA = dst*8, RC = num_const
+ | lsl RC, RC, #3
+ | ins_next1
+ | ldrd_i CARG1, CARG2, KBASE, RC
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+ case BC_KPRI:
+ | // RA = dst*8, RC = primitive_type (~)
+ | add RA, BASE, RA
+ | mvn RC, RC
+ | ins_next1
+ | ins_next2
+ | str RC, [RA, #4]
+ | ins_next3
+ break;
+ case BC_KNIL:
+ | // RA = base*8, RC = end
+ | add RA, BASE, RA
+ | add RC, BASE, RC, lsl #3
+ | mvn CARG1, #~LJ_TNIL
+ | str CARG1, [RA, #4]
+ | add RA, RA, #8
+ |1:
+ | str CARG1, [RA, #4]
+ | cmp RA, RC
+ | add RA, RA, #8
+ | blt <1
+ | ins_next_
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst*8, RC = uvnum
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | lsl RC, RC, #2
+ | add RC, RC, #offsetof(GCfuncL, uvptr)
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RC]
+ | ldr CARG2, UPVAL:CARG2->v
+ | ldrd CARG3, CARG4, [CARG2]
+ | ins_next1
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ break;
+ case BC_USETV:
+ | // RA = uvnum*8, RC = src
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | lsr RA, RA, #1
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
+ | lsl RC, RC, #3
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | ldrb RB, UPVAL:CARG2->marked
+ | ldrb RC, UPVAL:CARG2->closed
+ | ldr CARG2, UPVAL:CARG2->v
+ | tst RB, #LJ_GC_BLACK // isblack(uv)
+ | add RB, CARG4, #-LJ_TISGCV
+ | it ne
+ | cmpne RC, #0
+ | strd CARG3, CARG4, [CARG2]
+ | bne >2 // Upvalue is closed and black?
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is collectable.
+ | cmn RB, #-(LJ_TNUMX - LJ_TISGCV)
+ | it hi
+ | ldrbhi RC, GCOBJ:CARG3->gch.marked
+ | bls <1 // tvisgcv(v)
+ | sub CARG1, DISPATCH, #-GG_DISP2G
+ | tst RC, #LJ_GC_WHITES
+ | // Crossed a write barrier. Move the barrier forward.
+ |.if IOS
+ | beq <1
+ | mov RC, BASE
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RC
+ |.else
+ | it ne
+ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ |.endif
+ | b <1
+ break;
+ case BC_USETS:
+ | // RA = uvnum*8, RC = str_const (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | lsr RA, RA, #1
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
+ | mvn RC, RC
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
+ | ldr STR:CARG3, [KBASE, RC, lsl #2]
+ | ldrb RB, UPVAL:CARG2->marked
+ | ldrb RC, UPVAL:CARG2->closed
+ | ldr CARG2, UPVAL:CARG2->v
+ | mvn CARG4, #~LJ_TSTR
+ | tst RB, #LJ_GC_BLACK // isblack(uv)
+ | ldrb RB, STR:CARG3->marked
+ | strd CARG3, CARG4, [CARG2]
+ | bne >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | tst RB, #LJ_GC_WHITES // iswhite(str)
+ | it ne
+ | cmpne RC, #0
+ | sub CARG1, DISPATCH, #-GG_DISP2G
+ | // Crossed a write barrier. Move the barrier forward.
+ |.if IOS
+ | beq <1
+ | mov RC, BASE
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RC
+ |.else
+ | it ne
+ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ |.endif
+ | b <1
+ break;
+ case BC_USETN:
+ | // RA = uvnum*8, RC = num_const
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | lsr RA, RA, #1
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
+ | lsl RC, RC, #3
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
+ | ldrd_i CARG3, CARG4, KBASE, RC
+ | ldr CARG2, UPVAL:CARG2->v
+ | ins_next1
+ | ins_next2
+ | strd CARG3, CARG4, [CARG2]
+ | ins_next3
+ break;
+ case BC_USETP:
+ | // RA = uvnum*8, RC = primitive_type (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | lsr RA, RA, #1
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
+ | mvn RC, RC
+ | ldr CARG2, UPVAL:CARG2->v
+ | ins_next1
+ | ins_next2
+ | str RC, [CARG2, #4]
+ | ins_next3
+ break;
+
+ case BC_UCLO:
+ | // RA = level*8, RC = target
+ | ldr CARG3, L->openupval
+ | add RC, PC, RC, lsl #2
+ | str BASE, L->base
+ | cmp CARG3, #0
+ | sub PC, RC, #0x20000
+ | beq >1
+ | mov CARG1, L
+ | add CARG2, BASE, RA
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | ldr BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | // RA = dst*8, RC = proto_const (~) (holding function prototype)
+ | mvn RC, RC
+ | str BASE, L->base
+ | ldr CARG2, [KBASE, RC, lsl #2]
+ | str PC, SAVE_PC
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | mov CARG1, L
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | bl extern lj_func_newL_gc
+ | // Returns GCfuncL *.
+ | ldr BASE, L->base
+ | mvn CARG2, #~LJ_TFUNC
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst*8, RC = (hbits|asize) | tab_const (~)
+ if (op == BC_TDUP) {
+ | mvn RC, RC
+ }
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total)
+ | ldr CARG3, [CARG1]
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.threshold)
+ | ldr CARG4, [CARG1]
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | cmp CARG3, CARG4
+ | mov CARG1, L
+ | bhs >5
+ |1:
+ if (op == BC_TNEW) {
+ | lsl CARG2, RC, #21
+ | lsr CARG3, RC, #11
+ | asr RC, CARG2, #21
+ | lsr CARG2, CARG2, #21
+ | cmn RC, #1
+ | it eq
+ | addeq CARG2, CARG2, #2
+ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Returns GCtab *.
+ } else {
+ | ldr CARG2, [KBASE, RC, lsl #2]
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns GCtab *.
+ }
+ | ldr BASE, L->base
+ | mvn CARG2, #~LJ_TTAB
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |5:
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
+ | mov CARG1, L
+ | b <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst*8, RC = str_const (~)
+ case BC_GSET:
+ | // RA = dst*8, RC = str_const (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | mvn RC, RC
+ | ldr TAB:CARG1, LFUNC:CARG2->env
+ | ldr STR:RC, [KBASE, RC, lsl #2]
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | ldrd_i TAB:CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | checktab CARG2, ->vmeta_tgetv // STALL: load CARG1, CARG2.
+ | checktp CARG4, LJ_TISNUM // Integer key?
+ | it eq
+ | ldreq CARG4, TAB:CARG1->array
+ | it eq
+ | ldreq CARG2, TAB:CARG1->asize
+ | bne >9
+ |
+ | add CARG4, CARG4, CARG3, lsl #3
+ | cmp CARG3, CARG2 // In array part?
+ | it lo
+ | ldrdlo CARG3, CARG4, [CARG4]
+ | bhs ->vmeta_tgetv
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |1:
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cmp TAB:CARG2, #0
+ | beq <1 // No metatable: done.
+ | ldrb CARG2, TAB:CARG2->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <1 // 'no __index' flag set: done.
+ | decode_RB8 RB, INS // Restore RB.
+ | b ->vmeta_tgetv
+ |
+ |9:
+ | checktp CARG4, LJ_TSTR // String key?
+ | it eq
+ | moveq STR:RC, CARG3
+ | beq ->BC_TGETS_Z
+ | b ->vmeta_tgetv
+ break;
+ case BC_TGETS:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = dst*8, RB = table*8, RC = str_const (~)
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
+ | checktab CARG2, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ |1:
+ | ldrd CARG1, CARG2, NODE:INS->key // STALL: early NODE:INS.
+ | ldrd CARG3, CARG4, NODE:INS->val
+ | ldr NODE:INS, NODE:INS->next
+ | checktp CARG2, LJ_TSTR
+ | it eq
+ | cmpeq CARG1, STR:RC
+ | bne >4
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |3:
+ | ins_next1
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |4: // Follow hash chain.
+ | cmp NODE:INS, #0
+ | bne <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | mov CARG3, #0 // Optional clear of undef. value (during load stall).
+ | mvn CARG4, #~LJ_TNIL
+ | cmp TAB:CARG1, #0
+ | beq <3 // No metatable: done.
+ | ldrb CARG2, TAB:CARG1->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = dst*8, RB = table*8, RC = index
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | checktab CARG2, ->vmeta_tgetb // STALL: load CARG1, CARG2.
+ | ldr CARG3, TAB:CARG1->asize
+ | ldr CARG4, TAB:CARG1->array
+ | lsl CARG2, RC, #3
+ | cmp RC, CARG3
+ | ldrdlo_i CARG3, CARG4, CARG4, CARG2
+ | bhs ->vmeta_tgetb
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |1:
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cmp TAB:CARG2, #0
+ | beq <1 // No metatable: done.
+ | ldrb CARG2, TAB:CARG2->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb
+ break;
+ case BC_TGETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG1, [BASE, RB]
+ | ldr CARG2, [BASE, RC]
+ | ldr CARG4, TAB:CARG1->array
+ | ldr CARG3, TAB:CARG1->asize
+ | add CARG4, CARG4, CARG2, lsl #3
+ | cmp CARG2, CARG3 // In array part?
+ | bhs ->vmeta_tgetr
+ | ldrd CARG1, CARG2, [CARG4]
+ |->BC_TGETR_Z:
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+
+ case BC_TSETV:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = src*8, RB = table*8, RC = key*8
+ | ldrd_i TAB:CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | checktab CARG2, ->vmeta_tsetv // STALL: load CARG1, CARG2.
+ | checktp CARG4, LJ_TISNUM // Integer key?
+ | it eq
+ | ldreq CARG2, TAB:CARG1->array
+ | it eq
+ | ldreq CARG4, TAB:CARG1->asize
+ | bne >9
+ |
+ | add CARG2, CARG2, CARG3, lsl #3
+ | cmp CARG3, CARG4 // In array part?
+ | it lo
+ | ldrlo INS, [CARG2, #4]
+ | bhs ->vmeta_tsetv
+ | ins_next1 // Overwrites RB!
+ | checktp INS, LJ_TNIL
+ | ldrb INS, TAB:CARG1->marked
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >5
+ |1:
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, [CARG2]
+ | bne >7
+ |2:
+ | ins_next2
+ | ins_next3
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ | beq <1 // No metatable: done.
+ | ldrb RA, TAB:RA->nomm
+ | tst RA, #1<<MM_newindex
+ | bne <1 // 'no __newindex' flag set: done.
+ | ldr INS, [PC, #-4] // Restore RA and RB.
+ | decode_RB8 RB, INS
+ | decode_RA8 RA, INS
+ | b ->vmeta_tsetv
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG1, INS, CARG3
+ | b <2
+ |
+ |9:
+ | checktp CARG4, LJ_TSTR // String key?
+ | it eq
+ | moveq STR:RC, CARG3
+ | beq ->BC_TSETS_Z
+ | b ->vmeta_tsetv
+ break;
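+
+  /* The barrierback above is LuaJIT's reverse write barrier: storing into
+  ** a black table re-grays the table instead of marking the stored value
+  ** (sketch of the lj_gc behavior; isblack/black2gray as in lj_gc.h):
+  **
+  **   if (isblack(t)) {
+  **     black2gray(t);                // Flip the mark bit.
+  **     t->gclist = g->gc.grayagain;  // Queue table for re-traversal.
+  **     g->gc.grayagain = t;
+  **   }
+  */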
+ case BC_TSETS:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = src*8, RB = table*8, RC = str_const (~)
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
+ | checktab CARG2, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | mov CARG4, #0
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ | strb CARG4, TAB:RB->nomm // Clear metamethod cache.
+ |1:
+ | ldrd CARG1, CARG2, NODE:INS->key
+ | ldr CARG4, NODE:INS->val.it
+ | ldr NODE:CARG3, NODE:INS->next
+ | checktp CARG2, LJ_TSTR
+ | it eq
+ | cmpeq CARG1, STR:RC
+ | bne >5
+ | ldrb CARG2, TAB:RB->marked
+ | checktp CARG4, LJ_TNIL // Key found, but nil value?
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >4
+ |2:
+ | tst CARG2, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, NODE:INS->val
+ | bne >7
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | cmp TAB:CARG1, #0
+ | beq <2 // No metatable: done.
+ | ldrb CARG1, TAB:CARG1->nomm
+ | tst CARG1, #1<<MM_newindex
+ | bne <2 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | movs NODE:INS, NODE:CARG3
+ | bne <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | mov CARG3, TMPDp
+ | str PC, SAVE_PC
+ | cmp TAB:CARG1, #0 // No metatable: continue.
+ | str BASE, L->base
+ | it ne
+ | ldrbne CARG2, TAB:CARG1->nomm
+ | mov CARG1, L
+ | beq >6
+ | tst CARG2, #1<<MM_newindex
+ | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | mvn CARG4, #~LJ_TSTR
+ | str STR:RC, TMPDlo
+ | mov CARG2, TAB:RB
+ | str CARG4, TMPDhi
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ldr BASE, L->base
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | strd CARG3, CARG4, [CRET1]
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, CARG2, CARG3
+ | b <3
+ break;
+ case BC_TSETB:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = src*8, RB = table*8, RC = index
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | checktab CARG2, ->vmeta_tsetb // STALL: load CARG1, CARG2.
+ | ldr CARG3, TAB:CARG1->asize
+ | ldr RB, TAB:CARG1->array
+ | lsl CARG2, RC, #3
+ | cmp RC, CARG3
+ | ldrdlo_iw CARG3, CARG4, CARG2, RB
+ | bhs ->vmeta_tsetb
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | ldrb INS, TAB:CARG1->marked
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >5
+ |1:
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, [CARG2]
+ | bne >7
+ |2:
+ | ins_next2
+ | ins_next3
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ | beq <1 // No metatable: done.
+ | ldrb RA, TAB:RA->nomm
+ | tst RA, #1<<MM_newindex
+ | bne <1 // 'no __newindex' flag set: done.
+ | ldr INS, [PC, #-4] // Restore INS.
+ | decode_RA8 RA, INS
+ | b ->vmeta_tsetb
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG1, INS, CARG3
+ | b <2
+ break;
+ case BC_TSETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = src*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG2, [BASE, RB]
+ | ldr CARG3, [BASE, RC]
+ | ldrb INS, TAB:CARG2->marked
+ | ldr CARG1, TAB:CARG2->array
+ | ldr CARG4, TAB:CARG2->asize
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | add CARG1, CARG1, CARG3, lsl #3
+ | bne >7
+ |2:
+ | cmp CARG3, CARG4 // In array part?
+ | bhs ->vmeta_tsetr
+ |->BC_TSETR_Z:
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | ins_next1
+ | ins_next2
+ | strd CARG3, CARG4, [CARG1]
+ | ins_next3
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, INS, RB
+ | b <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base*8 (table at base-1), RC = num_const (start index)
+ | add RA, BASE, RA
+ |1:
+ | ldr RB, SAVE_MULTRES
+ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
+ | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
+ | subs RB, RB, #8
+ | ldr CARG4, TAB:CARG2->asize
+ | beq >4 // Nothing to copy?
+ | add CARG3, CARG1, RB, lsr #3
+ | cmp CARG3, CARG4
+ | ldr CARG4, TAB:CARG2->array
+ | add RB, RA, RB
+ | bhi >5
+ | add INS, CARG4, CARG1, lsl #3
+ | ldrb CARG1, TAB:CARG2->marked
+ |3: // Copy result slots to table.
+ | ldrd CARG3, CARG4, [RA], #8
+ | strd CARG3, CARG4, [INS], #8
+ | cmp RA, RB
+ | blo <3
+ | tst CARG1, #LJ_GC_BLACK // isblack(table)
+ | bne >7
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | // Must not reallocate the stack.
+ | .IOS ldr BASE, L->base
+ | b <1
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, CARG1, CARG3
+ | b <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base*8, (RB = nresults+1,) RC = extra_nargs
+ | ldr CARG1, SAVE_MULTRES
+ | decode_RC8 NARGS8:RC, INS
+ | add NARGS8:RC, NARGS8:RC, CARG1
+ | b ->BC_CALL_Z
+ break;
+ case BC_CALL:
+ | decode_RC8 NARGS8:RC, INS
+ | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8
+ |->BC_CALL_Z:
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | ldrd_iw CARG3, CARG4, BASE, RA
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add BASE, BASE, #8
+ | checkfunc CARG4, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | // RA = base*8, (RB = 0,) RC = extra_nargs
+ | ldr CARG1, SAVE_MULTRES
+ | add NARGS8:RC, CARG1, RC, lsl #3
+ | b ->BC_CALLT1_Z
+ break;
+ case BC_CALLT:
+ | lsl NARGS8:RC, RC, #3
+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
+ |->BC_CALLT1_Z:
+ | ldrd_iw LFUNC:CARG3, CARG4, RA, BASE
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add RA, RA, #8
+ | checkfunc CARG4, ->vmeta_callt
+ | ldr PC, [BASE, FRAME_PC]
+ |->BC_CALLT2_Z:
+ | mov RB, #0
+ | ldrb CARG4, LFUNC:CARG3->ffid
+ | tst PC, #FRAME_TYPE
+ | bne >7
+ |1:
+ | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
+ | cmp NARGS8:RC, #0
+ | beq >3
+ |2:
+ | ldrd_i CARG1, CARG2, RA, RB
+ | add INS, RB, #8
+ | cmp INS, NARGS8:RC
+ | strd_i CARG1, CARG2, BASE, RB
+ | mov RB, INS
+ | bne <2
+ |3:
+ | cmp CARG4, #1 // (> FF_C) Calling a fast function?
+ | bhi >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function with a Lua frame below.
+ | ldr INS, [PC, #-4]
+ | decode_RA8 RA, INS
+ | sub CARG1, BASE, RA
+ | ldr LFUNC:CARG1, [CARG1, #-16]
+ | ldr CARG1, LFUNC:CARG1->field_pc
+ | ldr KBASE, [CARG1, #PC2PROTO(k)]
+ | b <4
+ |
+ |7: // Tailcall from a vararg function.
+ | eor PC, PC, #FRAME_VARG
+ | tst PC, #FRAME_TYPEP // Vararg frame below?
+ | it ne
+ | movne CARG4, #0 // Clear ffid if no Lua function below.
+ | bne <1
+ | sub BASE, BASE, PC
+ | ldr PC, [BASE, FRAME_PC]
+ | tst PC, #FRAME_TYPE
+ | it ne
+ | movne CARG4, #0 // Clear ffid if no Lua function below.
+ | b <1
+ break;
+
+ case BC_ITERC:
+ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | ldrd CARG3, CARG4, [RA, #-16]
+ | ldrd CARG1, CARG2, [RA, #-8]
+ | add BASE, RA, #8
+ | strd CARG3, CARG4, [RA, #8] // Copy state.
+ | strd CARG1, CARG2, [RA, #16] // Copy control var.
+ | // STALL: locked CARG3, CARG4.
+ | ldrd LFUNC:CARG3, CARG4, [RA, #-24]
+ | mov NARGS8:RC, #16 // Iterators get 2 arguments.
+ | // STALL: load CARG3, CARG4.
+ | strd LFUNC:CARG3, CARG4, [RA] // Copy callable.
+ | checkfunc CARG4, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ |.if JIT
+ | hotloop
+ |.endif
+ |->vm_IITERN:
+ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA
+ | ldr TAB:RB, [RA, #-16]
+ | ldr CARG1, [RA, #-8] // Get index from control var.
+ | ldr INS, TAB:RB->asize
+ | ldr CARG2, TAB:RB->array
+ | add PC, PC, #4
+ |1: // Traverse array part.
+ | subs RC, CARG1, INS
+ | add CARG3, CARG2, CARG1, lsl #3
+ | bhs >5 // Index points after array part?
+ | ldrd CARG3, CARG4, [CARG3]
+ | checktp CARG4, LJ_TNIL
+ | it eq
+ | addeq CARG1, CARG1, #1 // Skip holes in array part.
+ | beq <1
+ | ldrh RC, [PC, #-2]
+ | mvn CARG2, #~LJ_TISNUM
+ | strd CARG3, CARG4, [RA, #8]
+ | add RC, PC, RC, lsl #2
+ | add RB, CARG1, #1
+ | strd CARG1, CARG2, [RA]
+ | sub PC, RC, #0x20000
+ | str RB, [RA, #-8] // Update control var.
+ |3:
+ | ins_next
+ |
+ |5: // Traverse hash part.
+ | ldr CARG4, TAB:RB->hmask
+ | ldr NODE:RB, TAB:RB->node
+ |6:
+ | add CARG1, RC, RC, lsl #1
+ | cmp RC, CARG4 // End of iteration? Branch to ITERL+1.
+ | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
+ | bhi <3
+ | ldrd CARG1, CARG2, NODE:CARG3->val
+ | checktp CARG2, LJ_TNIL
+ | add RC, RC, #1
+ | beq <6 // Skip holes in hash part.
+ | ldrh RB, [PC, #-2]
+ | add RC, RC, INS
+ | ldrd CARG3, CARG4, NODE:CARG3->key
+ | str RC, [RA, #-8] // Update control var.
+ | strd CARG1, CARG2, [RA, #8]
+ | add RC, PC, RB, lsl #2
+ | sub PC, RC, #0x20000
+ | strd CARG3, CARG4, [RA]
+ | b <3
+ break;
+
+ case BC_ISNEXT:
+ | // RA = base*8, RC = target (points to ITERN)
+ | add RA, BASE, RA
+ | add RC, PC, RC, lsl #2
+ | ldrd CFUNC:CARG1, CFUNC:CARG2, [RA, #-24]
+ | ldr CARG3, [RA, #-12]
+ | ldr CARG4, [RA, #-4]
+ | checktp CARG2, LJ_TFUNC
+ | it eq
+ | ldrbeq CARG1, CFUNC:CARG1->ffid
+ | checktpeq CARG3, LJ_TTAB
+ | checktpeq CARG4, LJ_TNIL
+ | it eq
+ | cmpeq CARG1, #FF_next_N
+ | it eq
+ | subeq PC, RC, #0x20000
+ | bne >5
+ | ins_next1
+ | ins_next2
+ | mov CARG1, #0
+ | mvn CARG2, #~LJ_KEYINDEX
+ | strd CARG1, CARG2, [RA, #-8] // Initialize control var.
+ |1:
+ | ins_next3
+ |5: // Despecialize bytecode if any of the checks fail.
+ | mov CARG1, #BC_JMP
+ | mov OP, #BC_ITERC
+ | strb CARG1, [PC, #-4]
+ | sub PC, RC, #0x20000
+ |.if JIT
+ | ldrb CARG1, [PC]
+ | cmp CARG1, #BC_ITERN
+ | bne >6
+ |.endif
+ | strb OP, [PC] // Subsumes ins_next1.
+ | ins_next2
+ | b <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | sub CARG2, DISPATCH, #-DISPATCH_J(trace)
+ | ldr CARG1, [CARG2]
+ | ldrh CARG2, [PC, #2]
+ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
+ | // Subsumes ins_next1 and ins_next2.
+ | ldr INS, TRACE:CARG1->startins
+ | .long 0xf36c0e07 // BFI INS, OP, #0, #8
+ | str INS, [PC], #4
+ | b <1
+ |.endif
+ break;
+
+ case BC_VARG:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
+ | ldr CARG1, [BASE, FRAME_PC]
+ | add RC, BASE, RC
+ | add RA, BASE, RA
+ | add RC, RC, #FRAME_VARG
+ | add CARG4, RA, RB
+ | sub CARG3, BASE, #8 // CARG3 = vtop
+ | sub RC, RC, CARG1 // RC = vbase
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | cmp RB, #0
+ | sub CARG1, CARG3, RC
+ | beq >5 // Copy all varargs?
+ | sub CARG4, CARG4, #16
+ |1: // Copy vararg slots to destination slots.
+ | cmp RC, CARG3
+ | ite lo
+ | ldrdlo CARG1, CARG2, [RC], #8
+ | mvnhs CARG2, #~LJ_TNIL
+ | cmp RA, CARG4
+ | strd CARG1, CARG2, [RA], #8
+ | blo <1
+ |2:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | ldr CARG4, L->maxstack
+ | cmp CARG1, #0
+ | ite le
+ | movle RB, #8 // MULTRES = (0+1)*8
+ | addgt RB, CARG1, #8
+ | add CARG2, RA, CARG1
+ | str RB, SAVE_MULTRES
+ | ble <2
+ | cmp CARG2, CARG4
+ | bhi >7
+ |6:
+ | ldrd CARG1, CARG2, [RC], #8
+ | strd CARG1, CARG2, [RA], #8
+ | cmp RC, CARG3
+ | blo <6
+ | b <2
+ |
+ |7: // Grow stack for varargs.
+ | lsr CARG2, CARG1, #3
+ | str RA, L->top
+ | mov CARG1, L
+ | str BASE, L->base
+ | sub RC, RC, BASE // Need delta, because BASE may change.
+ | str PC, SAVE_PC
+ | sub RA, RA, BASE
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->base
+ | add RA, BASE, RA
+ | add RC, BASE, RC
+ | sub CARG3, BASE, #8
+ | b <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | // RA = results*8, RC = extra results
+ | ldr CARG1, SAVE_MULTRES
+ | ldr PC, [BASE, FRAME_PC]
+ | add RA, BASE, RA
+ | add RC, CARG1, RC, lsl #3
+ | b ->BC_RETM_Z
+ break;
+
+ case BC_RET:
+ | // RA = results*8, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | add RA, BASE, RA
+ |->BC_RETM_Z:
+ | str RC, SAVE_MULTRES
+ |1:
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | bne ->BC_RETV2_Z
+ |
+ |->BC_RET_Z:
+ | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
+ | ldr INS, [PC, #-4]
+ | subs CARG4, RC, #8
+ | sub CARG3, BASE, #8
+ | beq >3
+ |2:
+ | ldrd CARG1, CARG2, [RA], #8
+ | add BASE, BASE, #8
+ | subs CARG4, CARG4, #8
+ | strd CARG1, CARG2, [BASE, #-16]
+ | bne <2
+ |3:
+ | decode_RA8 RA, INS
+ | sub CARG4, CARG3, RA
+ | decode_RB8 RB, INS
+ | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
+ |5:
+ | cmp RB, RC // More results expected?
+ | bhi >6
+ | mov BASE, CARG4
+ | ldr CARG2, LFUNC:CARG1->field_pc
+ | ins_next1
+ | ins_next2
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next3
+ |
+ |6: // Fill up results with nil.
+ | mvn CARG2, #~LJ_TNIL
+ | add BASE, BASE, #8
+ | add RC, RC, #8
+ | str CARG2, [BASE, #-12]
+ | b <5
+ |
+ |->BC_RETV1_Z: // Non-standard return case.
+ | add RA, BASE, RA
+ |->BC_RETV2_Z:
+ | tst CARG2, #FRAME_TYPEP
+ | bne ->vm_return
+ | // Return from vararg function: relocate BASE down.
+ | sub BASE, BASE, CARG2
+ | ldr PC, [BASE, FRAME_PC]
+ | b <1
+ break;
+
+ case BC_RET0: case BC_RET1:
+ | // RA = results*8, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | str RC, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | it eq
+ | ldreq INS, [PC, #-4]
+ | bne ->BC_RETV1_Z
+ if (op == BC_RET1) {
+ | ldrd_i CARG1, CARG2, BASE, RA
+ }
+ | sub CARG4, BASE, #8
+ | decode_RA8 RA, INS
+ if (op == BC_RET1) {
+ | strd CARG1, CARG2, [CARG4]
+ }
+ | sub BASE, CARG4, RA
+ | decode_RB8 RB, INS
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ |5:
+ | cmp RB, RC
+ | bhi >6
+ | ldr CARG2, LFUNC:CARG1->field_pc
+ | ins_next1
+ | ins_next2
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next3
+ |
+ |6: // Fill up results with nil.
+ | sub CARG2, CARG4, #4
+ | mvn CARG3, #~LJ_TNIL
+ | str CARG3, [CARG2, RC]
+ | add RC, RC, #8
+ | b <5
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
+ |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
+ |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
+ |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ | // RA = base*8, RC = target (after end of loop or start of loop)
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ if (op != BC_JFORL) {
+ | add RC, PC, RC, lsl #2
+ }
+ if (!vk) {
+ | ldrd CARG3, CARG4, FOR_STOP
+ | checktp CARG2, LJ_TISNUM
+ | ldr RB, FOR_TSTEP
+ | bne >5
+ | checktp CARG4, LJ_TISNUM
+ | ldr CARG4, FOR_STEP
+ | checktpeq RB, LJ_TISNUM
+ | bne ->vmeta_for
+ | cmp CARG4, #0
+ | blt >4
+ | cmp CARG1, CARG3
+ } else {
+ | ldrd CARG3, CARG4, FOR_STEP
+ | checktp CARG2, LJ_TISNUM
+ | bne >5
+ | adds CARG1, CARG1, CARG3
+ | ldr CARG4, FOR_STOP
+ if (op == BC_IFORL) {
+ | it vs
+ | addvs RC, PC, #0x20000 // Overflow: prevent branch.
+ } else {
+ | bvs >2 // Overflow: do not enter mcode.
+ }
+ | cmp CARG3, #0
+ | blt >4
+ | cmp CARG1, CARG4
+ }
+ |1:
+ if (op == BC_FORI) {
+ | it gt
+ | subgt PC, RC, #0x20000
+ } else if (op == BC_JFORI) {
+ | sub PC, RC, #0x20000
+ | it le
+ | ldrhle RC, [PC, #-2]
+ } else if (op == BC_IFORL) {
+ | it le
+ | suble PC, RC, #0x20000
+ }
+ if (vk) {
+ | strd CARG1, CARG2, FOR_IDX
+ }
+ |2:
+ | ins_next1
+ | ins_next2
+ | strd CARG1, CARG2, FOR_EXT
+ if (op == BC_JFORI || op == BC_JFORL) {
+ | ble =>BC_JLOOP
+ }
+ |3:
+ | ins_next3
+ |
+ |4: // Invert check for negative step.
+ if (!vk) {
+ | cmp CARG3, CARG1
+ } else {
+ | cmp CARG4, CARG1
+ }
+ | b <1
+ |
+ |5: // FP loop.
+ if (!vk) {
+ | itt lo
+ | cmnlo CARG4, #-LJ_TISNUM
+ | cmnlo RB, #-LJ_TISNUM
+ | bhs ->vmeta_for
+ |.if FPU
+ | vldr d0, FOR_IDX
+ | vldr d1, FOR_STOP
+ | cmp RB, #0
+ | vstr d0, FOR_EXT
+ |.else
+ | cmp RB, #0
+ | strd CARG1, CARG2, FOR_EXT
+ | blt >8
+ |.endif
+ } else {
+ |.if FPU
+ | vldr d0, FOR_IDX
+ | vldr d2, FOR_STEP
+ | vldr d1, FOR_STOP
+ | cmp CARG4, #0
+ | vadd.f64 d0, d0, d2
+ |.else
+ | cmp CARG4, #0
+ | blt >8
+ | bl extern __aeabi_dadd
+ | strd CARG1, CARG2, FOR_IDX
+ | ldrd CARG3, CARG4, FOR_STOP
+ | strd CARG1, CARG2, FOR_EXT
+ |.endif
+ }
+ |6:
+ |.if FPU
+ | ite ge
+ | vcmpge.f64 d0, d1
+ | vcmplt.f64 d1, d0
+ | vmrs
+ |.else
+ | bl extern __aeabi_cdcmple
+ |.endif
+ if (vk) {
+ |.if FPU
+ | vstr d0, FOR_IDX
+ | vstr d0, FOR_EXT
+ |.endif
+ }
+ if (op == BC_FORI) {
+ | it hi
+ | subhi PC, RC, #0x20000
+ } else if (op == BC_JFORI) {
+ | sub PC, RC, #0x20000
+ | it ls
+ | ldrhls RC, [PC, #-2]
+ | bls =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | it ls
+ | subls PC, RC, #0x20000
+ } else {
+ | bls =>BC_JLOOP
+ }
+ | ins_next1
+ | ins_next2
+ | b <3
+ |
+ |.if not FPU
+ |8: // Invert check for negative step.
+ if (vk) {
+ | bl extern __aeabi_dadd
+ | strd CARG1, CARG2, FOR_IDX
+ | strd CARG1, CARG2, FOR_EXT
+ }
+ | mov CARG3, CARG1
+ | mov CARG4, CARG2
+ | ldrd CARG1, CARG2, FOR_STOP
+ | b <6
+ |.endif
+ break;
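+
+  /* Loop condition implemented by FORI/IFORL above, including the
+  ** "invert check for negative step" path at label 4 (illustration only):
+  **
+  **   static int for_continues(double idx, double stop, double step)
+  **   {
+  **     return step >= 0 ? (idx <= stop) : (stop <= idx);
+  **   }
+  **
+  ** IFORL adds the step first; the integer variant exits the loop (or, for
+  ** JFORL, avoids entering machine code) when that addition overflows.
+  */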
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base*8, RC = target
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ if (op == BC_JITERL) {
+ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil.
+ | it ne
+ | strdne CARG1, CARG2, [RA, #-8]
+ | bne =>BC_JLOOP
+ } else {
+ | add RC, PC, RC, lsl #2
+ | // STALL: load CARG1, CARG2.
+ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil.
+ | itt ne
+ | subne PC, RC, #0x20000 // Otherwise save control var + branch.
+ | strdne CARG1, CARG2, [RA, #-8]
+ }
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base*8, RC = target (loop extent)
+ | // Note: RA/RC are only used by the trace recorder to determine scope/extent.
+ | // This opcode does NOT jump; its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base*8, RC = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base (ignored), RC = traceno
+ | sub RB, DISPATCH, #-DISPATCH_J(trace)
+ | ldr CARG1, [RB]
+ | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0.
+ | ldr TRACE:RC, [CARG1, RC, lsl #2]
+ | st_vmstate CARG2
+ | ldr RA, TRACE:RC->mcode
+ | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
+ | sub RB, DISPATCH, #-DISPATCH_GL(tmpbuf.L)
+ | str L, [RB]
+ | add RA, RA, #1 // Set the Thumb bit for bx.
+ | bx RA
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base*8 (only used by trace recorder), RC = target
+ | add RC, PC, RC, lsl #2
+ | sub PC, RC, #0x20000
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)]
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | bhi ->vm_growstack_l
+ if (op != BC_JFUNCF) {
+ | ins_next1
+ | ins_next2
+ }
+ |2:
+ | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters.
+ | mvn CARG4, #~LJ_TNIL
+ | blo >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RC, INS
+ | b =>BC_JLOOP
+ } else {
+ | ins_next3
+ }
+ |
+ |3: // Clear missing parameters.
+ | strd_i CARG3, CARG4, BASE, NARGS8:RC
+ | add NARGS8:RC, NARGS8:RC, #8
+ | b <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | add CARG4, BASE, RC
+ | add RA, RA, RC
+ | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC.
+ | add CARG2, RC, #8+FRAME_VARG
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG.
+ | bhs ->vm_growstack_l
+ | ldrb RB, [PC, #-4+PC2PROTO(numparams)]
+ | mov RA, BASE
+ | mov RC, CARG4
+ | cmp RB, #0
+ | add BASE, CARG4, #8
+ | beq >3
+ | mvn CARG3, #~LJ_TNIL
+ |1:
+ | cmp RA, RC // Less args than parameters?
+ | ite lo
+ | ldrdlo CARG1, CARG2, [RA], #8
+ | movhs CARG2, CARG3
+ | it lo
+ | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC).
+ |2:
+ | subs RB, RB, #1
+ | strd CARG1, CARG2, [CARG4, #8]!
+ | bne <1
+ |3:
+ | ins_next
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ldr CARG4, CFUNC:CARG3->f
+ } else {
+ | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)]
+ }
+ | add CARG2, RA, NARGS8:RC
+ | ldr CARG1, L->maxstack
+ | add RC, BASE, NARGS8:RC
+ | str BASE, L->base
+ | cmp CARG2, CARG1
+ | str RC, L->top
+ if (op == BC_FUNCCW) {
+ | ldr CARG2, CFUNC:CARG3->f
+ }
+ | mv_vmstate CARG3, C
+ | mov CARG1, L
+ | bhi ->vm_growstack_c // Need to grow stack.
+ | st_vmstate CARG3
+ | blx CARG4 // (lua_State *L [, lua_CFunction f])
+ | // Returns nresults.
+ | ldr BASE, L->base
+ | mv_vmstate CARG3, INTERP
+ | ldr CRET2, L->top
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | lsl RC, CRET1, #3
+ | st_vmstate CARG3
+ | ldr PC, [BASE, FRAME_PC]
+ | sub RA, CRET2, RC // RA = L->top - nresults*8
+ | b ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+
+ dasm_growpc(Dst, BC__MAX);
+
+ build_subroutines(ctx);
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op);
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+ int i;
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n"
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 0xe\n" /* Return address is in lr. */
+ "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */
+ "\t.align 2\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.long .Lbegin\n"
+ "\t.long %d\n"
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */
+ fcofs, CFRAME_SIZE);
+ for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
+#if LJ_ARCH_HASFPU
+ for (i = 15; i >= 8; i--) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n",
+ 64+2*i, 10+2*(15-i));
+ fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */
+#endif
+ fprintf(ctx->fp,
+ "\t.align 2\n"
+ ".LEFDE0:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n"
+ "\t.long .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n"
+ "\t.long lj_vm_ffi_call\n"
+ "\t.long %d\n"
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
+ "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */
+ "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */
+ "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */
+ "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */
+ "\t.align 2\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+ break;
+ default:
+ break;
+ }
+}
+