cd7869001f
Signed-off-by: Xu Xingliang <xuxingliang@xiaomi.com>
diff --git a/Makefile b/Makefile
index b0288b4d..f387077c 100644
--- a/Makefile
+++ b/Makefile
@@ -90,7 +90,7 @@ FILE_MAN= luajit.1
FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
- dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+ dis_x86.lua dis_x64.lua dis_arm.lua dis_armv7m.lua dis_arm64.lua \
dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
dis_mips64.lua dis_mips64el.lua vmdef.lua
diff --git a/dynasm/dasm_armv7m.h b/dynasm/dasm_armv7m.h
new file mode 100644
index 00000000..8f94ba40
--- /dev/null
+++ b/dynasm/dasm_armv7m.h
@@ -0,0 +1,563 @@
+/*
+** DynASM ARM encoding engine.
+** Copyright (C) 2018 Jernej Turnsek. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "armv7m"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP,
+ DASM_SECTION,
+ DASM_ESC,
+ DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN,
+ DASM_REL_LG,
+ DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC,
+ DASM_LABEL_PC,
+ DASM_IMM,
+ DASM_IMM12,
+ DASM_IMM16,
+ DASM_IMML8,
+ DASM_IMML12,
+ DASM_IMMV8,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = (dasm_ActList)actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)
+{
+ int i;
+ unsigned int m = n;
+
+ if (m <= 255) {
+ /* i:imm3 = 0000 */
+ return ((((m) & 0xff) << 16) | (((m) & 0x700) << 20) | (((m) & 0x800) >> 1));
+ }
+ else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0001 */
+ return ((((0x100 | (m & 0xff)) & 0xff) << 16) | (((0x100 | (m & 0xff)) & 0x700) << 20) | (((0x100 | (m & 0xff)) & 0x800) >> 1));
+ }
+ else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+ /* i:imm3 = 0010 */
+ return ((((0x200 | (m >> 8 & 0xff)) & 0xff) << 16) | (((0x200 | (m >> 8 & 0xff)) & 0x700) << 20) | (((0x200 | (m >> 8 & 0xff)) & 0x800) >> 1));
+ }
+ else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+ /* i:imm3 = 0011 */
+ return ((((0x300 | (m & 0xff)) & 0xff) << 16) | (((0x300 | (m & 0xff)) & 0x700) << 20) | (((0x300 | (m & 0xff)) & 0x800) >> 1));
+ }
+ else {
+ for (i = 0; i < 4096; i += 128, m = ((m << 1) | (m >> (-(unsigned int)(1)&(8*sizeof(m) - 1))))) {
+ if (m <= 255) {
+ if ((m & 0x80) && (i >= 128 * 8))
+ return ((((i | (m & 0x7f)) & 0xff) << 16) | (((i | (m & 0x7f)) & 0x700) << 20) | (((i | (m & 0x7f)) & 0x800) >> 1));
+ else
+ continue;
+ }
+ }
+ }
+ if (n < 4096) {
+ return -2; /* Used for additional encoding of add/sub TODO: better solution! */
+ }
+ return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst,
+ int,
+ sec->buf,
+ sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) {
+ ofs += 4;
+ }
+ else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) { CK(n >= 10 || *pl < 0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+putrel:
+ n = *pl;
+ if (n < 0) {
+ /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ }
+ else {
+linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+ case DASM_IMM16:
+#ifdef DASM_CHECKS
+ CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I);
+ if ((ins & 0x8000))
+ CK(((n + (1 << (((ins >> 5) & 31) - 1))) >> ((ins >> 5) & 31)) == 0, RANGE_I);
+ else
+ CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMMV8:
+ CK((n & 3) == 0, RANGE_I);
+ n >>= 2;
+ /* fallthrough */
+ case DASM_IMML8:
+ case DASM_IMML12:
+ CK(n >= 0 ? ((n >> ((ins >> 5) & 31)) == 0) :
+ (((-n) >> ((ins >> 5) & 31)) == 0),
+ RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM12:
+ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC | pc;
+ }
+#endif
+
+ {
+ /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM12: case DASM_IMM16:
+ case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break;
+ }
+ }
+stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned int *cp = (unsigned int *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP:
+ case DASM_SECTION:
+ goto stop;
+ case DASM_ESC:
+ //*cp++ = *p++; //jturnsek: do I need to swap this also?
+ *cp++ = ((*p >> 16) & 0x0000ffff) | ((*p << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */
+ p++;
+ break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), !(ins & 2048));
+ goto patchrel;
+ case DASM_ALIGN:
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x8000f3af; /* jturnsek: NOP.W */
+ break;
+ case DASM_REL_LG:
+ CK(n >= 0, UNDEF_LG);
+ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base);
+patchrel:
+ if ((ins & 0x800) == 0) {
+ /* jturnsek: B or BL */
+ if (cp[-1] & 0x10000000) {
+ /* BL */
+ CK((n & 1) == 0 && ((n + 0x01000000) >> 25) == 0, RANGE_REL);
+ cp[-1] |= ((((n & 0x1000000) >> 24) & 0x1) << 10) |
+ (((~((n & 0x800000) >> 23) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 29) |
+ (((~((n & 0x400000) >> 22) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 27) |
+ ((n >> 12) & 0x3ff) |
+ (((n >> 1) & 0x7ff) << 16);
+ }
+ else {
+ /* B (T3) */
+ CK((n & 1) == 0 && ((n + 0x00100000) >> 21) == 0, RANGE_REL);
+ cp[-1] |= ((((n & 0x100000) >> 20) & 0x1) << 10) |
+ ((((n & 0x80000) >> 19) & 0x1) << 27) |
+ ((((n & 0x40000) >> 18) & 0x1) << 29) |
+ ((n >> 12) & 0x3f) |
+ (((n >> 1) & 0x7ff) << 16);
+ }
+ }
+ else if ((ins & 0x1000)) {
+ CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL);
+ goto patchimml8;
+ }
+ else if ((ins & 0x2000) == 0) {
+ CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL);
+ goto patchimml;
+ }
+ else {
+ CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL);
+ n >>= 2;
+ goto patchimmv;
+ }
+ break;
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins - 10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC:
+ break;
+ case DASM_IMM:
+ if (((ins >> 5) & 31) == 2) {
+ /* 2 bit shift for load/store lsl */
+ cp[-1] |= ((n & 0x3) << 20);
+ }
+ else {
+ /* 5 bit shift */
+ cp[-1] |= ((n & 0x3) << 22) | ((n & 0x1c) << 26);
+ }
+ //cp[-1] |= ((n >> ((ins >> 10) & 31)) & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31);
+ break;
+ case DASM_IMM12:
+ if (dasm_imm12((unsigned int)n) == -2) {
+ cp[-1] ^= 0x00000300;
+ cp[-1] &= ~0x00000010;
+ cp[-1] |= ((((n) & 0xff) << 16) | (((n) & 0x700) << 20) | (((n) & 0x800) >> 1));
+ }
+ else {
+ cp[-1] |= dasm_imm12((unsigned int)n);
+ }
+ break;
+ case DASM_IMM16:
+ cp[-1] |= ((n & 0xf000) >> 12) |
+ ((n & 0x0800) >> 1) |
+ ((n & 0x0700) << 20) |
+ ((n & 0x00ff) << 16);
+ break;
+ case DASM_IMML8:
+patchimml8:
+ cp[-1] |= n >= 0 ? (0x02000000 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16);
+ break;
+ case DASM_IMML12:
+patchimml:
+ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xfff) << 16)) : ((-n & 0xfff) << 16);
+ if (((cp[-1] & 0x0000000f) != 0x0000000f) && (n < 0)) {
+ CK(-255 <= n && n < 0, RANGE_I);
+ cp[-1] &= ~0x03000000;
+ cp[-1] |= 0x0c000000;
+ }
+ break;
+ case DASM_IMMV8:
+patchimmv:
+ cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16);
+ break;
+ default:
+ *cp++ = ((ins >> 16) & 0x0000ffff) | ((ins << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */
+ break;
+ }
+ }
+stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG | i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC | (D->section - D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_armv7m.lua b/dynasm/dasm_armv7m.lua
new file mode 100644
index 00000000..8e877d26
--- /dev/null
+++ b/dynasm/dasm_armv7m.lua
@@ -0,0 +1,1010 @@
+------------------------------------------------------------------------------
+-- DynASM ARMV7M module.
+--
+-- Copyright (C) 2018 Jernej Turnsek. All rights reserved.
+-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "armv7m",
+ description = "DynASM ARMV7M module",
+ version = "1.4.0",
+ vernum = 10400,
+ release = "2018-12-07",
+ author = "Jernej Turnsek",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, setmetatable, rawget = assert, setmetatable, rawget
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
+local concat, sort, insert = table.concat, table.sort, table.insert
+local bit = bit or require("bit")
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local ror, tohex = bit.ror, bit.tohex
+local bxor = bit.bxor
+
+-- Inherited tables and callbacks.
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ "STOP", "SECTION", "ESC", "REL_EXT",
+ "ALIGN", "REL_LG", "LABEL_LG",
+ "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8",
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number.
+local map_action = {}
+for n,name in ipairs(action_names) do
+ map_action[name] = n-1
+end
+
+-- Action list buffer.
+local actlist = {}
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+ for n,name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+ out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+local function writeactions(out, name)
+ local nn = #actlist
+ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end
+ out:write("static const unsigned int ", name, "[", nn, "] = {\n")
+ for i = 1,nn-1 do
+ assert(out:write("0x", tohex(actlist[i]), ",\n"))
+ end
+ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n"))
+end
+
+------------------------------------------------------------------------------
+
+-- Add word to action list.
+local function wputxw(n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+local function waction(action, val, a, num)
+ local w = assert(map_action[action], "bad action name `"..action.."'")
+ wputxw(w * 0x10000 + (val or 0))
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+local function wflush(term)
+ if #actlist == actargs[1] then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped word.
+local function wputw(n)
+ if n <= 0x000fffff then waction("ESC") end
+ wputxw(n)
+end
+
+-- Reserve position for word.
+local function wpos()
+ local pos = #actlist+1
+ actlist[pos] = ""
+ return pos
+end
+
+-- Store word to reserved position.
+local function wputpos(pos, n)
+ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range")
+ if n <= 0x000fffff then
+ insert(actlist, pos+1, n)
+ n = map_action.ESC * 0x10000
+ end
+ actlist[pos] = n
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+local next_global = 20
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 2047 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=20,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=20,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=20,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+local next_extern = 0
+local map_extern_ = {}
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n > 2047 then werror("too many extern labels") end
+ next_extern = n + 1
+ t[name] = n
+ map_extern_[n] = name
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ out:write("Extern labels:\n")
+ for i=0,next_extern-1 do
+ out:write(format(" %s\n", map_extern_[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+local function writeexternnames(out, name)
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=0,next_extern-1 do
+ out:write(" \"", map_extern_[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+
+-- Ext. register name -> int. name.
+local map_archdef = { sp = "r13", lr = "r14", pc = "r15", }
+
+-- Int. register name -> ext. name.
+local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", }
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for Dt... macros).
+
+-- Reverse defines for registers.
+function _M.revdef(s)
+ return map_reg_rev[s] or s
+end
+
+local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, }
+
+local map_cond = {
+ eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
+ hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
+ hs = 2, lo = 3,
+}
+
+------------------------------------------------------------------------------
+
+-- Template strings for ARM instructions.
+-- jturnsek: dasm_encode will do the swap of half-words!!!
+local map_op = {
+ and_3 = "ea000000DNPs",
+ eor_3 = "ea800000DNPs",
+ sub_3 = "eba00000DNPs",
+ rsb_3 = "ebc00000DNPs",
+ add_3 = "eb000000DNPs",
+ sbc_3 = "eb600000DNPs",
+ tst_2 = "ea100f00NP",
+ cmp_2 = "ebb00f00NP",
+ cmn_2 = "eb100f00NP",
+ orr_3 = "ea400000DNPs",
+ mov_2 = "ea4f0000DPs",
+ bic_3 = "ea200000DNPs",
+ mvn_2 = "ea6f0000DPs",
+
+ and_4 = "ea000000DNMps",
+ eor_4 = "ea800000DNMps",
+ sub_4 = "eba00000DNMps",
+ rsb_4 = "ebc00000DNMps",
+ add_4 = "eb000000DNMps",
+ sbc_4 = "eb600000DNMps",
+ tst_3 = "ea100f00NMp",
+ cmp_3 = "ebb00f00NMp",
+ cmn_3 = "eb100f00NMp",
+ orr_4 = "ea400000DNMps",
+ mov_3 = "ea4f0000DMps",
+ bic_4 = "ea200000DNMps",
+ mvn_3 = "ea6f0000DMps",
+
+ lsl_3 = "ea400000DNws",
+ lsr_3 = "ea400010DNws",
+ asr_3 = "ea400020DNws",
+ ror_3 = "ea400030DNws",
+
+ smull_4 = "fb800000SDNM",
+
+ clz_2 = "fab0f080Da", -- a is used for Consistent(M)
+ rbit_2 = "fa90f0a0Da", -- a is used for Consistent(M)
+
+ str_2 = "f8400000SL", str_3 = "f8400000SL", str_4 = "f8400000SL",
+ strb_2 = "f8000000SL", strb_3 = "f8000000SL", strb_4 = "f8000000SL",
+ ldr_2 = "f8500000SL", ldr_3 = "f8500000SL", ldr_4 = "f8500000SL",
+ ldrb_2 = "f8100000SL", ldrb_3 = "f8100000SL", ldrb_4 = "f8100000SL",
+ strh_2 = "f8200000SL", strh_3 = "f8200000SL",
+ ldrh_2 = "f8300000SL", ldrh_3 = "f8300000SL",
+ ldrd_3 = "e8500000SDL", ldrd_4 = "e8500000SDL",
+ strd_3 = "e8400000SDL", strd_4 = "e8400000SDL",
+
+ ldm_2 = "e8900000oR",
+ pop_1 = "e8bd0000R",
+ push_1 = "e92d0000R",
+
+ b_1 = "f0009000B",
+ bl_1 = "f000d000B",
+ bx_1 = "bf004700C",
+ blx_1 = "bf004780C",
+
+ nop_0 = "f3af8000",
+ bkpt_1 = "bf00be00K",
+
+ ["vadd.f64_3"] = "ee300b00Gdnm",
+ ["vsub.f64_3"] = "ee300b40Gdnm",
+ ["vmul.f64_3"] = "ee200b00Gdnm",
+ ["vdiv.f64_3"] = "ee800b00Gdnm",
+ ["vcmp.f64_2"] = "eeb40b40Gdm",
+ ["vcvt.f64.s32_2"] = "eeb80bc0GdFm",
+ ["vsqrt.f64_2"] = "eeb10bc0Gdm",
+
+ vldr_2 = "ed100a00dl|ed100b00Gdl",
+ vstr_2 = "ed000a00dl|ed000b00Gdl",
+ vldm_2 = "ec900a00or",
+ vpop_1 = "ecbd0a00r",
+ vstmdb_2 = "ed000a00or",
+ vpush_1 = "ed2d0a00r",
+
+ ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY",
+ vmov_2 = "ee100a10Sn|ee000a10nS",
+ vmov_3 = "ec500a10SNm|ec400a10mSN|ec500b10GSNm|ec400b10GmSN",
+ vmrs_0 = "eef1fa10",
+
+ it_1 = "bf00bf08c",
+ ite_1 = "bf00bf04c",
+ itt_1 = "bf00bf04c",
+ ittt_1 = "bf00bf02c",
+ itttt_1 = "bf00bf01c",
+ iteee_1 = "bf00bf01c",
+}
+
+-- Add mnemonics for "s" variants.
+do
+ local t = {}
+ for k,v in pairs(map_op) do
+ if sub(v, -1) == "s" then
+ local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2)
+ t[sub(k, 1, -3).."s"..sub(k, -2)] = v2
+ end
+ end
+ for k,v in pairs(t) do
+ map_op[k] = v
+ end
+end
+
+------------------------------------------------------------------------------
+
+local function parse_gpr(expr)
+ local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$")
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg
+ if not reg then
+ werror("type `"..(tname or expr).."' needs a register override")
+ end
+ expr = reg
+ end
+ local r = match(expr, "^r(1?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 15 then return r, tp end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_gpr_pm(expr)
+ local pm, expr2 = match(expr, "^([+-]?)(.*)$")
+ return parse_gpr(expr2), (pm == "-")
+end
+
+local function parse_vr(expr, tp)
+ local t, r = match(expr, "^([sd])([0-9]+)$")
+ if t == tp then
+ r = tonumber(r)
+ if r <= 31 then
+ if t == "s" then return shr(r, 1), band(r, 1) end
+ return band(r, 15), shr(r, 4)
+ end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_reglist(reglist)
+ reglist = match(reglist, "^{%s*([^}]*)}$")
+ if not reglist then werror("register list expected") end
+ local rr = 0
+ for p in gmatch(reglist..",", "%s*([^,]*),") do
+ local rbit = shl(1, parse_gpr(gsub(p, "%s+$", "")))
+ if band(rr, rbit) ~= 0 then
+ werror("duplicate register `"..p.."'")
+ end
+ rr = rr + rbit
+ end
+ return rr
+end
+
+local function parse_vrlist(reglist)
+ local ta, ra, tb, rb = match(reglist,
+ "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$")
+ ra, rb = tonumber(ra), tonumber(rb)
+ if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then
+ local nr = rb + 1 - ra
+ if ta == "s" then
+ return shl(shr(ra, 1), 12) + shl(band(ra, 1), 22) + nr
+ else
+ return shl(band(ra, 15), 12) + shl(shr(ra, 4), 22) + nr * 2 + 0x100
+ end
+ end
+ werror("register list expected")
+end
+
+local function parse_imm(imm, bits, shift, scale, signed)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = tonumber(imm)
+ if n then
+ local m = sar(n, scale)
+ if shl(m, scale) == n then
+ if signed then
+ local s = sar(m, bits-1)
+ if s == 0 then return shl(m, shift)
+ elseif s == -1 then return shl(m + shl(1, bits), shift) end
+ else
+ if sar(m, bits) == 0 then return shl(m, shift) end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+ return 0
+ end
+end
+
+local function parse_imm12(imm)
+ local n = tonumber(imm)
+ if n then
+ if n <= 255 then
+ return band(n, 0xff)
+ elseif band(n, 0xff00ff00) == 0 and band(shr(n, 16), 0xff) == band(n, 0xff) then
+ return band(n, 0xff) + shl(1, 12)
+ elseif band(n, 0x00ff00ff) == 0 and band(shr(n, 16), 0xff00) == band(n, 0xff00) then
+ return band(shr(n, 8), 0xff) + shl(2, 12)
+ elseif band(shr(n, 24), 0xff) == band(n, 0xff) and
+ band(shr(n, 16), 0xff) == band(n, 0xff) and
+ band(shr(n, 8), 0xff) == band(n, 0xff) then
+ return band(n, 0xff) + shl(3, 12)
+ else
+ for i=31, 8, -1 do
+ n = ror(n, 1)
+ if n >= 128 and n <= 255 then
+ return shl(band(i, 0x10), 22) + shl(band(i, 0x0e), 11) + shl(band(i, 0x01), 7) + band(n, 0x7f)
+ end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM12", 0, imm)
+ return 0
+ end
+end
+
+local function parse_imm16(imm)
+ imm = match(imm, "^#(.*)$")
+ if not imm then werror("expected immediate operand") end
+ local n = tonumber(imm)
+ if n then
+ if shr(n, 16) == 0 then
+ return band(n, 0x00ff) + shl(band(n, 0x0700), 4) + shl(band(n, 0x0800), 15) + shl(band(n, 0xf000), 4)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMM16", 32*16, imm)
+ return 0
+ end
+end
+
+local function parse_imm_load(imm, ext, flags)
+ local n = tonumber(imm)
+ local p, w = match(flags, "P"), match(flags, "W")
+ if n then
+ if ext then
+ if n >= -1020 and n <= 1020 then
+ local up = 0x00800000
+ if n < 0 then n = -n; up = 0 end
+ return n/4 + up + (p and 0x01000000 or 0) + (w and 0x00200000 or 0)
+ end
+ else
+ if w then
+ if n >= -255 and n <= 255 then
+ if n >= 0 then
+ return n + 0x00000a00 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0)
+ else
+ return -n + 0x00000800 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0)
+ end
+ end
+ else
+ if n >= 0 and n <= 4095 then
+ return n + 0x00800000
+ elseif n >= -255 and n < 0 then
+ return -n + 0x00000800 + (p and 0x00000400 or 0)
+ end
+ end
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), imm)
+ local pw = 0
+ if p then pw = (ext and 0x01000000 or 0) end
+ if w then pw = (ext and 0x00200000 or 0) end
+ return pw
+ end
+end
+
+local function parse_shift(shift)
+ if shift == "rrx" then
+ return 3 * 16
+ else
+ local s, s2 = match(shift, "^(%S+)%s*(.*)$")
+ s = map_shift[s]
+ if not s then werror("expected shift operand") end
+ if sub(s2, 1, 1) == "#" then
+ local imm = parse_imm(s2, 5, 0, 0, false)
+ return shl(band(imm, 0x1c), 10) + shl(band(imm, 0x03), 6) + shl(s, 4)
+ else
+ werror("expected immediate shift operand")
+ end
+ end
+end
+
+local function parse_shift_load(shift)
+ if not match(shift, "lsl", 1) then
+ werror("expected lsl shift operand")
+ else
+ local s, s2 = match(shift, "^(%S+)%s*(.*)$")
+ if not s then werror("expected shift operand") end
+ if sub(s2, 1, 1) == "#" then
+ return parse_imm(s2, 2, 4, 0, false)
+ else
+ werror("expected immediate shift operand")
+ end
+ end
+end
+
+local function parse_label(label, def)
+ local prefix = sub(label, 1, 2)
+ -- =>label (pc label reference)
+ if prefix == "=>" then
+ return "PC", 0, sub(label, 3)
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+ return "LG", map_global[sub(label, 3)]
+ end
+ if def then
+ -- [1-9] (local label definition)
+ if match(label, "^[1-9]$") then
+ return "LG", 10+tonumber(label)
+ end
+ else
+ -- [<>][1-9] (local label reference)
+ local dir, lnum = match(label, "^([<>])([1-9])$")
+ if dir then -- Fwd: 1-9, Bkwd: 11-19.
+ return "LG", lnum + (dir == ">" and 0 or 10)
+ end
+ -- extern label (extern label reference)
+ local extname = match(label, "^extern%s+(%S+)$")
+ if extname then
+ return "EXT", map_extern[extname]
+ end
+ end
+ werror("bad label `"..label.."'")
+end
+
+local function parse_load(params, nparams, n, op)
+ local ext = (band(op, 0x10000000) == 0)
+ local pn = params[n]
+ local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
+ local p2 = params[n+1]
+ if not p1 then
+ if not p2 then
+ if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then
+ local mode, n, s = parse_label(pn, false)
+ waction("REL_"..mode, n + (ext and 0x2800 or 0x0800), s, 1)
+ return op + 15 * 65536 + (ext and 0x01000000 or 0) --set P if ext==true
+ end
+ local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local d, tp = parse_gpr(reg)
+ if tp then
+ waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12),
+ format(tp.ctypefmt, tailr))
+ return op + shl(d, 16) + (ext and 0x01000000 or 0) --set P if ext==true, using imm12 if ext==false
+ end
+ end
+ end
+ werror("expected address operand")
+ end
+ if p2 then
+ if wb == "!" then werror("bad use of '!'") end
+ local p3 = params[n+2]
+ op = op + shl(parse_gpr(p1), 16)
+ local imm = match(p2, "^#(.*)$")
+ if imm then
+ if p3 then werror("too many parameters") end
+ op = op + parse_imm_load(imm, ext, "W") --always imm8, set W
+ else
+ if ext then werror("not in ARMV7M") end
+ op = op + parse_gpr(p2)
+ if p3 then op = op + parse_shift_load(p3) end
+ end
+ else
+ local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$")
+ op = op + shl(parse_gpr(p1a), 16)
+ if p2 ~= "" then
+ local imm = match(p2, "^,%s*#(.*)$")
+ if imm then
+ op = op + parse_imm_load(imm, ext, (wb == "!" and "PW" or "P")) --set P (and W)
+ else
+ local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$")
+ if ext then werror("not in ARMV7M") end
+ op = op + parse_gpr(p2a)
+ if p3 ~= "" then
+ op = op + parse_shift_load(p3)
+ end
+ end
+ else
+ if wb == "!" then werror("bad use of '!'") end
+ op = op + (ext and 0x01000000 or 0) + 0x00800000 --no imm, thus using imm12 if ext==false, set U
+ end
+ end
+ return op
+end
+
+local function parse_vload(q)
+ local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$")
+ if reg then
+ local d = shl(parse_gpr(reg), 16)
+ if imm == "" then return d end
+ imm = match(imm, "^,%s*#(.*)$")
+ if imm then
+ local n = tonumber(imm)
+ if n then
+ if n >= -1020 and n <= 1020 and n%4 == 0 then
+ return d + (n >= 0 and n/4+0x00800000 or -n/4)
+ end
+ werror("out of range immediate `"..imm.."'")
+ else
+ waction("IMMV8", 32768 + 32*8, imm)
+ return d
+ end
+ end
+ else
+ if match(q, "^[<>=%-]") or match(q, "^extern%s+") then
+ local mode, n, s = parse_label(q, false)
+ waction("REL_"..mode, n + 0x2800, s, 1)
+ return 15 * 65536
+ end
+ local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$")
+ if reg and tailr ~= "" then
+ local d, tp = parse_gpr(reg)
+ if tp then
+ waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr))
+ return shl(d, 16)
+ end
+ end
+ end
+ werror("expected address operand")
+end
+
+local function parse_it(name, cond)
+ local mask, it = 0, match(name, "it", 1)
+ if not it then
+ werror("not IT instruction")
+ end
+ local it2 = sub(name, 3, -1)
+ if not it2 then
+ return shl(map_cond[cond], 4)
+ end
+ local shift = 3
+ for p in gmatch(it2, "[te]") do
+ if p == "t" then
+ mask = mask + shl(band(map_cond[cond], 1), shift)
+ elseif p == "e" then
+ mask = mask + shl(band(bxor(map_cond[cond], 15), 1), shift)
+ else
+ werror("wrong syntax")
+ end
+ if shift ~= 0 then shift = shift - 1 end
+ end
+ return shl(map_cond[cond], 4) + mask
+end
+
+
+------------------------------------------------------------------------------
+
+-- Handle opcodes defined with template strings.
+local function parse_template(params, template, nparams, pos)
+ local op = tonumber(sub(template, 1, 8), 16)
+ local n = 1
+ local vr = "s"
+
+ -- Process each character.
+ for p in gmatch(sub(template, 9), ".") do
+ local q = params[n]
+ if p == "D" then
+ op = op + shl(parse_gpr(q), 8); n = n + 1
+ elseif p == "N" then
+ op = op + shl(parse_gpr(q), 16); n = n + 1
+ elseif p == "S" then
+ op = op + shl(parse_gpr(q), 12); n = n + 1
+ elseif p == "M" then
+ op = op + parse_gpr(q); n = n + 1
+ elseif p == "a" then
+ local m = parse_gpr(q)
+ op = op + m + shl(m, 16); n = n + 1
+ elseif p == "d" then
+ local r,h = parse_vr(q, vr); op = op + shl(r, 12) + shl(h, 22); n = n + 1
+ elseif p == "n" then
+ local r,h = parse_vr(q, vr); op = op + shl(r, 16) + shl(h, 7); n = n + 1
+ elseif p == "m" then
+ local r,h = parse_vr(q, vr); op = op + r + shl(h, 5); n = n + 1
+ elseif p == "P" then
+ local imm = match(q, "^#(.*)$")
+ if imm then
+ op = op + 0x6000000 + parse_imm12(imm)
+ else
+ op = op + parse_gpr(q)
+ end
+ n = n + 1
+ elseif p == "p" then
+ op = op + parse_shift(q); n = n + 1
+ elseif p == "L" then
+ op = parse_load(params, nparams, n, op)
+ elseif p == "l" then
+ op = op + parse_vload(q)
+ elseif p == "B" then
+ local mode, n, s = parse_label(q, false)
+ waction("REL_"..mode, n, s, 1)
+ elseif p == "C" then -- blx gpr only
+ if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then
+ local r = parse_gpr(q)
+ op = op + shl(r, 3)
+ else
+ werror("not supported")
+ end
+ elseif p == "c" then
+ op = op + parse_it(params.op, q)
+ elseif p == "F" then
+ vr = "s"
+ elseif p == "G" then
+ vr = "d"
+ elseif p == "o" then
+ local r, wb = match(q, "^([^!]*)(!?)$")
+ op = op + shl(parse_gpr(r), 16) + (wb == "!" and 0x00200000 or 0)
+ n = n + 1
+ elseif p == "R" then
+ if params[1] == "{r15}" and params.op == "pop" then
+ op = 0xf85dfb04; -- pop {pc} coded as T3
+ elseif params[1] == "{r12}" and params.op == "pop" then
+ op = 0xf85dcb04; -- pop {r12} coded as T3
+ elseif params[1] == "{r12}" and params.op == "push" then
+ op = 0xf84dcd04; -- push {r12} coded as T3
+ else
+ op = op + parse_reglist(q)
+ end
+ n = n + 1
+ elseif p == "r" then
+ op = op + parse_vrlist(q); n = n + 1
+ elseif p == "w" then
+ local imm = match(q, "^#(.*)$")
+ if imm then
+ local imm5 = parse_imm(q, 5, 0, 0, false)
+ local m = band(op, 0x000f0000)
+ op = op - m + 0x000f0000 + shr(m, 16) + shl(band(imm5, 0x1c), 10) + shl(band(imm5, 0x03), 6); n = n + 1
+ else
+ local type = band(op, 0x00000030)
+ op = op - 0xea400000 + 0xfa00f000 - type + shl(type, 17) + parse_gpr(q)
+ end
+ elseif p == "Y" then
+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+ if not imm or shr(imm, 8) ~= 0 then
+ werror("bad immediate operand")
+ end
+ op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f)
+ elseif p == "K" then
+ local imm = tonumber(match(q, "^#(.*)$")); n = n + 1
+ if not imm or shr(imm, 8) ~= 0 then
+ werror("bad immediate operand")
+ end
+ op = op + band(imm, 0x00ff)
+ elseif p == "s" then
+ -- Ignored.
+ else
+ assert(false)
+ end
+ end
+ wputpos(pos, op)
+end
+
+map_op[".template__"] = function(params, template, nparams)
+ if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 3 positions.
+ if secpos+3 > maxsecpos then wflush() end
+ local pos = wpos()
+ local lpos, apos, spos = #actlist, #actargs, secpos
+
+ local ok, err
+ for t in gmatch(template, "[^|]+") do
+ ok, err = pcall(parse_template, params, t, nparams, pos)
+ if ok then return end
+ secpos = spos
+ actlist[lpos+1] = nil
+ actlist[lpos+2] = nil
+ actlist[lpos+3] = nil
+ actargs[apos+1] = nil
+ actargs[apos+2] = nil
+ actargs[apos+3] = nil
+ end
+ error(err, 0)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+ if not params then return "prefix" end
+ local prefix = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_1"] = function(params)
+ if not params then return "[1-9] | ->global | =>pcexpr" end
+ if secpos+1 > maxsecpos then wflush() end
+ local mode, n, s = parse_label(params[1], true)
+ if mode == "EXT" then werror("bad label definition") end
+ waction("LABEL_"..mode, n, s, 1)
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+map_op[".long_*"] = function(params)
+ if not params then return "imm..." end
+ for _,p in ipairs(params) do
+ local n = tonumber(p)
+ if not n then werror("bad immediate `"..p.."'") end
+ if n < 0 then n = n + 2^32 end
+ wputw(n)
+ if secpos+2 > maxsecpos then wflush() end
+ end
+end
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+ if not params then return "numpow2" end
+ if secpos+1 > maxsecpos then wflush() end
+ local align = tonumber(params[1])
+ if align then
+ local x = align
+ -- Must be a power of 2 in the range (2 ... 256).
+ for i=1,8 do
+ x = x / 2
+ if x == 1 then
+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+ return
+ end
+ end
+ end
+ werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+ if not params then
+ return nparams == 2 and "name, ctype" or "name, ctype, reg"
+ end
+ local name, ctype, reg = params[1], params[2], params[3]
+ if not match(name, "^[%a_][%w_]*$") then
+ werror("bad type name `"..name.."'")
+ end
+ local tp = map_type[name]
+ if tp then
+ werror("duplicate type `"..name.."'")
+ end
+ -- Add #type to defines. A bit unclean to put it in map_archdef.
+ map_archdef["#"..name] = "sizeof("..ctype..")"
+ -- Add new type and emit shortcut define.
+ local num = ctypenum + 1
+ map_type[name] = {
+ ctype = ctype,
+ ctypefmt = format("Dt%X(%%s)", num),
+ reg = reg,
+ }
+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+ ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+ local t = {}
+ for name in pairs(map_type) do t[#t+1] = name end
+ sort(t)
+ out:write("Type definitions:\n")
+ for _,name in ipairs(t) do
+ local tp = map_type[name]
+ local reg = tp.reg or ""
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n",
+ _info.arch, _info.version, _info.release))
+ dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
+ return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt
+end
+
+local map_cond_b = {
+ eq = "f0008000B", ne = "f0408000B", cs = "f0808000B", cc = "f0c08000B", mi = "f1008000B", pl = "f1408000B", vs = "f1808000B", vc = "f1c08000B",
+ hi = "f2008000B", ls = "f2408000B", ge = "f2808000B", lt = "f2c08000B", gt = "f3008000B", le = "f3408000B", al = "f3808000B",
+ hs = "f0808000B", lo = "f0c08000B",
+}
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = function(t, k)
+ local v = map_coreop[k]
+ if v then return v end
+ local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$")
+ local cv = map_cond[cc]
+ if cv then
+ local v = rawget(t, k1..k2)
+ if type(v) == "string" and k1 == "b" then
+ local scv = map_cond_b[cc]
+ return scv
+ elseif type(v) == "string" then
+ return v
+ end
+ end
+ end })
+ setmetatable(map_def, { __index = map_archdef })
+ return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/src/Makefile b/src/Makefile
old mode 100644
new mode 100755
index 30d64be2..83d592f3
--- a/src/Makefile
+++ b/src/Makefile
@@ -36,7 +36,8 @@ CC= $(DEFAULT_CC)
# to slow down the C part by not omitting it. Debugging, tracebacks and
# unwinding are not affected -- the assembler part has frame unwind
# information and GCC emits it where needed (x64) or with -g (see CCDEBUG).
-CCOPT= -O2 -fomit-frame-pointer
+#CCOPT= -O2 -fomit-frame-pointer
+CCOPT= -O2 -fomit-frame-pointer -D__ARM_ARCH_7M__ -DLUAJIT_NO_UNWIND -DLUAJIT_DISABLE_PROFILE
# Use this if you want to generate a smaller binary (but it's slower):
#CCOPT= -Os -fomit-frame-pointer
# Note: it's no longer recommended to use -O3 with GCC 4.x.
@@ -49,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer
#
CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
CCOPT_x64=
-CCOPT_arm=
+CCOPT_arm= -mthumb -march=armv7e-m -mfloat-abi=hard -mfpu=fpv5-d16
CCOPT_arm64=
CCOPT_ppc=
CCOPT_mips=
@@ -71,7 +72,7 @@ CCWARN= -Wall
# as dynamic mode.
#
# Mixed mode creates a static + dynamic library and a statically linked luajit.
-BUILDMODE= mixed
+BUILDMODE= static
#
# Static mode creates a static library and a statically linked luajit.
#BUILDMODE= static
@@ -242,6 +243,9 @@ ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH)))
TARGET_LJARCH= x86
else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -D__ARM_ARCH_7M__=1
+ endif
TARGET_LJARCH= arm
else
ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
@@ -443,6 +447,9 @@ ifeq (x64,$(TARGET_LJARCH))
endif
else
ifeq (arm,$(TARGET_LJARCH))
+ ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH)))
+ DASM_ARCH= armv7m
+ endif
ifeq (iOS,$(TARGET_SYS))
DASM_AFLAGS+= -D IOS
endif
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index 9ee47ada..ca0ee47e 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -60,7 +60,11 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#if LJ_TARGET_X86ORX64
#include "../dynasm/dasm_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "../dynasm/dasm_armv7m.h"
+#else
#include "../dynasm/dasm_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "../dynasm/dasm_arm64.h"
#elif LJ_TARGET_PPC
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 7baa011f..1fc72a9d 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -114,6 +114,20 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
emit_asm_words(ctx, p, n-4);
ins = *(uint32_t *)(p+n-4);
#if LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ if ((ins & 0xd000f800) == 0xd000f000) {
+ fprintf(ctx->fp, "\tbl %s\n", sym);
+ }
+ else if ((ins & 0xd000f800) == 0x9000f000) {
+ fprintf(ctx->fp, "\tb %s\n", sym);
+ }
+ else {
+ fprintf(stderr,
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
+ }
+#else
if ((ins & 0xff000000u) == 0xfa000000u) {
fprintf(ctx->fp, "\tblx %s\n", sym);
} else if ((ins & 0x0e000000u) == 0x0a000000u) {
@@ -125,6 +139,7 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
ins, sym);
exit(1);
}
+#endif
#elif LJ_TARGET_ARM64
if ((ins >> 26) == 0x25u) {
fprintf(ctx->fp, "\tbl %s\n", sym);
@@ -193,6 +208,16 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc
break;
}
#endif
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ fprintf(ctx->fp,
+ "\n\t.globl %s\n"
+ "\t.thumb_func\n"
+ "\t.hidden %s\n"
+ "\t.type %s, " ELFASM_PX "%s\n"
+ "\t.size %s, %d\n"
+ "%s:\n",
+ name,name,name,isfunc ? "function" : "object",name,size,name);
+#else
fprintf(ctx->fp,
"\n\t.globl %s\n"
"\t.hidden %s\n"
@@ -200,6 +225,7 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc
"\t.size %s, %d\n"
"%s:\n",
name, name, name, isfunc ? "function" : "object", name, size, name);
+#endif
break;
case BUILD_coffasm:
fprintf(ctx->fp, "\n\t.globl %s\n", name);
@@ -242,8 +268,16 @@ void emit_asm(BuildCtx *ctx)
int i, rel;

fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ fprintf(ctx->fp, "\t.syntax unified\n");
+ fprintf(ctx->fp, "\t.thumb\n");
+#endif
fprintf(ctx->fp, "\t.text\n");
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+ emit_asm_align(ctx, 2);
+#else
emit_asm_align(ctx, 4);
+#endif

#if LJ_TARGET_PS3
emit_asm_label(ctx, ctx->beginsym, ctx->codesz, 0);
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 20e60493..4cadd673 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -38,10 +38,17 @@
#define MAX_SIZE_T (~(size_t)0)
#define MALLOC_ALIGNMENT ((size_t)8U)

+#if LJ_TARGET_NUTTX
+#define DEFAULT_GRANULARITY ((size_t)32U * (size_t)1024U)
+#define DEFAULT_TRIM_THRESHOLD ((size_t)512U * (size_t)1024U)
+#define DEFAULT_MMAP_THRESHOLD ((size_t)32U * (size_t)1024U)
+#define MAX_RELEASE_CHECK_RATE 255
+#else
#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U)
#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U)
#define MAX_RELEASE_CHECK_RATE 255
+#endif

/* ------------------- size_t and alignment properties -------------------- */

@@ -90,9 +97,36 @@

#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
+#if LJ_TARGET_NUTTX
+#include <nuttx/config.h>
+#include <nuttx/mm/mm.h>
+
+static struct mm_heap_s *g_alloc_heap;
+
+static void init_mmap(void)
+{
+ static uint8_t buffer[CONFIG_LUAJIT_ALLOC_SIZE]
+ locate_data(CONFIG_LUAJIT_ALLOC_SECTION_NAME);
+ g_alloc_heap = mm_initialize("alloc",
+ (void *)buffer,
+ CONFIG_LUAJIT_ALLOC_SIZE);
+}
+#define INIT_MMAP() init_mmap()
+
+#define CALL_MMAP(prng, size) mm_malloc(g_alloc_heap, (size_t)size)
+#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
+
+static int CALL_MUNMAP(void *ptr, size_t size)
+{
+ if (ptr == NULL) return -1;
+ mm_free(g_alloc_heap, ptr);
+ return 0;
+}
+#else
#include <sys/mman.h>

#define LJ_ALLOC_MMAP 1
+#endif

#if LJ_64

diff --git a/src/lj_arch.h b/src/lj_arch.h
index 882c99cb..92fbae5e 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -39,6 +39,7 @@
#define LUAJIT_OS_OSX 3
#define LUAJIT_OS_BSD 4
#define LUAJIT_OS_POSIX 5
+#define LUAJIT_OS_NUTTX 6

/* Number mode. */
#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
@@ -110,6 +111,8 @@
#define LJ_OS_NAME "BSD"
#elif LUAJIT_OS == LUAJIT_OS_POSIX
#define LJ_OS_NAME "POSIX"
+#elif LUAJIT_OS == LUAJIT_OS_NUTTX
+#define LJ_OS_NAME "NUTTX"
#else
#define LJ_OS_NAME "Other"
#endif
@@ -119,6 +122,7 @@
#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
+#define LJ_TARGET_NUTTX (LUAJIT_OS == LUAJIT_OS_NUTTX)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX

#if TARGET_OS_IPHONE
@@ -214,8 +218,11 @@
#endif

#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
-
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#define LJ_ARCH_NAME "armv7m"
+#else
#define LJ_ARCH_NAME "arm"
+#endif
#define LJ_ARCH_BITS 32
#define LJ_ARCH_ENDIAN LUAJIT_LE
#if !defined(LJ_ARCH_HASFPU) && __SOFTFP__
@@ -227,8 +234,12 @@
#define LJ_ABI_EABI 1
#define LJ_TARGET_ARM 1
#define LJ_TARGET_EHRETREG 0
-#define LJ_TARGET_EHRAREG 14
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+/* No need to test jump address range, because we use veneers. */
+//#define LJ_TARGET_JUMPRANGE 24 /* +-2^24 = +-16MB */
+#else
#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
+#endif
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
@@ -236,7 +247,7 @@

#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
-#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
#define LJ_ARCH_VERSION 70
#elif __ARM_ARCH_6T2__
#define LJ_ARCH_VERSION 61
@@ -479,7 +490,7 @@
#if defined(__ARMEB__)
#error "No support for big-endian ARM"
#endif
-#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__
+#if __ARM_ARCH_6M__ /*|| __ARM_ARCH_7M__ || __ARM_ARCH_7EM__*/
#error "No support for Cortex-M CPUs"
#endif
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 6f5e0c45..429aa161 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -178,7 +178,11 @@ IRFLDEF(FLOFS)
#if LJ_TARGET_X86ORX64
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_emit_armv7m.h"
+#else
#include "lj_emit_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "lj_emit_arm64.h"
#elif LJ_TARGET_PPC
@@ -1655,7 +1659,11 @@ static void asm_loop(ASMState *as)
#if LJ_TARGET_X86ORX64
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_asm_armv7m.h"
+#else
#include "lj_asm_arm.h"
+#endif
#elif LJ_TARGET_ARM64
#include "lj_asm_arm64.h"
#elif LJ_TARGET_PPC
diff --git a/src/lj_asm_armv7m.h b/src/lj_asm_armv7m.h
new file mode 100644
index 00000000..1bdd4a8a
--- /dev/null
+++ b/src/lj_asm_armv7m.h
@@ -0,0 +1,2520 @@
+/*
+** ARM IR assembler (SSA IR -> machine code).
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+ Reg r = IR(ref)->r;
+ if (ra_noreg(r)) {
+ if (!ra_hashint(r) && !iscrossref(as, ref))
+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+ r = ra_allocref(as, ref, allow);
+ }
+ ra_noweak(as, r);
+ return r;
+}
+
+/* Allocate a scratch register pair. */
+static Reg ra_scratchpair(ASMState *as, RegSet allow)
+{
+ RegSet pick1 = as->freeset & allow;
+ RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN;
+ Reg r;
+ if (pick2) {
+ r = rset_picktop(pick2);
+ }
+ else {
+ RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN;
+ if (pick) {
+ r = rset_picktop(pick);
+ ra_restore(as, regcost_ref(as->cost[r + 1]));
+ }
+ else {
+ pick = pick1 & (allow << 1) & RSET_GPRODD;
+ if (pick) {
+ r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick) - 1]));
+ }
+ else {
+ r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN);
+ ra_restore(as, regcost_ref(as->cost[r + 1]));
+ }
+ }
+ }
+ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
+ ra_modified(as, r);
+ ra_modified(as, r + 1);
+ RA_DBGX((as, "scratchpair $r $r", r, r + 1));
+ return r;
+}
+
+#if !LJ_SOFTFP
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+ Reg left = irl->r, right = irr->r;
+ if (ra_hasreg(left)) {
+ ra_noweak(as, left);
+ if (ra_noreg(right))
+ right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+ else
+ ra_noweak(as, right);
+ }
+ else if (ra_hasreg(right)) {
+ ra_noweak(as, right);
+ left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+ }
+ else if (ra_hashint(right)) {
+ right = ra_allocref(as, ir->op2, allow);
+ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+ }
+ else {
+ left = ra_allocref(as, ir->op1, allow);
+ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+ }
+ return left | (right << 8);
+}
+#endif
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Generate an exit stub group at the bottom of the reserved MCode memory. */
+static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+{
+ MCode *mxp = as->mcbot;
+ int i;
+ if (mxp + 8 * 4 + 4*EXITSTUBS_PER_GROUP >= as->mctop)
+ asm_mclimit(as);
+ /* lj_vm_exit_interp_veneer */
+ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */
+ *mxp++ = (MCode)lj_vm_exit_interp;
+ /* lj_vm_exit_handler_veneer */
+ *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */
+ *mxp++ = (MCode)lj_vm_exit_handler;
+ /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
+ *mxp++ = ARMI_STR | ARMI_LSX_U | ARMF_T(RID_LR) | ARMF_N(RID_SP);
+ *mxp = ARMI_BL | ARMC_BL((-4) << 1); /* lj_vm_exit_handler_veneer */
+ mxp++;
+ *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
+ *mxp++ = group*EXITSTUBS_PER_GROUP;
+ for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
+ *mxp++ = ARMI_B_T4 | ARMC_BL((-5 - i) << 1);
+ lj_mcode_sync(as->mcbot, mxp);
+ lj_mcode_commitbot(as->J, mxp);
+ as->mcbot = mxp;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ return mxp - EXITSTUBS_PER_GROUP;
+}
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+ ExitNo i;
+ if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
+ lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+ for (i = 0; i < (nexits + EXITSTUBS_PER_GROUP - 1) / EXITSTUBS_PER_GROUP; i++)
+ if (as->J->exitstubgroup[i] == NULL)
+ as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+}
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guardcc(ASMState *as, ARMCC cc)
+{
+ MCode *target = exitstub_addr(as->J, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = ARMI_BL | ARMC_BL((target - p - 1) << 1);
+ emit_branch(as, ARMF_CC(ARMI_B, cc ^ 1), p);
+ return;
+ }
+ emit_branchlink(as, ARMI_BL, target);
+ ARMI_IT(cc);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 31
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+ IRIns *ir = as->ir;
+ IRRef i = as->curins;
+ if (i > ref + CONFLICT_SEARCH_LIM)
+ return 0; /* Give up, ref is too far away. */
+ while (--i > ref)
+ if (ir[i].o == conflict)
+ return 0; /* Conflict found. */
+ return 1; /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+ !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+ return (int32_t)sizeof(GCtab);
+ return 0;
+}
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, int lim)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r)) {
+ if (ir->o == IR_AREF) {
+ if (mayfuse(as, ref)) {
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ ofs += 8*IR(ir->op2)->i;
+ if (ofs > -lim && ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, refa, allow);
+ }
+ }
+ }
+ }
+ else if (ir->o == IR_HREFK) {
+ if (mayfuse(as, ref)) {
+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+ if (ofs < lim) {
+ *ofsp = ofs;
+ return ra_alloc1(as, ir->op1, allow);
+ }
+ }
+ }
+ else if (ir->o == IR_UREFC) {
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
+ *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
+ return ra_allock(as, (ofs & ~255), allow);
+ }
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = 0;
+ return RID_SP;
+ }
+ }
+ *ofsp = 0;
+ return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions. */
+static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow, uint32_t *rs)
+{
+ IRIns *ir = IR(ref);
+ if (ra_hasreg(ir->r)) {
+ ra_noweak(as, ir->r);
+ return ARMF_M(ir->r);
+ }
+ else if (irref_isk(ref)) {
+ uint32_t k = emit_isk12(ai, ir->i);
+ if (k)
+ return k;
+ }
+ else if (mayfuse(as, ref)) {
+ if (ir->o >= IR_BSHL && ir->o <= IR_BROR) {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL :
+ ir->o == IR_BSHR ? ARMSH_LSR :
+ ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR;
+ if (irref_isk(ir->op2)) {
+ return ARMF_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31));
+ }
+ else {
+ Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m));
+ *rs = ARMF_RSH(sh, s);
+ return ARMF_M(m);
+ }
+ }
+ else if (ir->o == IR_ADD && ir->op1 == ir->op2) {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ return ARMF_M(m) | ARMF_SH(ARMSH_LSL, 1);
+ }
+ }
+ return ARMF_M(ra_allocref(as, ref, allow));
+}
+
+/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */
+static IRRef asm_fuselsl2(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL &&
+ irref_isk(ir->op2) && IR(ir->op2)->i == 2)
+ return ir->op1;
+ return 0; /* No fusion. */
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand. */
+static void asm_fusexref(ASMState *as,
+ ARMIns ai,
+ Reg rd,
+ IRRef ref,
+ RegSet allow,
+ int32_t ofs)
+{
+ IRIns *ir = IR(ref);
+ Reg base;
+ if (ra_noreg(ir->r) && canfuse(as, ir)) {
+ int32_t lim = (!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) ? 1024 :
+ (ai & 0x00000080) ? 4096 : 256;
+ if (ir->o == IR_ADD) {
+ int32_t ofs2;
+ if (irref_isk(ir->op2) &&
+ (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim &&
+ (!(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) || !(ofs2 & 3))) {
+ ofs = ofs2;
+ ref = ir->op1;
+ }
+ else if (ofs == 0 && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) {
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg rn, rm;
+ IRRef sref = asm_fuselsl2(as, rref);
+ if (sref) {
+ rref = sref;
+ ai |= ARMF_LSL(2);
+ }
+ else if ((sref = asm_fuselsl2(as, lref)) != 0) {
+ lref = rref;
+ rref = sref;
+ ai |= ARMF_LSL(2);
+ }
+ rn = ra_alloc1(as, lref, allow);
+ rm = ra_alloc1(as, rref, rset_exclude(allow, rn));
+ emit_tnm(as, ai, rd, rn, rm);
+ return;
+ }
+ }
+ else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) {
+ lj_assertA(ofs == 0, "bad usage");
+ ofs = (int32_t)sizeof(GCstr);
+ if (irref_isk(ir->op2)) {
+ ofs += IR(ir->op2)->i;
+ ref = ir->op1;
+ }
+ else if (irref_isk(ir->op1)) {
+ ofs += IR(ir->op1)->i;
+ ref = ir->op2;
+ }
+ else {
+ /* NYI: Fuse ADD with constant. */
+ Reg rn = ra_alloc1(as, ir->op1, allow);
+ uint32_t rs = 0;
+ uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn), &rs);
+ if ((ai & 0x0000fe00) == 0x0000f800)
+ emit_lso(as, ai, rd, rd, ofs);
+ else
+ emit_lsox(as, ai, rd, rd, ofs);
+ emit_dn(as, ARMI_ADD ^ m, rd, rn);
+ if (rs)
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
+ return;
+ }
+ if (ofs <= -lim || ofs >= lim) {
+ Reg rn = ra_alloc1(as, ref, allow);
+ Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+ emit_tnm(as, ai, rd, rn, rm);
+ return;
+ }
+ }
+ }
+ base = ra_alloc1(as, ref, allow);
+#if !LJ_SOFTFP
+ if ((ai & 0x0000ff00) == 0x0000ed00)
+ emit_vlso(as, ai, rd, base, ofs);
+ else
+#endif
+ if ((ai & 0x0000fe00) == 0x0000f800)
+ emit_lso(as, ai, rd, base, ofs);
+ else
+ emit_lsox(as, ai, rd, base, ofs);
+}
+
+#if !LJ_SOFTFP
+/* Fuse to multiply-add/sub instruction. */
+static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
+{
+ IRRef lref = ir->op1, rref = ir->op2;
+ IRIns *irm;
+ if (lref != rref &&
+ ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+ ra_noreg(irm->r)) ||
+ (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+ (rref = lref, ai = air, ra_noreg(irm->r))))) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+ Reg right, left = ra_alloc2(as,
+ irm,
+ rset_exclude(rset_exclude(RSET_FPR, dest), add));
+ right = (left >> 8); left &= 255;
+ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15));
+ if (dest != add) emit_tm(as, ARMI_VMOV_D, (dest & 15), (add & 15));
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function. */
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+ uint32_t n, nargs = CCI_XNARGS(ci);
+ int32_t ofs = 0;
+#if LJ_SOFTFP
+ Reg gpr = REGARG_FIRSTGPR;
+#else
+ Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0;
+#endif
+ if ((void *)ci->func)
+ emit_call(as, (void *)ci->func);
+#if !LJ_SOFTFP
+ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+ as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+ gpr = REGARG_FIRSTGPR;
+#endif
+ for (n = 0; n < nargs; n++) {
+ /* Setup args. */
+ IRRef ref = args[n];
+ IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
+ if (ref && irt_isfp(ir->t)) {
+ RegSet of = as->freeset;
+ Reg src;
+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
+ if (irt_isnum(ir->t)) {
+ if (fpr <= REGARG_LASTFPR) {
+ ra_leftov(as, fpr, ref);
+ fpr++;
+ continue;
+ }
+ }
+ else if (fprodd) {
+ /* Ick. */
+ src = ra_alloc1(as, ref, RSET_FPR);
+ emit_tm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00000040);
+ fprodd = 0;
+ continue;
+ }
+ else if (fpr <= REGARG_LASTFPR) {
+ ra_leftov(as, fpr, ref);
+ fprodd = fpr++;
+ continue;
+ }
+ /* Workaround to protect argument GPRs from being used for remat. */
+ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1);
+ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1));
+ fprodd = 0;
+ goto stackfp;
+ }
+ /* Workaround to protect argument GPRs from being used for remat. */
+ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1);
+ src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1));
+ if (irt_isnum(ir->t)) gpr = (gpr + 1) & ~1u;
+ if (gpr <= REGARG_LASTGPR) {
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
+ if (irt_isnum(ir->t)) {
+ lj_assertA(rset_test(as->freeset, gpr+1),
+ "reg %d not free", gpr+1); /* Ditto. */
+ emit_tnm(as, ARMI_VMOV_RR_D, gpr, gpr + 1, (src & 15));
+ gpr += 2;
+ }
+ else {
+ emit_tn(as, ARMI_VMOV_R_S, gpr, (src & 15));
+ gpr++;
+ }
+ }
+ else {
+stackfp:
+ if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
+ emit_spstore(as, ir, src, ofs);
+ ofs += irt_isnum(ir->t) ? 8 : 4;
+ }
+ }
+ else
+#endif
+ {
+ if (gpr <= REGARG_LASTGPR) {
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
+ if (ref) ra_leftov(as, gpr, ref);
+ gpr++;
+ }
+ else {
+ if (ref) {
+ Reg r = ra_alloc1(as, ref, RSET_GPR);
+ emit_spstore(as, ir, r, ofs);
+ }
+ ofs += 4;
+ }
+ }
+ }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs. */
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ RegSet drop = RSET_SCRATCH;
+ int hiop = ((ir + 1)->o == IR_HIOP && !irt_isnil((ir + 1)->t));
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ if (hiop && ra_hasreg((ir + 1)->r))
+ rset_clear(drop, (ir + 1)->r); /* Dest reg handled below. */
+ ra_evictset(as, drop); /* Evictions must be performed first. */
+ if (ra_used(ir)) {
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
+ if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64 | CCI_VARARG))) {
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
+ if (irt_isnum(ir->t))
+ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest);
+ else
+ emit_tn(as, ARMI_VMOV_S_R, RID_RET, dest);
+ }
+ else {
+ ra_destreg(as, ir, RID_FPRET);
+ }
+ }
+ else if (hiop) {
+ ra_destpair(as, ir);
+ }
+ else {
+ ra_destreg(as, ir, RID_RET);
+ }
+ }
+ UNUSED(ci);
+}
+
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+ IRRef args[CCI_NARGS_MAX * 2];
+ CCallInfo ci;
+ IRRef func;
+ IRIns *irf;
+ ci.flags = asm_callx_flags(as, ir);
+ asm_collectargs(as, ir, &ci, args);
+ asm_setupresult(as, ir, &ci);
+ func = ir->op2; irf = IR(func);
+ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+ if (irref_isk(func)) {
+ /* Call to constant address. */
+ ci.func = (ASMFunction)(void *)(irf->i);
+ }
+ else {
+ /* Need a non-argument register for indirect calls. */
+ Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12 + 1));
+ emit_m(as, ARMI_BLXr, freg);
+ ci.func = (ASMFunction)(void *)0;
+ }
+ asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot. */
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+ void *pc = ir_kptr(IR(ir->op2));
+ int32_t delta = 1 + LJ_FR2 + bc_a(*((const BCIns *)pc - 1));
+ as->topslot -= (BCReg)delta;
+ if ((int32_t)as->topslot < 0) as->topslot = 0;
+ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
+ /* Need to force a spill on REF_BASE now to update the stack slot. */
+ emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+ emit_setgl(as, base, jit_base);
+ emit_addptr(as, base, -8*delta);
+ asm_guardcc(as, CC_NE);
+ emit_nm(as,
+ ARMI_CMP,
+ RID_TMP,
+ ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
+ emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+ //if ((as->flags & JIT_F_ARMV6T2)) { /* NYI: BFI variant of the flag merge. */
+ // emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
+ //} else {
+ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+ emit_dn(as, ARMC_K12(ARMI_AND, 255), tmp, tmp);
+ //}
+ emit_lso(as, ARMI_LDR, RID_TMP,
+ ra_allock(as, (addr & ~4095),
+ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+ (addr & 4095));
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+#if !LJ_SOFTFP
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_guardcc(as, CC_NE);
+ emit_t(as, ARMI_VMRS, 0);
+ emit_tm(as, ARMI_VCMP_D, (tmp & 15), (left & 15));
+ emit_tm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15));
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15));
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+ RegSet allow = RSET_FPR;
+ Reg left = ra_alloc1(as, ir->op1, allow);
+ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+ Reg tmp = ra_scratch(as, rset_clear(allow, right));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15));
+}
+#endif
+
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
+ int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
+ IRRef lref = ir->op1;
+ /* 64 bit integer conversions are handled by SPLIT. */
+ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
+#if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+ "IR %04d has FP type",
+ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ if (irt_isfp(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ if (stfp) {
+ /* FP to FP conversion. */
+ emit_tm(as,
+ st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32,
+ (dest & 15),
+ (ra_alloc1(as, lref, RSET_FPR) & 15));
+ }
+ else {
+ /* Integer to FP conversion. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ ARMIns ai = irt_isfloat(ir->t) ?
+ (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) :
+ (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32);
+ emit_tm(as, ai, (dest & 15), (dest & 15));
+ emit_tn(as, ARMI_VMOV_S_R, left, (dest & 15));
+ }
+ }
+ else if (stfp) {
+ /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
+ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+ }
+ else {
+ Reg left = ra_alloc1(as, lref, RSET_FPR);
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ ARMIns ai;
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ ai = irt_isint(ir->t) ?
+ (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
+ (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
+ emit_tm(as, ai, (tmp & 15), (left & 15));
+ }
+ }
+ else
+#endif
+ {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (st >= IRT_I8 && st <= IRT_U16) {
+ /* Extend to 32 bit integer. */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+ if ((as->flags & JIT_F_ARMV6)) {
+ ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
+ st == IRT_U8 ? ARMI_UXTB :
+ st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH;
+ emit_dm(as, ai, dest, left);
+ }
+ else if (st == IRT_U8) {
+ emit_dn(as, ARMC_K12(ARMI_AND, 255), dest, left);
+ }
+ else {
+ uint32_t shift = st == IRT_I8 ? 24 : 16;
+ ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR;
+ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, RID_TMP);
+ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_LSL, shift), RID_TMP, left);
+ }
+ }
+ else {
+ /* Handle 32/32 bit no-op (cast). */
+ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
+ }
+ }
+}
+
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+ IRRef args[2];
+ Reg rlo = 0, rhi = 0, tmp;
+ int destused = ra_used(ir);
+ int32_t ofs = 0;
+ ra_evictset(as, RSET_SCRATCH);
+#if LJ_SOFTFP
+ if (destused) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir + 1)->s) &&
+ (ir->s & 1) == 0 && ir->s + 1 == (ir + 1)->s) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ Reg r = (ir + i)->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ ra_modified(as, r);
+ emit_spload(as, ir + i, r, sps_scale((ir + i)->s));
+ }
+ }
+ ofs = sps_scale(ir->s);
+ destused = 0;
+ }
+ else {
+ rhi = ra_dest(as, ir + 1, RSET_GPR);
+ rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+ }
+ }
+ asm_guardcc(as, CC_EQ);
+ if (destused) {
+ emit_lso(as, ARMI_LDR, rhi, RID_SP, 4);
+ emit_lso(as, ARMI_LDR, rlo, RID_SP, 0);
+ }
+#else
+ UNUSED(rhi);
+ if (destused) {
+ if (ra_hasspill(ir->s)) {
+ ofs = sps_scale(ir->s);
+ destused = 0;
+ if (ra_hasreg(ir->r)) {
+ ra_free(as, ir->r);
+ ra_modified(as, ir->r);
+ emit_spload(as, ir, ir->r, ofs);
+ }
+ }
+ else {
+ rlo = ra_dest(as, ir, RSET_FPR);
+ }
+ }
+ asm_guardcc(as, CC_EQ);
+ if (destused)
+ emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0);
+#endif
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); /* Test return status. */
+ args[0] = ir->op1; /* GCstr *str */
+ args[1] = ASMREF_TMP1; /* TValue *n */
+ asm_gencall(as, ci, args);
+ tmp = ra_releasetmp(as, ASMREF_TMP1);
+ if (ofs == 0)
+ emit_dm(as, ARMI_MOV, tmp, RID_SP);
+ else
+ emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+{
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+ RID_SP, 0);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+ RID_SP, 4);
+#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
+#endif
+ } else if (irref_isk(ref)) {
+ /* Use the number constant itself as a TValue. */
+ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+ } else {
+#if LJ_SOFTFP
+ lj_assertA(0, "unsplit FP op");
+#else
+ /* Otherwise force a spill and use the spill slot. */
+ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+#endif
+ }
+ } else {
+ /* Otherwise use [sp] and [sp+4] to hold the TValue.
+ ** This assumes the following call has max. 4 args.
+ */
+ Reg type;
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_GPR);
+ emit_lso(as, ARMI_STR, src, RID_SP, 0);
+ }
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+ type = ra_alloc1(as, ref+1, RSET_GPR);
+ else
+ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+ emit_lso(as, ARMI_STR, type, RID_SP, 4);
+ }
+ }
+ else {
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ }
+}
+
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg idx, base;
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i);
+ if (k) {
+ base = ra_alloc1(as, refa, RSET_GPR);
+ emit_dn(as, ARMI_ADD ^ k, dest, base);
+ return;
+ }
+ }
+ base = ra_alloc1(as, ir->op1, RSET_GPR);
+ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, base, idx);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+** Node *n = hashkey(t, key);
+** do {
+** if (lj_obj_equal(&n->key, key)) return &n->val;
+** } while ((n = nextnode(n)));
+** return niltv(L);
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+ RegSet allow = RSET_GPR;
+ int destused = ra_used(ir);
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP;
+ IRRef refkey = ir->op2;
+ IRIns *irkey = IR(refkey);
+ IRType1 kt = irkey->t;
+ int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt));
+ uint32_t khash;
+ MCLabel l_end, l_loop;
+ rset_clear(allow, tab);
+ if (!irref_isk(refkey) || irt_isstr(kt)) {
+#if LJ_SOFTFP
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ if (irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey + 1)->r)) {
+ keynumhi = (irkey + 1)->r;
+ keyhi = RID_TMP;
+ ra_noweak(as, keynumhi);
+ }
+ else {
+ keyhi = keynumhi = ra_allocref(as, refkey + 1, allow);
+ }
+ rset_clear(allow, keynumhi);
+ khi = 0;
+ }
+#else
+ if (irt_isnum(kt)) {
+ key = ra_scratch(as, allow);
+ rset_clear(allow, key);
+ keyhi = keynumhi = ra_scratch(as, allow);
+ rset_clear(allow, keyhi);
+ khi = 0;
+ }
+ else {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ }
+#endif
+ }
+ else if (irt_isnum(kt)) {
+ int32_t val = (int32_t)ir_knum(irkey)->u32.lo;
+ k = emit_isk12(ARMI_CMP, val);
+ if (!k) {
+ key = ra_allock(as, val, allow);
+ rset_clear(allow, key);
+ }
+ val = (int32_t)ir_knum(irkey)->u32.hi;
+ khi = emit_isk12(ARMI_CMP, val);
+ if (!khi) {
+ keyhi = ra_allock(as, val, allow);
+ rset_clear(allow, keyhi);
+ }
+ }
+ else if (!irt_ispri(kt)) {
+ k = emit_isk12(ARMI_CMP, irkey->i);
+ if (!k) {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ }
+ }
+ if (!irt_ispri(kt))
+ tmp = ra_scratchpair(as, allow);
+
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
+ l_end = emit_label(as);
+ as->invmcp = NULL;
+ if (merge == IR_NE)
+ asm_guardcc(as, CC_AL);
+ else if (destused)
+ emit_loada(as, dest, niltvg(J2G(as->J)));
+
+ /* Follow hash chain until the end. */
+ l_loop = --as->mcp;
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), dest);
+ emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next));
+
+ /* Type and value comparison. */
+ if (merge == IR_EQ)
+ asm_guardcc(as, CC_EQ);
+ else {
+ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+ }
+ if (!irt_ispri(kt)) {
+ emit_nm(as, ARMI_CMP ^ k, tmp, key);
+ ARMI_IT(CC_EQ);
+ emit_nm(as, ARMI_CMP ^ khi, tmp + 1, keyhi);
+ emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key));
+ }
+ else {
+ emit_n(as, ARMI_CMP ^ khi, tmp);
+ emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it));
+ }
+ *l_loop = ARMF_CC(ARMI_B, CC_NE) | ARMC_B((as->mcp - l_loop - 1) << 1);
+
+ /* Load main position relative to tab->node into dest. */
+ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
+ if (khash == 0) {
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ }
+ else {
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
+ emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
+ if (irt_isstr(kt)) {
+ /* Fetch of str->sid is cheaper than ra_allock. */
+ emit_dnm(as, ARMI_AND, tmp, tmp + 1, RID_TMP);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_lso(as, ARMI_LDR, tmp + 1, key, (int32_t)offsetof(GCstr, sid));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ }
+ else if (irref_isk(refkey)) {
+ emit_opk(as,
+ ARMI_AND,
+ tmp,
+ RID_TMP,
+ (int32_t)khash,
+ rset_exclude(rset_exclude(RSET_GPR, tab), dest));
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ }
+ else {
+ /* Must match with hash*() in lj_tab.c. */
+ if (ra_hasreg(keynumhi)) {
+ /* Canonicalize +-0.0 to 0.0. */
+ if (keyhi == RID_TMP) {
+ emit_dm(as, ARMI_MOV, keyhi, keynumhi);
+ ARMI_IT(CC_NE);
+ }
+ emit_d(as, ARMC_K12(ARMI_MOV, 0), keyhi);
+ ARMI_IT(CC_EQ);
+ }
+ emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP);
+ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT3), tmp, tmp, tmp + 1);
+ emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_dnm(as,
+ ARMI_EOR|ARMF_SH(ARMSH_ROR, 32 - ((HASH_ROT2 + HASH_ROT1) & 31)),
+ tmp,
+ tmp+1,
+ tmp);
+ emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
+ emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT1), tmp + 1, tmp + 1, tmp);
+ if (ra_hasreg(keynumhi)) {
+ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key);
+ emit_dnm(as, ARMI_ORR | ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */
+ emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi);
+#if !LJ_SOFTFP
+ emit_tnm(as,
+ ARMI_VMOV_RR_D,
+ key,
+ keynumhi,
+ (ra_alloc1(as, refkey, RSET_FPR) & 15));
+#endif
+ }
+ else {
+ emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key);
+ emit_opk(as,
+ ARMI_ADD,
+ tmp,
+ key,
+ (int32_t)HASH_BIAS,
+ rset_exclude(rset_exclude(RSET_GPR, tab), key));
+ }
+ }
+ }
+}
+
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+ IRIns *kslot = IR(ir->op2);
+ IRIns *irkey = IR(kslot->op1);
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+ Reg dest = (ra_used(ir) || ofs > 4095) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg key = RID_NONE, type = RID_TMP, idx = node;
+ RegSet allow = rset_exclude(RSET_GPR, node);
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+ if (ofs > 4095) {
+ idx = dest;
+ rset_clear(allow, dest);
+ kofs = (int32_t)offsetof(Node, key);
+ }
+ else if (ra_hasreg(dest)) {
+ emit_opk(as, ARMI_ADD, dest, node, ofs, allow);
+ }
+ asm_guardcc(as, CC_NE);
+ if (!irt_ispri(irkey->t)) {
+ RegSet even = (as->freeset & allow);
+ even = even & (even >> 1) & RSET_GPREVEN;
+ if (even) {
+ key = ra_scratch(as, even);
+ if (rset_test(as->freeset, key + 1)) {
+ type = key + 1;
+ ra_modified(as, type);
+ }
+ }
+ else {
+ key = ra_scratch(as, allow);
+ }
+ rset_clear(allow, key);
+ }
+ rset_clear(allow, type);
+ if (irt_isnum(irkey->t)) {
+ emit_opk(as,
+ ARMI_CMP,
+ 0,
+ type,
+ (int32_t)ir_knum(irkey)->u32.hi,
+ allow);
+ ARMI_IT(CC_EQ);
+ emit_opk(as,
+ ARMI_CMP,
+ 0,
+ key,
+ (int32_t)ir_knum(irkey)->u32.lo,
+ allow);
+ }
+ else {
+ if (ra_hasreg(key)) {
+ emit_opk(as, ARMI_CMP, 0, key, irkey->i, allow);
+ ARMI_IT(CC_EQ);
+ }
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype(irkey->t)), type);
+ }
+ emit_lso(as, ARMI_LDR, type, idx, kofs + 4);
+ if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs);
+ if (ofs > 4095)
+ emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
+}
+
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+ emit_lsptr(as, ARMI_LDR, dest, v);
+ }
+ else {
+ Reg uv = ra_scratch(as, RSET_GPR);
+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->o == IR_UREFC) {
+ asm_guardcc(as, CC_NE);
+ emit_n(as, ARMC_K12(ARMI_CMP, 1), RID_TMP);
+ emit_opk(as,
+ ARMI_ADD,
+ dest,
+ uv,
+ (int32_t)offsetof(GCupval, tv),
+ RSET_GPR);
+ emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+ }
+ else {
+ emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v));
+ }
+ emit_lso(as,
+ ARMI_LDR,
+ uv,
+ func,
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+ }
+}
+
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+ UNUSED(as); UNUSED(ir);
+ lj_assertA(!ra_used(ir), "unfused FREF");
+}
+
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ IRRef ref = ir->op2, refk = ir->op1;
+ Reg r;
+ if (irref_isk(ref)) {
+ IRRef tmp = refk; refk = ref; ref = tmp;
+ }
+ else if (!irref_isk(refk)) {
+ uint32_t k, m = ARMC_K12(0, sizeof(GCstr));
+ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
+ IRIns *irr = IR(ir->op2);
+ if (ra_hasreg(irr->r)) {
+ ra_noweak(as, irr->r);
+ right = irr->r;
+ }
+ else if (mayfuse(as, irr->op2) &&
+ irr->o == IR_ADD && irref_isk(irr->op2) &&
+ (k = emit_isk12(ARMI_ADD,
+ (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) {
+ m = k;
+ right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
+ }
+ else {
+ right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
+ }
+ emit_dn(as, ARMI_ADD ^ m, dest, dest);
+ emit_dnm(as, ARMI_ADD, dest, left, right);
+ return;
+ }
+ r = ra_alloc1(as, ref, RSET_GPR);
+ emit_opk(as,
+ ARMI_ADD,
+ dest,
+ r,
+ sizeof(GCstr) + IR(refk)->i,
+ rset_exclude(RSET_GPR, r));
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
+{
+ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: return ARMI_LDRSB;
+ case IRT_U8: return ARMI_LDRB;
+ case IRT_I16: return ARMI_LDRSH;
+ case IRT_U16: return ARMI_LDRH;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
+ default: return ARMI_LDR;
+ }
+}
+
+static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
+{
+ UNUSED(as);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: return ARMI_STRB;
+ case IRT_I16: case IRT_U16: return ARMI_STRH;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
+ default: return ARMI_STR;
+ }
+}
+
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ ARMIns ai = asm_fxloadins(as, ir);
+ Reg idx;
+ int32_t ofs;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
+ ofs = 0;
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) {
+ /* Turn the t->array load into an add for colocated arrays. */
+ emit_dn(as, ARMC_K12(ARMI_ADD, ofs), dest, idx);
+ return;
+ }
+ }
+ ofs = field_ofs[ir->op2];
+ }
+ emit_lso(as, ai, dest, idx, ofs);
+}
+
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+ IRIns *irf = IR(ir->op1);
+ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+ int32_t ofs = field_ofs[irf->op2];
+ ARMIns ai = asm_fxstoreins(as, ir);
+ emit_lso(as, ai, src, idx, ofs);
+ }
+}
+
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as,
+ ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
+}
+
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
+{
+ if (ir->r != RID_SINK) {
+ Reg src = ra_alloc1(as,
+ ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ asm_fusexref(as,
+ asm_fxstoreins(as, ir),
+ src,
+ ir->op1,
+ rset_exclude(RSET_GPR, src),
+ ofs);
+ }
+}
+
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, idx;
+ RegSet allow = RSET_GPR;
+ int32_t ofs = 0;
+ if (hiop && ra_used(ir + 1)) {
+ type = ra_dest(as, ir + 1, allow);
+ rset_clear(allow, type);
+ }
+ if (ra_used(ir)) {
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ }
+ idx = asm_fuseahuref(as,
+ ir->op1,
+ &ofs,
+ allow,
+ (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+ if (!hiop || type == RID_NONE) {
+ rset_clear(allow, idx);
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+ rset_test((as->freeset & allow), dest + 1)) {
+ type = dest + 1;
+ ra_modified(as, type);
+ }
+ else {
+ type = RID_TMP;
+ }
+ }
+ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type);
+ if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+ if (t == IRT_NUM)
+ emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs);
+ else
+#endif
+ emit_lso(as, ARMI_LDR, dest, idx, ofs);
+ }
+ emit_lso(as, ARMI_LDR, type, idx, ofs + 4);
+}
+
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+ if (ir->r != RID_SINK) {
+ RegSet allow = RSET_GPR;
+ Reg idx, src = RID_NONE, type = RID_NONE;
+ int32_t ofs = 0;
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t)) {
+ src = ra_alloc1(as, ir->op2, RSET_FPR);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024);
+ emit_vlso(as, ARMI_VSTR_D, src, idx, ofs);
+ }
+ else
+#endif
+ {
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ if (!irt_ispri(ir->t)) {
+ src = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, src);
+ }
+ if (hiop)
+ type = ra_alloc1(as, (ir + 1)->op2, allow);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096);
+ if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
+ emit_lso(as, ARMI_STR, type, idx, ofs + 4);
+ }
+ }
+}
+
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+ int32_t ofs = 8*((int32_t)ir->op1 - 1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+ int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP);
+ IRType t = hiop ? IRT_NUM : irt_type(ir->t);
+ Reg dest = RID_NONE, type = RID_NONE, base;
+ RegSet allow = RSET_GPR;
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
+#if LJ_SOFTFP
+ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir + 1)) {
+ type = ra_dest(as, ir + 1, allow);
+ rset_clear(allow, type);
+ }
+#else
+ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) {
+ dest = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, dest);
+ t = IRT_NUM; /* Continue with a regular number type check. */
+ }
+ else
+#endif
+ if (ra_used(ir)) {
+ Reg tmp = RID_NONE;
+ if ((ir->op2 & IRSLOAD_CONVERT))
+ tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
+ rset_clear(allow, dest);
+ base = ra_alloc1(as, REF_BASE, allow);
+ if ((ir->op2 & IRSLOAD_CONVERT)) {
+ if (t == IRT_INT) {
+ emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
+ emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15));
+ t = IRT_NUM; /* Check for original type. */
+ }
+ else {
+ emit_tm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15));
+ emit_tn(as, ARMI_VMOV_S_R, tmp, (dest & 15));
+ t = IRT_INT; /* Check for original type. */
+ }
+ dest = tmp;
+ }
+ goto dotypecheck;
+ }
+ base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+ rset_clear(allow, base);
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ if (ra_noreg(type)) {
+ if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
+ rset_test((as->freeset & allow), dest + 1)) {
+ type = dest + 1;
+ ra_modified(as, type);
+ }
+ else {
+ type = RID_TMP;
+ }
+ }
+ asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
+ if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+ emit_n(as, ARMC_K12(ARMI_CMN, 1), type);
+ emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type);
+ } else {
+ emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type);
+ }
+ }
+ if (ra_hasreg(dest)) {
+#if !LJ_SOFTFP
+ if (t == IRT_NUM) {
+ if (ofs < 1024) {
+ emit_vlso(as, ARMI_VLDR_D, dest, base, ofs);
+ }
+ else {
+ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4);
+ emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0);
+ emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow);
+ return;
+ }
+ }
+ else
+#endif
+ emit_lso(as, ARMI_LDR, dest, base, ofs);
+ }
+ if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4);
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+ CTState *cts = ctype_ctsG(J2G(as->J));
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+ IRRef args[4];
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ RegSet drop = RSET_SCRATCH;
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
+
+ as->gcsteps++;
+ if (ra_hasreg(ir->r))
+ rset_clear(drop, ir->r); /* Dest reg handled below. */
+ ra_evictset(as, drop);
+ if (ra_used(ir))
+ ra_destreg(as, ir, RID_RET); /* GCcdata * */
+
+ /* Initialize immutable cdata object. */
+ if (ir->o == IR_CNEWI) {
+ int32_t ofs = sizeof(GCcdata);
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ if (sz == 8) {
+ ofs += 4; ir++;
+ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
+ }
+ for (;;) {
+ Reg r = ra_alloc1(as, ir->op2, allow);
+ emit_lso(as, ARMI_STR, r, RID_RET, ofs);
+ rset_clear(allow, r);
+ if (ofs == sizeof(GCcdata)) break;
+ ofs -= 4; ir--;
+ }
+ }
+ else if (ir->op2 != REF_NIL) {
+ /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
+ }
+
+ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+ {
+ uint32_t k = emit_isk12(ARMI_MOV, id);
+ Reg r = k ? RID_R1 : ra_allock(as, id, allow);
+ emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+ emit_lso(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+ emit_d(as, ARMC_K12(ARMI_MOV, ~LJ_TCDATA), RID_TMP);
+ if (k) emit_d(as, ARMI_MOV ^ k, RID_R1);
+ }
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
+ asm_gencall(as, ci, args);
+ ra_allockreg(as,
+ (int32_t)(sz + sizeof(GCcdata)),
+ ra_releasetmp(as, ASMREF_TMP1));
+}
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+ Reg gr = ra_allock(as,
+ i32ptr(J2G(as->J)),
+ rset_exclude(rset_exclude(RSET_GPR, tab), link));
+ Reg mark = RID_TMP;
+ MCLabel l_end = emit_label(as);
+ emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+ emit_lso(as,
+ ARMI_STR,
+ tab,
+ gr,
+ (int32_t)offsetof(global_State, gc.grayagain));
+ emit_dn(as, ARMC_K12(ARMI_BIC, LJ_GC_BLACK), mark, mark);
+ emit_lso(as,
+ ARMI_LDR,
+ link,
+ gr,
+ (int32_t)offsetof(global_State, gc.grayagain));
+ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), mark);
+ emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+ IRRef args[2];
+ MCLabel l_end;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ir->op1; /* TValue *tv */
+ asm_gencall(as, ci, args);
+ //if ((l_end[-1] >> 28) == CC_AL) jturnsek!!!
|
|
+ // l_end[-1] = ARMF_CC(l_end[-1], CC_NE);
|
|
+ //else {
|
|
+ emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end);
|
|
+ //}
|
|
+ ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1));
|
|
+ obj = IR(ir->op1)->r;
|
|
+ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
|
|
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), tmp);
|
|
+ ARMI_IT(CC_NE);
|
|
+ emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_WHITES), RID_TMP);
|
|
+ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
|
|
+ emit_lso(as,
|
|
+ ARMI_LDRB,
|
|
+ tmp,
|
|
+ obj,
|
|
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
|
|
+ emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
|
|
+}
|
|
+
|
|
+/* -- Arithmetic and logic operations ------------------------------------- */
|
|
+
|
|
+#if !LJ_SOFTFP
|
|
+static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ right = (left >> 8); left &= 255;
|
|
+ emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15));
|
|
+}
|
|
+
|
|
+static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_FPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
|
|
+ emit_tm(as, ai, (dest & 15), (left & 15));
|
|
+}
|
|
+
|
|
+static void asm_callround(ASMState *as, IRIns *ir, int id)
|
|
+{
|
|
+ /* The modified regs must match with the *.dasc implementation. */
|
|
+ RegSet drop = RID2RSET(RID_R0) | RID2RSET(RID_R1) | RID2RSET(RID_R2) |
|
|
+ RID2RSET(RID_R3) | RID2RSET(RID_R12);
|
|
+ RegSet of;
|
|
+ Reg dest, src;
|
|
+ ra_evictset(as, drop);
|
|
+ dest = ra_dest(as, ir, RSET_FPR);
|
|
+ emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
|
|
+ emit_call(as,
|
|
+ id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
|
|
+ id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
|
|
+ (void *)lj_vm_trunc_sf);
|
|
+ /* Workaround to protect argument GPRs from being used for remat. */
|
|
+ of = as->freeset;
|
|
+ as->freeset &= ~RSET_RANGE(RID_R0, RID_R1 + 1);
|
|
+ as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
|
|
+ src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
|
|
+ as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1 + 1));
|
|
+ emit_tnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
|
|
+}
|
|
+
|
|
+static void asm_fpmath(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ if (ir->op2 <= IRFPM_TRUNC)
|
|
+ asm_callround(as, ir, ir->op2);
|
|
+ else if (ir->op2 == IRFPM_SQRT)
|
|
+ asm_fpunary(as, ir, ARMI_VSQRT_D);
|
|
+ else
|
|
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
|
|
+{
|
|
+ IRIns *ir;
|
|
+ if (irref_isk(rref))
|
|
+ return 0; /* Don't swap constants to the left. */
|
|
+ if (irref_isk(lref))
|
|
+ return 1; /* But swap constants to the right. */
|
|
+ ir = IR(rref);
|
|
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
|
|
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
|
|
+ return 0; /* Don't swap fusable operands to the left. */
|
|
+ ir = IR(lref);
|
|
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) ||
|
|
+ (ir->o == IR_ADD && ir->op1 == ir->op2))
|
|
+ return 1; /* But swap fusable operands to the right. */
|
|
+ return 0; /* Otherwise don't swap. */
|
|
+}
|
|
+
|
|
+static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ IRRef lref = ir->op1, rref = ir->op2;
|
|
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
|
|
+ uint32_t m, rs = 0;
|
|
+ if (asm_swapops(as, lref, rref)) {
|
|
+ IRRef tmp = lref; lref = rref; rref = tmp;
|
|
+ if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC)
|
|
+ ai ^= (ARMI_SUB ^ ARMI_RSB);
|
|
+ }
|
|
+ left = ra_hintalloc(as, lref, dest, RSET_GPR);
|
|
+ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left), &rs);
|
|
+ if (irt_isguard(ir->t)) {
|
|
+ /* For IR_ADDOV etc. */
|
|
+ asm_guardcc(as, CC_VS);
|
|
+ ai |= ARMI_S;
|
|
+ }
|
|
+ emit_dn(as, ai ^ m, dest, left);
|
|
+ if (rs)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
|
|
+}
|
|
+
|
|
+/* Try to drop cmp r, #0. */
|
|
+static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai) //jturnsek!!!
|
|
+{
|
|
+ if (as->flagmcp == as->mcp) {
|
|
+ uint32_t cc = ((as->mcp[1] >> 20) & 0xf);
|
|
+ as->flagmcp = NULL;
|
|
+ if (cc <= CC_NE) {
|
|
+ as->mcp++;
|
|
+ ai |= ARMI_S;
|
|
+ }
|
|
+ else if (cc == CC_GE) {
|
|
+ * ++as->mcp ^= ((CC_GE ^ CC_PL) << 20);
|
|
+ ai |= ARMI_S;
|
|
+ }
|
|
+ else if (cc == CC_LT) {
|
|
+ * ++as->mcp ^= ((CC_LT ^ CC_MI) << 20);
|
|
+ ai |= ARMI_S;
|
|
+ } /* else: other conds don't work in general. */
|
|
+ }
|
|
+ return ai;
|
|
+}
|
|
+
|
|
+static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ asm_intop(as, ir, asm_drop_cmp0(as, ai));
|
|
+}
|
|
+
|
|
+static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ emit_dn(as, ARMC_K12(ai, 0), dest, left);
|
|
+}
|
|
+
|
|
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
|
|
+static void asm_intmul(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ Reg tmp = RID_NONE;
|
|
+ /* ARMv5 restriction: dest != left and dest_hi != left. */
|
|
+ if (dest == left && left != right) { left = right; right = dest; }
|
|
+ if (irt_isguard(ir->t)) {
|
|
+ /* IR_MULOV */
|
|
+ if (!(as->flags & JIT_F_ARMV6) && dest == left)
|
|
+ tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left));
|
|
+ asm_guardcc(as, CC_NE);
|
|
+ emit_nm(as, ARMI_TEQ | ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest);
|
|
+ emit_dnm(as, ARMI_SMULL | ARMF_T(dest), RID_TMP, left, right);
|
|
+ }
|
|
+ else {
|
|
+ if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP;
|
|
+ emit_dnm(as, ARMI_MUL, dest, left, right);
|
|
+ }
|
|
+ /* Only need this for the dest == left == right case. */
|
|
+ if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right);
|
|
+}
|
|
+
|
|
+static void asm_add(ASMState *as, IRIns *ir)
|
|
+{
|
|
+#if !LJ_SOFTFP
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D))
|
|
+ asm_fparith(as, ir, ARMI_VADD_D);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ asm_intop_s(as, ir, ARMI_ADD);
|
|
+}
|
|
+
|
|
+static void asm_sub(ASMState *as, IRIns *ir)
|
|
+{
|
|
+#if !LJ_SOFTFP
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D))
|
|
+ asm_fparith(as, ir, ARMI_VSUB_D);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ asm_intop_s(as, ir, ARMI_SUB);
|
|
+}
|
|
+
|
|
+static void asm_mul(ASMState *as, IRIns *ir)
|
|
+{
|
|
+#if !LJ_SOFTFP
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ asm_fparith(as, ir, ARMI_VMUL_D);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ asm_intmul(as, ir);
|
|
+}
|
|
+
|
|
+#define asm_addov(as, ir) asm_add(as, ir)
|
|
+#define asm_subov(as, ir) asm_sub(as, ir)
|
|
+#define asm_mulov(as, ir) asm_mul(as, ir)
|
|
+
|
|
+#if !LJ_SOFTFP
|
|
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
|
|
+#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
|
|
+#endif
|
|
+
|
|
+static void asm_neg(ASMState *as, IRIns *ir)
|
|
+{
|
|
+#if !LJ_SOFTFP
|
|
+ if (irt_isnum(ir->t)) {
|
|
+ asm_fpunary(as, ir, ARMI_VNEG_D);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ asm_intneg(as, ir, ARMI_RSB);
|
|
+}
|
|
+
|
|
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
|
|
+{
|
|
+ ai = asm_drop_cmp0(as, ai);
|
|
+ if (ir->op2 == 0) {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ uint32_t rs = 0;
|
|
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR, &rs);
|
|
+ emit_d(as, ai ^ m, dest);
|
|
+ if (rs)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
|
|
+ }
|
|
+ else {
|
|
+ /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
|
|
+ asm_intop(as, ir, ai);
|
|
+ }
|
|
+}
|
|
+
|
|
+#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
|
|
+
|
|
+static void asm_bswap(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ if ((as->flags & JIT_F_ARMV6)) {
|
|
+ emit_dnm(as, ARMI_REV, dest, left, left);
|
|
+ }
|
|
+ else {
|
|
+ Reg tmp2 = dest;
|
|
+ if (tmp2 == left)
|
|
+ tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left));
|
|
+ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP);
|
|
+ emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_ROR, 8), tmp2, left);
|
|
+ emit_dn(as, ARMC_K12(ARMI_BIC, 256 * 8 | 127), RID_TMP, RID_TMP);
|
|
+ emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left);
|
|
+ }
|
|
+}
|
|
+
|
|
+#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
|
|
+#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
|
|
+#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
|
|
+
|
|
+static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
|
|
+{
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ /* Constant shifts. */
|
|
+ /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. */
|
|
+ /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ int32_t shift = (IR(ir->op2)->i & 31);
|
|
+ emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, left);
|
|
+ }
|
|
+ else {
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
|
|
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | ARMF_RSH(sh, right), dest, left);
|
|
+ }
|
|
+}
|
|
+
|
|
+#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
|
|
+#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
|
|
+#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
|
|
+#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
|
|
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
|
|
+
|
|
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
|
|
+{
|
|
+ uint32_t kcmp = 0, kmov = 0;
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ Reg right = 0;
|
|
+ if (irref_isk(ir->op2)) {
|
|
+ kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i);
|
|
+ if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i);
|
|
+ }
|
|
+ if (!kmov) {
|
|
+ kcmp = 0;
|
|
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
|
|
+ }
|
|
+ if (kmov || dest != right) {
|
|
+ emit_dm(as, ARMI_MOV ^ kmov, dest, right);
|
|
+ ARMI_IT(cc);
|
|
+ cc ^= 1; /* Must use opposite conditions for paired moves. */
|
|
+ }
|
|
+ else {
|
|
+ cc ^= (CC_LT ^ CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */
|
|
+ }
|
|
+ if (dest != left) {
|
|
+ emit_dm(as, ARMI_MOV, dest, left);
|
|
+ ARMI_IT(cc);
|
|
+ }
|
|
+ emit_nm(as, ARMI_CMP ^ kcmp, left, right);
|
|
+}
|
|
+
|
|
+#if LJ_SOFTFP
|
|
+static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
|
|
+ RegSet drop = RSET_SCRATCH;
|
|
+ Reg r;
|
|
+ IRRef args[4];
|
|
+ args[0] = ir->op1; args[1] = (ir + 1)->op1;
|
|
+ args[2] = ir->op2; args[3] = (ir + 1)->op2;
|
|
+ /* __aeabi_cdcmple preserves r0-r3. */
|
|
+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
|
|
+ if (ra_hasreg((ir + 1)->r)) rset_clear(drop, (ir + 1)->r);
|
|
+ if (!rset_test(as->freeset, RID_R2) &&
|
|
+ regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2);
|
|
+ if (!rset_test(as->freeset, RID_R3) &&
|
|
+ regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3);
|
|
+ ra_evictset(as, drop);
|
|
+ ra_destpair(as, ir);
|
|
+ emit_dm(as, ARMI_MOV, RID_RETHI, RID_R3);
|
|
+ ARMI_IT(cc);
|
|
+ emit_dm(as, ARMI_MOV, RID_RETLO, RID_R2);
|
|
+ ARMI_IT(cc);
|
|
+ emit_call(as, (void *)ci->func);
|
|
+ for (r = RID_R0; r <= RID_R3; r++)
|
|
+ ra_leftov(as, r, args[r - RID_R0]);
|
|
+}
|
|
+#else
|
|
+static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc)
|
|
+{
|
|
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
|
|
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ right = ((left >> 8) & 15); left &= 15;
|
|
+ if (dest != left) {
|
|
+ emit_tm(as, ARMI_VMOV_D, dest, left);
|
|
+ ARMI_IT(cc ^ 1);
|
|
+ }
|
|
+ if (dest != right) {
|
|
+ emit_tm(as, ARMI_VMOV_D, dest, right);
|
|
+ ARMI_IT(cc);
|
|
+ }
|
|
+ emit_t(as, ARMI_VMRS, 0);
|
|
+ emit_tm(as, ARMI_VCMP_D, left, right);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
|
|
+{
|
|
+#if LJ_SOFTFP
|
|
+ UNUSED(fcc);
|
|
+#else
|
|
+ if (irt_isnum(ir->t))
|
|
+ asm_fpmin_max(as, ir, fcc);
|
|
+ else
|
|
+#endif
|
|
+ asm_intmin_max(as, ir, cc);
|
|
+}
|
|
+
|
|
+#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
|
|
+#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
|
|
+
|
|
+/* -- Comparisons --------------------------------------------------------- */
|
|
+
|
|
+/* Map of comparisons to flags. ORDER IR. */
|
|
+static const uint8_t asm_compmap[IR_ABC + 1] = {
|
|
+ /* op FP swp int cc FP cc */
|
|
+ /* LT */ CC_GE + (CC_HS << 4),
|
|
+ /* GE x */ CC_LT + (CC_HI << 4),
|
|
+ /* LE */ CC_GT + (CC_HI << 4),
|
|
+ /* GT x */ CC_LE + (CC_HS << 4),
|
|
+ /* ULT x */ CC_HS + (CC_LS << 4),
|
|
+ /* UGE */ CC_LO + (CC_LO << 4),
|
|
+ /* ULE x */ CC_HI + (CC_LO << 4),
|
|
+ /* UGT */ CC_LS + (CC_LS << 4),
|
|
+ /* EQ */ CC_NE + (CC_NE << 4),
|
|
+ /* NE */ CC_EQ + (CC_EQ << 4),
|
|
+ /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
|
|
+};
|
|
+
|
|
+#if LJ_SOFTFP
|
|
+/* FP comparisons. */
|
|
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
|
|
+ RegSet drop = RSET_SCRATCH;
|
|
+ Reg r;
|
|
+ IRRef args[4];
|
|
+ int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1);
|
|
+ args[swp ^ 0] = ir->op1; args[swp ^ 1] = (ir + 1)->op1;
|
|
+ args[swp ^ 2] = ir->op2; args[swp ^ 3] = (ir + 1)->op2;
|
|
+ /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */
|
|
+ for (r = RID_R0; r <= RID_R3; r++)
|
|
+ if (!rset_test(as->freeset, r) &&
|
|
+ regcost_ref(as->cost[r]) == args[r - RID_R0]) rset_clear(drop, r);
|
|
+ ra_evictset(as, drop);
|
|
+ asm_guardcc(as, (asm_compmap[ir->o] >> 4));
|
|
+ emit_call(as, (void *)ci->func);
|
|
+ for (r = RID_R0; r <= RID_R3; r++)
|
|
+ ra_leftov(as, r, args[r - RID_R0]);
|
|
+}
|
|
+#else
|
|
+/* FP comparisons. */
|
|
+static void asm_fpcomp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ Reg left, right;
|
|
+ ARMIns ai;
|
|
+ int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
|
|
+ if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
|
|
+ left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15);
|
|
+ right = 0;
|
|
+ ai = ARMI_VCMPZ_D;
|
|
+ }
|
|
+ else {
|
|
+ left = ra_alloc2(as, ir, RSET_FPR);
|
|
+ if (swp) {
|
|
+ right = (left & 15); left = ((left >> 8) & 15);
|
|
+ }
|
|
+ else {
|
|
+ right = ((left >> 8) & 15); left &= 15;
|
|
+ }
|
|
+ ai = ARMI_VCMP_D;
|
|
+ }
|
|
+ asm_guardcc(as, (asm_compmap[ir->o] >> 4));
|
|
+ emit_t(as, ARMI_VMRS, 0);
|
|
+ emit_tm(as, ai, left, right);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/* Integer comparisons. */
|
|
+static void asm_intcomp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ ARMCC cc = (asm_compmap[ir->o] & 15);
|
|
+ IRRef lref = ir->op1, rref = ir->op2;
|
|
+ Reg left;
|
|
+ uint32_t m, rs = 0;
|
|
+ int cmpprev0 = 0;
|
|
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
|
|
+ "bad comparison data type %d", irt_type(ir->t));
|
|
+ if (asm_swapops(as, lref, rref)) {
|
|
+ Reg tmp = lref; lref = rref; rref = tmp;
|
|
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
|
|
+ else if(cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
|
|
+ }
|
|
+ if (irref_isk(rref) && IR(rref)->i == 0) {
|
|
+ IRIns *irl = IR(lref);
|
|
+ cmpprev0 = (irl + 1 == ir);
|
|
+ /* Combine comp(BAND(left, right), 0) into tst left, right. */
|
|
+ if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
|
|
+ IRRef blref = irl->op1, brref = irl->op2;
|
|
+ uint32_t m2 = 0;
|
|
+ Reg bleft;
|
|
+ if (asm_swapops(as, blref, brref)) {
|
|
+ Reg tmp = blref; blref = brref; brref = tmp;
|
|
+ }
|
|
+ if (irref_isk(brref)) {
|
|
+ m2 = emit_isk12(ARMI_AND, IR(brref)->i);
|
|
+ if ((m2 & (ARMI_AND ^ ARMI_BIC)))
|
|
+ goto notst; /* Not beneficial if we miss a constant operand. */
|
|
+ }
|
|
+ if (cc == CC_GE) cc = CC_PL;
|
|
+ else if (cc == CC_LT) cc = CC_MI;
|
|
+ else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. */
|
|
+ bleft = ra_alloc1(as, blref, RSET_GPR);
|
|
+ if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft), &rs);
|
|
+ asm_guardcc(as, cc);
|
|
+ emit_n(as, ARMI_TST ^ m2, bleft);
|
|
+ if (rs)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m2 >> 16), (m2 >> 16));
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+notst:
|
|
+ left = ra_alloc1(as, lref, RSET_GPR);
|
|
+ m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left), &rs);
|
|
+ asm_guardcc(as, cc);
|
|
+ emit_n(as, ARMI_CMP ^ m, left);
|
|
+ if (rs)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16));
|
|
+ /* Signed comparison with zero and referencing previous ins? */
|
|
+ if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE))
|
|
+ as->flagmcp = as->mcp; /* Allow elimination of the compare. */
|
|
+}
|
|
+
|
|
+static void asm_comp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+#if !LJ_SOFTFP
|
|
+ if (irt_isnum(ir->t))
|
|
+ asm_fpcomp(as, ir);
|
|
+ else
|
|
+#endif
|
|
+ asm_intcomp(as, ir);
|
|
+}
|
|
+
|
|
+#define asm_equal(as, ir) asm_comp(as, ir)
|
|
+
|
|
+#if LJ_HASFFI
|
|
+/* 64 bit integer comparisons. */
|
|
+static void asm_int64comp(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ int signedcomp = (ir->o <= IR_GT);
|
|
+ ARMCC cclo, cchi;
|
|
+ Reg leftlo, lefthi;
|
|
+ uint32_t mlo, mhi, rslo = 0, rshi = 0;
|
|
+ RegSet allow = RSET_GPR, oldfree;
|
|
+
|
|
+ /* Always use unsigned comparison for loword. */
|
|
+ cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15;
|
|
+ leftlo = ra_alloc1(as, ir->op1, allow);
|
|
+ oldfree = as->freeset;
|
|
+ mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo), &rslo);
|
|
+ allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */
|
|
+
|
|
+ /* Use signed or unsigned comparison for hiword. */
|
|
+ cchi = asm_compmap[ir->o] & 15;
|
|
+ lefthi = ra_alloc1(as, (ir + 1)->op1, allow);
|
|
+ mhi = asm_fuseopm(as, ARMI_CMP, (ir + 1)->op2, rset_clear(allow, lefthi), &rshi);
|
|
+
|
|
+ /* All register allocations must be performed _before_ this point. */
|
|
+ if (signedcomp) {
|
|
+ MCLabel l_around = emit_label(as);
|
|
+ asm_guardcc(as, cclo);
|
|
+ emit_n(as, ARMI_CMP ^ mlo, leftlo);
|
|
+ if (rslo)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16));
|
|
+ emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around);
|
|
+ if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */
|
|
+ asm_guardcc(as, cchi);
|
|
+ }
|
|
+ else {
|
|
+ asm_guardcc(as, cclo);
|
|
+ emit_n(as, ARMI_CMP ^ mlo, leftlo);
|
|
+ if (rslo)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16));
|
|
+ ARMI_IT(CC_EQ);
|
|
+ }
|
|
+ emit_n(as, ARMI_CMP ^ mhi, lefthi);
|
|
+ if (rshi)
|
|
+ emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rshi, (mhi >> 16), (mhi >> 16));
|
|
+}
|
|
+#endif
|
|
+
|
|
+/* -- Split register ops -------------------------------------------------- */
|
|
+
|
|
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
|
|
+static void asm_hiop(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ /* HIOP is marked as a store because it needs its own DCE logic. */
|
|
+ int uselo = ra_used(ir - 1), usehi = ra_used(ir); /* Loword/hiword used? */
|
|
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
|
|
+#if LJ_HASFFI || LJ_SOFTFP
|
|
+ if ((ir - 1)->o <= IR_NE) {
|
|
+ /* 64 bit integer or FP comparisons. ORDER IR. */
|
|
+ as->curins--; /* Always skip the loword comparison. */
|
|
+#if LJ_SOFTFP
|
|
+ if (!irt_isint(ir->t)) {
|
|
+ asm_sfpcomp(as, ir - 1);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+#if LJ_HASFFI
|
|
+ asm_int64comp(as, ir - 1);
|
|
+#endif
|
|
+ return;
|
|
+#if LJ_SOFTFP
|
|
+ }
|
|
+ else if ((ir - 1)->o == IR_MIN || (ir - 1)->o == IR_MAX) {
|
|
+ as->curins--; /* Always skip the loword min/max. */
|
|
+ if (uselo || usehi)
|
|
+ asm_sfpmin_max(as, ir - 1, (ir - 1)->o == IR_MIN ? CC_PL : CC_LE);
|
|
+ return;
|
|
+#elif LJ_HASFFI
|
|
+ }
|
|
+ else if ((ir - 1)->o == IR_CONV) {
|
|
+ as->curins--; /* Always skip the CONV. */
|
|
+ if (usehi || uselo)
|
|
+ asm_conv64(as, ir);
|
|
+ return;
|
|
+#endif
|
|
+ }
|
|
+ else if ((ir - 1)->o == IR_XSTORE) {
|
|
+ if ((ir - 1)->r != RID_SINK)
|
|
+ asm_xstore_(as, ir, 4);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
|
|
+ switch ((ir - 1)->o) {
|
|
+#if LJ_HASFFI
|
|
+ case IR_ADD:
|
|
+ as->curins--;
|
|
+ asm_intop(as, ir, ARMI_ADC);
|
|
+ asm_intop(as, ir - 1, ARMI_ADD | ARMI_S);
|
|
+ break;
|
|
+ case IR_SUB:
|
|
+ as->curins--;
|
|
+ asm_intop(as, ir, ARMI_SBC);
|
|
+ asm_intop(as, ir - 1, ARMI_SUB | ARMI_S);
|
|
+ break;
|
|
+ case IR_NEG:
|
|
+ as->curins--;
|
|
+ {
|
|
+ /* asm_intnegr */
|
|
+ Reg dest = ra_dest(as, ir, RSET_GPR);
|
|
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
|
|
+ emit_dn(as, ARMC_K12(ARMI_SBC, 0), left, dest);
|
|
+ }
|
|
+ asm_intneg(as, ir - 1, ARMI_RSB | ARMI_S);
|
|
+ break;
|
|
+ case IR_CNEWI:
|
|
+ /* Nothing to do here. Handled by lo op itself. */
|
|
+ break;
|
|
+#endif
|
|
+#if LJ_SOFTFP
|
|
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
|
|
+ case IR_STRTO:
|
|
+ if (!uselo)
|
|
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
|
|
+ break;
|
|
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
|
|
+ /* Nothing to do here. Handled by lo op itself. */
|
|
+ break;
|
|
+#endif
|
|
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
|
|
+ if (!uselo)
|
|
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
|
|
+ break;
|
|
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* -- Profiling ----------------------------------------------------------- */
|
|
+
|
|
+static void asm_prof(ASMState *as, IRIns *ir)
|
|
+{
|
|
+ UNUSED(ir);
|
|
+ asm_guardcc(as, CC_NE);
|
|
+ emit_n(as, ARMC_K12(ARMI_TST, HOOK_PROFILE), RID_TMP);
|
|
+ emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
|
|
+}
|
|
+
|
|
+/* -- Stack handling ------------------------------------------------------ */
|
|
+
|
|
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
|
|
+static void asm_stack_check(ASMState *as,
|
|
+ BCReg topslot,
|
|
+ IRIns *irp,
|
|
+ RegSet allow,
|
|
+ ExitNo exitno)
|
|
+{
|
|
+ Reg pbase;
|
|
+ uint32_t k;
|
|
+ if (irp) {
|
|
+ if (!ra_hasspill(irp->s)) {
|
|
+ pbase = irp->r;
|
|
+ lj_assertA(ra_hasreg(pbase), "base reg lost");
|
|
+ }
|
|
+ else if (allow) {
|
|
+ pbase = rset_pickbot(allow);
|
|
+ }
|
|
+ else {
|
|
+ pbase = RID_RET;
|
|
+ emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ pbase = RID_BASE;
|
|
+ }
|
|
+ emit_branchlink(as, ARMI_BL, exitstub_addr(as->J, exitno));
|
|
+ ARMI_IT(CC_LS);
|
|
+ k = emit_isk12(0, (int32_t)(8*topslot));
|
|
+ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
|
|
+ emit_n(as, ARMI_CMP ^ k, RID_TMP);
|
|
+ emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
|
|
+ emit_lso(as,
|
|
+ ARMI_LDR,
|
|
+ RID_TMP,
|
|
+ RID_TMP,
|
|
+ (int32_t)offsetof(lua_State, maxstack));
|
|
+ if (irp) {
|
|
+ /* Must not spill arbitrary registers in head of side trace. */
|
|
+ int32_t i = i32ptr(&J2G(as->J)->cur_L);
|
|
+ if (ra_hasspill(irp->s))
|
|
+ emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
|
|
+ emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
|
|
+ if (ra_hasspill(irp->s) && !allow)
|
|
+ emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
|
|
+ emit_loadi(as, RID_TMP, (i & ~4095));
|
|
+ }
|
|
+ else {
|
|
+ emit_getgl(as, RID_TMP, cur_L);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Restore Lua stack from on-trace state. */
|
|
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
|
|
+{
|
|
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
|
|
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap) - 1];
|
|
+ MSize n, nent = snap->nent;
|
|
+ /* Store the value of all modified slots to the Lua stack. */
|
|
+ for (n = 0; n < nent; n++) {
|
|
+ SnapEntry sn = map[n];
|
|
+ BCReg s = snap_slot(sn);
|
|
+ int32_t ofs = 8*((int32_t)s - 1);
|
|
+ IRRef ref = snap_ref(sn);
|
|
+ IRIns *ir = IR(ref);
|
|
+ if ((sn & SNAP_NORESTORE))
|
|
+ continue;
|
|
+ if (irt_isnum(ir->t)) {
|
|
+#if LJ_SOFTFP
|
|
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
|
|
+ Reg tmp;
|
|
+ /* LJ_SOFTFP: must be a number constant. */
|
|
+ lj_assertA(irref_isk(ref), "unsplit FP op");
|
|
+ tmp = ra_allock(as,
|
|
+ (int32_t)ir_knum(ir)->u32.lo,
|
|
+ rset_exclude(RSET_GPREVEN, RID_BASE));
|
|
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
|
|
+ if (rset_test(as->freeset, tmp + 1)) odd = RID2RSET(tmp + 1);
|
|
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd);
|
|
+ emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs + 4);
|
|
+#else
|
|
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
|
|
+ emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs);
|
|
+#endif
|
|
+ }
|
|
+ else {
|
|
+ RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
|
|
+ Reg type;
|
|
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
|
|
+ "restore of IR type %d", irt_type(ir->t));
|
|
+ if (!irt_ispri(ir->t)) {
|
|
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
|
|
+ emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
|
|
+ if (rset_test(as->freeset, src + 1)) odd = RID2RSET(src + 1);
|
|
+ }
|
|
+ if ((sn & (SNAP_CONT | SNAP_FRAME))) {
|
|
+ if (s == 0) continue; /* Do not overwrite link to previous frame. */
|
|
+ type = ra_allock(as, (int32_t)(*flinks--), odd);
|
|
+#if LJ_SOFTFP
|
|
+ }
|
|
+ else if ((sn & SNAP_SOFTFPNUM)) {
|
|
+ type = ra_alloc1(as, ref + 1, rset_exclude(RSET_GPRODD, RID_BASE));
|
|
+#endif
|
|
+ } else if ((sn & SNAP_KEYINDEX)) {
|
|
+ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
|
|
+ }
|
|
+ else {
|
|
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
|
|
+ }
|
|
+ emit_lso(as, ARMI_STR, type, RID_BASE, ofs + 4);
|
|
+ }
|
|
+ checkmclim(as);
|
|
+ }
|
|
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
|
|
+}
|
|
+
|
|
+/* -- GC handling --------------------------------------------------------- */
|
|
+
|
|
+/* Marker to prevent patching the GC check exit. */
|
|
+#define ARM_NOPATCH_GC_CHECK (ARMC_K12(ARMI_BIC, 0))
|
|
+
|
|
+/* Check GC threshold and do one or more GC steps. */
|
|
+static void asm_gc_check(ASMState *as)
|
|
+{
|
|
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
|
|
+ IRRef args[2];
|
|
+ MCLabel l_end;
|
|
+ Reg tmp1, tmp2;
|
|
+ ra_evictset(as, RSET_SCRATCH);
|
|
+ l_end = emit_label(as);
|
|
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
|
|
+ asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */
|
|
+ * --as->mcp = ARM_NOPATCH_GC_CHECK;
|
|
+ emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET);
|
|
+ args[0] = ASMREF_TMP1; /* global_State *g */
|
|
+ args[1] = ASMREF_TMP2; /* MSize steps */
|
|
+ asm_gencall(as, ci, args);
|
|
+ tmp1 = ra_releasetmp(as, ASMREF_TMP1);
|
|
+ tmp2 = ra_releasetmp(as, ASMREF_TMP2);
|
|
+ emit_loadi(as, tmp2, as->gcsteps);
|
|
+ /* Jump around GC step if GC total < GC threshold. */
|
|
+ emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end);
|
|
+ emit_nm(as, ARMI_CMP, RID_TMP, tmp2);
|
|
+ emit_lso(as,
|
|
+ ARMI_LDR,
|
|
+ tmp2,
|
|
+ tmp1,
|
|
+ (int32_t)offsetof(global_State, gc.threshold));
|
|
+ emit_lso(as,
|
|
+ ARMI_LDR,
|
|
+ RID_TMP,
|
|
+ tmp1,
|
|
+ (int32_t)offsetof(global_State, gc.total));
|
|
+ ra_allockreg(as, i32ptr(J2G(as->J)), tmp1);
|
|
+ as->gcsteps = 0;
|
|
+ checkmclim(as);
|
|
+}
|
|
+
|
|
+/* -- Loop handling ------------------------------------------------------- */
|
|
+
|
|
+/* Fixup the loop branch. */
|
|
+static void asm_loop_fixup(ASMState *as)
|
|
+{
|
|
+ MCode *p = as->mctop;
|
|
+ MCode *target = as->mcp;
|
|
+ if (as->loopinv) {
|
|
+ /* Inverted loop branch? */
|
|
+ /* asm_guardcc already inverted the bcc and patched the final bl. */
|
|
+ p[-2] |= ARMC_B((uint32_t)((target - p + 1) << 1));
|
|
+ }
|
|
+ else {
|
|
+ p[-1] = ARMI_B_T4 | ARMC_BL((uint32_t)((target - p) << 1));
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Fixup the tail of the loop. */
|
|
+static void asm_loop_tail_fixup(ASMState *as)
|
|
+{
|
|
+ UNUSED(as); /* Nothing to do. */
|
|
+}
|
|
+
|
|
+/* -- Head of trace ------------------------------------------------------- */
|
|
+
|
|
+/* Reload L register from g->cur_L. */
|
|
+static void asm_head_lreg(ASMState *as)
|
|
+{
|
|
+ IRIns *ir = IR(ASMREF_L);
|
|
+ if (ra_used(ir)) {
|
|
+ Reg r = ra_dest(as, ir, RSET_GPR);
|
|
+ emit_getgl(as, r, cur_L);
|
|
+ ra_evictk(as);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Coalesce BASE register for a root trace. */
|
|
+static void asm_head_root_base(ASMState *as)
|
|
+{
|
|
+ IRIns *ir;
|
|
+ asm_head_lreg(as);
|
|
+ ir = IR(REF_BASE);
|
|
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
|
|
+ ra_spill(as, ir);
|
|
+ ra_destreg(as, ir, RID_BASE);
|
|
+}
|
|
+
|
|
+/* Coalesce BASE register for a side trace. */
|
|
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
|
|
+{
|
|
+ IRIns *ir;
|
|
+ asm_head_lreg(as);
|
|
+ ir = IR(REF_BASE);
|
|
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
|
|
+ ra_spill(as, ir);
|
|
+ if (ra_hasspill(irp->s)) {
|
|
+ rset_clear(allow, ra_dest(as, ir, allow));
|
|
+ }
|
|
+ else {
|
|
+ Reg r = irp->r;
|
|
+ lj_assertA(ra_hasreg(r), "base reg lost");
|
|
+ rset_clear(allow, r);
|
|
+ if (r != ir->r && !rset_test(as->freeset, r))
|
|
+ ra_restore(as, regcost_ref(as->cost[r]));
|
|
+ ra_destreg(as, ir, r);
|
|
+ }
|
|
+ return allow;
|
|
+}
|
|
+
|
|
+/* -- Tail of trace ------------------------------------------------------- */
|
|
+
|
|
+/* Fixup the tail code. */
|
|
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
|
|
+{
|
|
+ MCode *p = as->mctop;
|
|
+ MCode *target;
|
|
+ int32_t spadj = as->T->spadjust;
|
|
+ if (spadj == 0) {
|
|
+ as->mctop = --p;
|
|
+ }
|
|
+ else {
|
|
+ /* Patch stack adjustment. */
|
|
+ uint32_t k = emit_isk12(ARMI_ADD, spadj);
|
|
+ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
|
|
+ p[-2] = (ARMI_ADD ^ k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
|
|
+ }
|
|
+ /* Patch exit branch. */
|
|
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)as->J->exitstubgroup[0] - 8;//lj_vm_exit_interp;
|
|
+ p[-1] = ARMI_B_T4 | ARMC_BL((target - p) << 1);
|
|
+}
|
|
+
|
|
+/* Prepare tail of code. */
|
|
+static void asm_tail_prep(ASMState *as)
|
|
+{
|
|
+ MCode *p = as->mctop - 1; /* Leave room for exit branch. */
|
|
+ if (as->loopref) {
|
|
+ as->invmcp = as->mcp = p;
|
|
+ }
|
|
+ else {
|
|
+ as->mcp = p - 1; /* Leave room for stack pointer adjustment. */
|
|
+ as->invmcp = NULL;
|
|
+ }
|
|
+ *p = 0; /* Prevent load/store merging. */
|
|
+}
|
|
+
|
|
+/* -- Trace setup --------------------------------------------------------- */
|
|
+
|
|
+/* Ensure there are enough stack slots for call arguments. */
|
|
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
|
+{
|
|
+ IRRef args[CCI_NARGS_MAX * 2];
|
|
+ uint32_t i, nargs = CCI_XNARGS(ci);
|
|
+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
|
|
+ asm_collectargs(as, ir, ci, args);
|
|
+ for (i = 0; i < nargs; i++) {
|
|
+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
|
|
+ if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) {
|
|
+ if (irt_isnum(IR(args[i])->t)) {
|
|
+ if (nfpr > 0) nfpr--;
|
|
+ else fprodd = 0, nslots = (nslots + 3) & ~1;
|
|
+ }
|
|
+ else {
|
|
+ if (fprodd) fprodd--;
|
|
+ else if (nfpr > 0) fprodd = 1, nfpr--;
|
|
+ else nslots++;
|
|
+ }
|
|
+ }
|
|
+ else if (irt_isnum(IR(args[i])->t)) {
|
|
+ ngpr &= ~1;
|
|
+ if (ngpr > 0) ngpr -= 2;
|
|
+ else nslots += 2;
|
|
+ }
|
|
+ else {
|
|
+ if (ngpr > 0) ngpr--;
|
|
+ else nslots++;
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ if (ngpr > 0) ngpr--;
|
|
+ else nslots++;
|
|
+ }
|
|
+ }
|
|
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
|
|
+ as->evenspill = nslots;
|
|
+ return REGSP_HINT(RID_RET);
|
|
+}
|
|
+
|
|
+static void asm_setup_target(ASMState *as)
|
|
+{
|
|
+ /* May need extra exit for asm_stack_check on side traces. */
|
|
+ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
|
|
+}
|
|
+
|
|
+/* -- Trace patching ------------------------------------------------------ */
|
|
+
|
|
+/* Patch exit jumps of existing machine code to a new target. */
|
|
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
|
|
+{
|
|
+ MCode *p = T->mcode;
|
|
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
|
|
+ MCode *cstart = NULL, *cend = p;
|
|
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
|
|
+ MCode *px = exitstub_addr(J, exitno) - 1;
|
|
+ for (; p < pe; p++) {
|
|
+ /* Look for bl_cc exitstub, replace with b_cc target. */
|
|
+ uint32_t ins = *p;
|
|
+ if ((ins & 0xd000f800u) == 0xd000f000u &&
|
|
+ (((ARMC_BL_READ(ins) >> 1) ^ (px - p)) & 0x007fffffu) == 0 &&
|
|
+ p[-1] != ARM_NOPATCH_GC_CHECK) {
|
|
+ *p = ARMI_B_T4 | ARMC_BL((uint32_t)(((target - p) - 1) << 1));
|
|
+ cend = p + 1;
|
|
+ if (!cstart) cstart = p;
|
|
+ }
|
|
+ }
|
|
+ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
|
|
+ lj_mcode_sync(cstart, cend);
|
|
+ lj_mcode_patch(J, mcarea, 1);
|
|
+}
|
|
+
|
|
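For readers following lj_asm_patchexit and asm_tail_fixup above: a Thumb-2 B.W/BL offset is not a contiguous field but is scattered over S:imm10 in the first halfword and J1:J2:imm11 in the second, with I1 = NOT(J1 XOR S) and I2 = NOT(J2 XOR S). A minimal standalone sketch of packing a byte offset into those fields (field layout per the ARMv7-M ARM; this is an illustration, not code from the patch):

    #include <stdint.h>

    /* Pack a byte offset into the two halfwords of a Thumb-2 BL (T1).
       offset = SignExtend(S:I1:I2:imm10:imm11:'0'); B.W (T4) differs only
       in the second halfword base (0x9000 instead of 0xd000). */
    static void thumb2_bl_pack(uint16_t hw[2], int32_t byte_ofs)
    {
      uint32_t imm = ((uint32_t)byte_ofs >> 1) & 0x00ffffffu;  /* 24 bits */
      uint32_t s  = (imm >> 23) & 1;
      uint32_t j1 = ((((imm >> 22) & 1) ^ 1) ^ s) & 1;  /* J1 = ~I1 ^ S */
      uint32_t j2 = ((((imm >> 21) & 1) ^ 1) ^ s) & 1;  /* J2 = ~I2 ^ S */
      hw[0] = (uint16_t)(0xf000 | (s << 10) | ((imm >> 11) & 0x3ff));
      hw[1] = (uint16_t)(0xd000 | (j1 << 13) | (j2 << 11) | (imm & 0x7ff));
    }

In the little-endian 32-bit MCode word the first halfword lands in the low 16 bits, which is why the patch loop matches (ins & 0xd000f800u) == 0xd000f000u: 0xf000/0xf800 test the low halfword, 0xd000 the high one.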
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 43e44305..2e6b6fd4 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -151,7 +151,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
   uint32_t *p = page;
   void *target = (void *)lj_vm_ffi_callback;
   MSize slot;
-  /* This must match with the saveregs macro in buildvm_arm.dasc. */
+  /* This must match with the saveregs macro in buildvm_arm.dasc. jturnsek!!! */
   *p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC);
   *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR);
   *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD;
@@ -295,7 +295,7 @@ static void callback_mcode_new(CTState *cts)
     DWORD oprot;
     LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
   }
-#elif LJ_TARGET_POSIX
+#elif LJ_TARGET_POSIX && !LJ_TARGET_NUTTX
   mprotect(p, sz, (PROT_READ|PROT_EXEC));
 #endif
 }
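The hunk above compiles out the memory-protection call on NuttX: flat (no-MMU) builds execute code in place, so there is nothing to remap. For reference, this is all the skipped POSIX branch does on MMU targets (illustration only, not part of the patch):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Flip a page-aligned buffer from writable to executable. Returns 0
       on success. On flat NuttX configurations no equivalent exists. */
    static int make_executable(void *p, size_t sz)
    {
      return mprotect(p, sz, PROT_READ | PROT_EXEC);
    }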
diff --git a/src/lj_clib.c b/src/lj_clib.c
index f0ef6edd..0fc6419b 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -50,6 +50,8 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L)
 #define CLIB_SOEXT "%s.dylib"
 #elif LJ_TARGET_CYGWIN
 #define CLIB_SOEXT "%s.dll"
+#elif LJ_TARGET_NUTTX
+#define CLIB_SOEXT "%s"
 #else
 #define CLIB_SOEXT "%s.so"
 #endif
@@ -428,7 +430,11 @@ void lj_clib_unload(CLibrary *cl)
 void lj_clib_default(lua_State *L, GCtab *mt)
 {
   CLibrary *cl = clib_new(L, mt);
+#if LJ_TARGET_NUTTX
+  cl->handle = clib_loadlib(L, "c", 0);
+#else
   cl->handle = CLIB_DEFHANDLE;
+#endif
 }
 
 #endif
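Net effect of the two hunks above: on NuttX the default ffi.C namespace is backed by an explicit load of a module named plainly "c" (CLIB_SOEXT is "%s", so no ".so" suffix gets appended), instead of the process-wide default handle. Roughly what that resolves to, in dlfcn terms (a sketch; the module name and flags are illustrative):

    #include <dlfcn.h>

    /* Approximate shape of the NuttX default-namespace lookup. */
    static void *default_namespace_symbol(const char *name)
    {
      void *h = dlopen("c", RTLD_LAZY);  /* no extension on NuttX */
      return h ? dlsym(h, name) : 0;
    }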
diff --git a/src/lj_def.h b/src/lj_def.h
index b61297aa..03f60c3f 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -87,6 +87,7 @@ typedef unsigned int uintptr_t;
 #define LJ_MAX_EXITSTUBGR	16	/* Max. # of exit stub groups. */
 
 /* Various macros. */
+#undef UNUSED  /* The NuttX UNUSED macro conflicts with ours. Use our own. */
 #ifndef UNUSED
 #define UNUSED(x)	((void)(x))	/* to avoid warnings */
 #endif
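The unconditional #undef matters because the #ifndef guard alone would keep the system's definition. A tiny illustration of the collision fix (the system definition shown is hypothetical; only the shadowing mechanics matter):

    #define UNUSED(a) ((void)(1 || &(a)))  /* hypothetical system macro */
    #undef UNUSED                          /* drop it unconditionally */
    #define UNUSED(x) ((void)(x))          /* LuaJIT's own semantics */

    int main(void)
    {
      int unreferenced = 0;
      UNUSED(unreferenced);  /* now expands to ((void)(unreferenced)) */
      return 0;
    }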
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 52762eea..909d4d5a 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -87,25 +87,23 @@ typedef uint16_t HotCount;
 
 /* Global state, main thread and extra fields are allocated together. */
 typedef struct GG_State {
-  lua_State L;				/* Main thread. */
-  global_State g;			/* Global state. */
-#if LJ_TARGET_ARM && !LJ_TARGET_NX
-  /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
-  uint8_t align1[(16-sizeof(global_State))&15];
-#endif
+  lua_State L;				/* Main thread. */
 #if LJ_TARGET_MIPS
-  ASMFunction got[LJ_GOT__MAX];		/* Global offset table. */
+  ASMFunction got[LJ_GOT__MAX];		/* Global offset table. */
 #endif
 #if LJ_HASJIT
-  jit_State J;				/* JIT state. */
-  HotCount hotcount[HOTCOUNT_SIZE];	/* Hot counters. */
+  jit_State J;				/* JIT state. */
+  HotCount hotcount[HOTCOUNT_SIZE];	/* Hot counters. */
 #if LJ_TARGET_ARM && !LJ_TARGET_NX
   /* Ditto for J. */
-  uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
+  uint8_t align1[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
 #endif
 #endif
-  ASMFunction dispatch[GG_LEN_DISP];	/* Instruction dispatch tables. */
-  BCIns bcff[GG_NUM_ASMFF];		/* Bytecode for ASM fast functions. */
+  global_State g;	/* Global state. jturnsek: moved below J to avoid excessive negative offsets when LJ_HASJIT. */
+  /* Make g reachable via K12 encoded DISPATCH-relative addressing. */
+  uint8_t align2[(16-sizeof(global_State))&15];
+  ASMFunction dispatch[GG_LEN_DISP];	/* Instruction dispatch tables. */
+  BCIns bcff[GG_NUM_ASMFF];		/* Bytecode for ASM fast functions. */
 } GG_State;
 
 #define GG_OFS(field)	((int)offsetof(GG_State, field))
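The reordering keeps g within short-immediate reach of the dispatch table, since the interpreter addresses g relative to the DISPATCH register. Whether that holds can be checked at compile time; a hedged sketch under assumptions (the struct below is a stand-in for GG_State, and 4095 is the usual unshifted imm12 bound, not a value taken from the patch):

    #include <assert.h>
    #include <stddef.h>

    typedef struct GG_State_sketch {  /* stand-in, not the real layout */
      char L[64], J[2048], hotcount[128];
      char g[512], dispatch[1024];
    } GG_State_sketch;

    /* The distance between g and the dispatch table must fit a single
       load/store immediate offset. */
    static_assert(offsetof(GG_State_sketch, dispatch) -
                  offsetof(GG_State_sketch, g) <= 4095,
                  "g not reachable from DISPATCH with a short offset");

    int main(void) { return 0; }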
diff --git a/src/lj_emit_armv7m.h b/src/lj_emit_armv7m.h
new file mode 100644
index 00000000..5381df8b
--- /dev/null
+++ b/src/lj_emit_armv7m.h
@@ -0,0 +1,474 @@
+/*
+** ARMv7-M instruction emitter.
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* -- Constant encoding --------------------------------------------------- */
+
+#define INVAI_MASK 0xfbe0
+
+static uint32_t emit_invai[16] = {
+  /* AND, TST */ ((ARMI_AND ^ 0x1a00) ^ (ARMI_BIC ^ 0x1a00)) & INVAI_MASK,
+  /* BIC      */ ((ARMI_BIC ^ 0x1a00) ^ (ARMI_AND ^ 0x1a00)) & INVAI_MASK,
+  /* MOV, ORR */ ((ARMI_MOV ^ 0x1a00) ^ (ARMI_MVN ^ 0x1a00)) & INVAI_MASK,
+  /* MVN, ORN */ ((ARMI_MVN ^ 0x1a00) ^ (ARMI_MOV ^ 0x1a00)) & INVAI_MASK,
+  /* EOR, TEQ */ 0,
+  0,
+  0,
+  0,
+  /* ADD, CMN */ ((ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00)) & INVAI_MASK,
+  0,
+  /* ADC      */ ((ARMI_ADC ^ 0x1a00) ^ (ARMI_SBC ^ 0x1a00)) & INVAI_MASK,
+  /* SBC      */ ((ARMI_SBC ^ 0x1a00) ^ (ARMI_ADC ^ 0x1a00)) & INVAI_MASK,
+  0,
+  /* SUB, CMP */ ((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK,
+  /* RSB      */ 0,
+  0
+};
+
+/* Encode constant in K12 format for data processing instructions. */
+static unsigned int emit_isk12(ARMIns ai, signed int n)
+{
+  unsigned int invai, i, m = (unsigned int)n;
+  /* K12: 1bcdefgh value, rotated in steps of one bit. */
+  if (m <= 255) {
+    /* i:imm3 = 0000 */
+    return ARMC_K12(0, m);
+  }
+  else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+    /* i:imm3 = 0001 */
+    return ARMC_K12(0, 0x100 | (m & 0xff));
+  }
+  else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+    /* i:imm3 = 0010 */
+    return ARMC_K12(0, 0x200 | (m >> 8 & 0xff));
+  }
+  else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+    /* i:imm3 = 0011 */
+    return ARMC_K12(0, 0x300 | (m & 0xff));
+  }
+  else {
+    for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) {
+      if (m <= 255) {
+        if ((m & 0x80) && (i >= 128 * 8))
+          return ARMC_K12(0, i | (m & 0x7f));
+        else
+          continue;
+      }
+    }
+  }
+
+  /* Otherwise try negation/complement with the inverse instruction. */
+  invai = emit_invai[(ai >> 5) & 0xf];
+  if (!invai) return 0;  /* Failed. No inverse instruction. */
+  m = ~(unsigned int)n;
+  if (invai == (((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK) ||
+      invai == (((ARMI_CMP ^ 0x1a00) ^ (ARMI_CMN ^ 0x1a00)) & INVAI_MASK)) m++;
+  if (m <= 255) {
+    /* i:imm3 = 0000 */
+    return ARMC_K12(invai, m);
+  }
+  else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) {
+    /* i:imm3 = 0001 */
+    return ARMC_K12(invai, 0x100 | (m & 0xff));
+  }
+  else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) {
+    /* i:imm3 = 0010 */
+    return ARMC_K12(invai, 0x200 | (m >> 8 & 0xff));
+  }
+  else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) {
+    /* i:imm3 = 0011 */
+    return ARMC_K12(invai, 0x300 | (m & 0xff));
+  }
+  else {
+    for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) {
+      if (m <= 255) {
+        if ((m & 0x80) && (i >= 128 * 8))
+          return ARMC_K12(invai, i | (m & 0x7f));
+        else
+          continue;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/* -- Emit basic instructions --------------------------------------------- */
+
+static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
+{
+  *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_tnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm)
+{
+  *--as->mcp = ai | ARMF_T(rd) | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
+{
+  *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm);
+}
+
+static void emit_tm(ASMState *as, ARMIns ai, Reg rd, Reg rm)
+{
+  *--as->mcp = ai | ARMF_T(rd) | ARMF_M(rm);
+}
+
+static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
+{
+  *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn);
+}
+
+static void emit_tn(ASMState *as, ARMIns ai, Reg rd, Reg rn)
+{
+  *--as->mcp = ai | ARMF_T(rd) | ARMF_N(rn);
+}
+
+static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm)
+{
+  *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm);
+}
+
+static void emit_d(ASMState *as, ARMIns ai, Reg rd)
+{
+  *--as->mcp = ai | ARMF_D(rd);
+}
+
+static void emit_t(ASMState *as, ARMIns ai, Reg rd)
+{
+  *--as->mcp = ai | ARMF_T(rd);
+}
+
+static void emit_n(ASMState *as, ARMIns ai, Reg rn)
+{
+  *--as->mcp = ai | ARMF_N(rn);
+}
+
+static void emit_m(ASMState *as, ARMIns ai, Reg rm)
+{
+  *--as->mcp = ai | ARMF_M(rm);
+}
+
+static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+  lj_assertA(ofs >= -1020 && ofs <= 1020,
+             "load/store offset %d out of range", ofs);
+  if (ofs < 0) ofs = -ofs;
+  else ai |= ARMI_LSX_U;
+  *--as->mcp = ai | ARMI_LSX_P | ARMF_T(rd) | ARMF_D(rd + 1) | ARMF_N(rn) |
+               (((ofs >> 2) & 0xff) << 16);  /* imm multiples of 4 */
+}
+
+static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+  lj_assertA(ofs >= -255 && ofs <= 4095,
+             "load/store offset %d out of range", ofs);
+  /* Combine LDR/STR pairs to LDRD/STRD. */
+  if (*as->mcp == (ai | ARMI_LS_1 | ARMI_LS_P | ARMI_LS_U |
+                   ARMF_T(rd ^ 1) | ARMF_N(rn) | (ofs ^ 4)) &&
+      (ai & ~(ARMI_LDR ^ ARMI_STR)) == ARMI_STR && rd != rn &&
+      (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >> 2)) & 1) &&
+      as->mcp != as->mcloop) {
+    as->mcp++;
+    emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd & ~1, rn, ofs & ~4);
+    return;
+  }
+  if (ofs > 255) {
+    *--as->mcp = ai | ARMI_LS_I | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xfff) << 16);
+    return;
+  }
+  if (ofs < 0) ofs = -ofs;
+  else ai |= ARMI_LS_U;
+  *--as->mcp = ai | ARMI_LS_1 | ARMI_LS_P | ARMF_T(rd) | ARMF_N(rn) |
+               ((ofs & 0xff) << 16);
+}
+
+#if !LJ_SOFTFP
+static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
+{
+  lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs & 3) == 0,
+             "load/store offset %d out of range", ofs);
+  if (ofs < 0) ofs = -ofs;
+  else ai |= ARMI_LSX_U;
+  *--as->mcp = ai | ARMF_T(rd & 15) | ARMF_N(rn) | ((ofs >> 2) << 16);
+}
+#endif
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer spills of BASE/L. */
+#define emit_canremat(ref)	((ref) < ASMREF_L)
+
+/* Try to find a one step delta relative to another constant. */
+static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
+{
+  RegSet work = ~as->freeset & RSET_GPR;
+  while (work) {
+    Reg r = rset_picktop(work);
+    IRRef ref = regcost_ref(as->cost[r]);
+    lj_assertA(r != d, "dest reg not free");
+    if (emit_canremat(ref)) {
+      int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+      uint32_t k = emit_isk12(ARMI_ADD, delta);
+      if (k) {
+        if (k == ARMI_K12)
+          emit_dm(as, ARMI_MOV, d, r);
+        else
+          emit_dn(as, ARMI_ADD ^ k, d, r);
+        return 1;
+      }
+    }
+    rset_clear(work, r);
+  }
+  return 0;  /* Failed. */
+}
+
+/* Try to find a two step delta relative to another constant. */
+static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
+{
+  RegSet work = ~as->freeset & RSET_GPR;
+  while (work) {
+    Reg r = rset_picktop(work);
+    IRRef ref = regcost_ref(as->cost[r]);
+    lj_assertA(r != rd, "dest reg %d not free", rd);
+    if (emit_canremat(ref)) {
+      int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
+      if (other) {
+        int32_t delta = i - other;
+        uint32_t sh, inv = 0, k2, k;
+        if (delta < 0) {
+          delta = -delta;
+          inv = (ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00);
+        }
+        sh = lj_ffs(delta) & ~1;
+        k2 = emit_isk12(0, delta & (255 << sh));
+        k = emit_isk12(0, delta & ~(255 << sh));
+        if (k) {
+          emit_dn(as, ARMI_ADD ^ k2 ^ inv, rd, rd);
+          emit_dn(as, ARMI_ADD ^ k ^ inv, rd, r);
+          return 1;
+        }
+      }
+    }
+    rset_clear(work, r);
+  }
+  return 0;  /* Failed. */
+}
+
+/* Load a 32 bit constant into a GPR. */
+static void emit_loadi(ASMState *as, Reg rd, int32_t i)
+{
+  uint32_t k = emit_isk12(ARMI_MOV, i);
+  lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
+             "dest reg %d not free", rd);
+  if (k) {
+    /* Standard K12 constant. */
+    emit_d(as, ARMI_MOV ^ k, rd);
+  }
+  else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
+    /* 16 bit loword constant for ARMv6T2. */
+    emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) |
+               ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd);
+  }
+  else if (emit_kdelta1(as, rd, i)) {
+    /* One step delta relative to another constant. */
+  }
+  else if ((as->flags & JIT_F_ARMV6T2)) {
+    /* 32 bit hiword/loword constant for ARMv6T2. */
+    emit_d(as, ARMI_MOVT | (((i >> 16) & 0xff) << 16) |
+               (((i >> 16) & 0x700) << 20) | (((i >> 16) & 0x800) >> 1) |
+               (((i >> 16) & 0xf000) >> 12), rd);
+    emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) |
+               ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd);
+  }
+  else if (emit_kdelta2(as, rd, i)) {
+    /* Two step delta relative to another constant. */
+  }
+  else {
+    /* Otherwise construct the constant with up to 4 instructions. */
+    /* NYI: use mvn+bic, use pc-relative loads. */
+    for (;;) {
+      uint32_t sh = lj_ffs(i) & ~1;
+      int32_t m = i & (255 << sh);
+      i &= ~(255 << sh);
+      if (i == 0) {
+        emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
+        break;
+      }
+      emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
+    }
+  }
+}
+
+#define emit_loada(as, rd, addr)	emit_loadi(as, (rd), i32ptr((addr)))
+
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+
+/* Get/set from constant pointer. */
+static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
+{
+  int32_t i = i32ptr(p);
+  emit_lso(as, ai, r,
+           ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)),
+           (i & 4095));
+}
+
+#if !LJ_SOFTFP
+/* Load a number constant into an FPR. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+  cTValue *tv = ir_knum(ir);
+  int32_t i;
+  if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
+    uint32_t hi = tv->u32.hi;
+    uint32_t b = ((hi >> 22) & 0x1ff);
+    if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) {
+      *--as->mcp = ARMI_VMOVI_D | ARMF_T(r & 15) |
+                   ((((tv->u32.hi >> 12) & 0x00080000) |
+                     ((tv->u32.hi >> 4) & 0x00070000)) >> 16) |
+                   (((tv->u32.hi >> 16) & 0x0000000f) << 16);
+      return;
+    }
+  }
+  i = i32ptr(tv);
+  emit_vlso(as, ARMI_VLDR_D, r,
+            ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020));
+}
+#endif
+
+/* Get/set global_State fields. */
+#define emit_getgl(as, r, field) \
+  emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
+#define emit_setgl(as, r, field) \
+  emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field)
+
+/* Trace number is determined from pc of exit instruction. */
+#define emit_setvmstate(as, i)	UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC. */
+#define emit_label(as)	((as)->mcp)
+
+static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (target - p) << 1;
+  lj_assertA(((delta + 0x0080000) >> 20) == 0, "branch target out of range");
+  *--p = ai | ARMC_B((uint32_t)delta & 0x00fffffu);
+  as->mcp = p;
+}
+
+static void emit_branchlink(ASMState *as, ARMIns ai, MCode *target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (target - p) << 1;
+  *--p = ai | ARMC_BL((uint32_t)delta & 0x0ffffffu);
+  as->mcp = p;
+}
+
+static void emit_jmp(ASMState *as, MCode *target)
+{
+  MCode *p = as->mcp;
+  ptrdiff_t delta = (target - p) << 1;
+  lj_assertA(((delta + 0x0800000) >> 24) == 0, "jump target out of range");
+  *--p = ARMI_B_T4 | ARMC_BL((uint32_t)delta & 0x00ffffffu);
+  as->mcp = p;
+}
+
+static void emit_call(ASMState *as, void *target)
+{
+  MCode *p = --as->mcp;
+  ptrdiff_t delta = ((char *)target - (char *)p) - 4;
+  if ((((delta >> 1) + 0x00100000) >> 21) == 0) {
+    /* Only Thumb code is allowed. */
+    *p = ARMI_BL | ARMC_BL((uint32_t)(delta >> 1));
+  }
+  else {
+    /* Target out of range: need indirect call. But don't use R0-R3. */
+    Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12 + 1));
+    *p = ARMI_BLXr | ARMF_M2(r);
+  }
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+#if LJ_SOFTFP
+  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+  if (dst >= RID_MAX_GPR) {
+    emit_tm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
+            (dst & 15), (src & 15));
+    return;
+  }
+#endif
+  if (as->mcp != as->mcloop) {
+    /* Swap early registers for loads/stores. */
+    MCode ins = *as->mcp, swp = (src ^ dst);
+    if ((ins & 0x0fc0ff80) == 0x0000f800) {
+      if (!((ins ^ dst) & 0x0000000f))
+        *as->mcp = ins ^ swp;  /* Swap N in load/store. */
+      if (!(ins & 0x00000010) && !((ins ^ (dst << 28)) & 0xf0000000))
+        *as->mcp = ins ^ (swp << 28);  /* Swap D in store. */
+    }
+  }
+  emit_dm(as, ARMI_MOV, dst, src);
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+#if LJ_SOFTFP
+  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+  if (r >= RID_MAX_GPR)
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
+  else
+#endif
+    emit_lso(as, ARMI_LDR, r, base, ofs);
+}
+
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+#if LJ_SOFTFP
+  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
+#else
+  if (r >= RID_MAX_GPR)
+    emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
+  else
+#endif
+    emit_lso(as, ARMI_STR, r, base, ofs);
+}
+
+/* Emit an arithmetic/logic operation with a constant operand. */
+static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src,
+                     int32_t i, RegSet allow)
+{
+  uint32_t k = emit_isk12(ai, i);
+  if (k)
+    emit_dn(as, ai ^ k, dest, src);
+  else
+    emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+  if (ofs)
+    emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r));
+}
+
+#define emit_spsub(as, ofs)	emit_addptr(as, RID_SP, -(ofs))
+
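The cases in emit_isk12 above mirror the Thumb-2 "modified immediate" forms: an 8-bit value used directly, splatted into both halfwords, into both high bytes, into all four bytes, or rotated with its top bit set; constants that fit none of these are retried negated/complemented with the inverse instruction. A standalone checker for the direct forms (a sketch that intentionally omits the inverse-instruction fallback):

    #include <stdint.h>
    #include <stdio.h>

    /* Accepts exactly the constant shapes the first half of emit_isk12
       accepts: 000000ab, 00ab00ab, ab00ab00, abababab, or an 8-bit value
       with msb set rotated right by 8..31. */
    static int is_thumb2_modimm(uint32_t m)
    {
      int i;
      if (m <= 255) return 1;
      if (!(m & 0xff00ff00) && (m >> 16) == (m & 0xffff)) return 1;
      if (!(m & 0x00ff00ff) && (m >> 16) == (m & 0xffff)) return 1;
      if ((m >> 16) == (m & 0xffff) && ((m >> 8) & 0xff) == (m & 0xff))
        return 1;
      for (i = 8; i < 32; i++) {  /* undo a right-rotation by i */
        uint32_t r = (m << i) | (m >> (32 - i));
        if (r <= 255 && (r & 0x80)) return 1;
      }
      return 0;
    }

    int main(void)
    {
      printf("%d %d %d %d\n",
             is_thumb2_modimm(0x000000abu), is_thumb2_modimm(0x00ab00abu),
             is_thumb2_modimm(0xababababu), is_thumb2_modimm(0x12345678u));
      return 0;  /* prints: 1 1 1 0 */
    }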
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 32b3861a..10644724 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -107,7 +107,7 @@
 #define JIT_P_sizemcode_DEFAULT	64
 #else
 /* Could go as low as 4K, but the mmap() overhead would be rather high. */
-#define JIT_P_sizemcode_DEFAULT	32
+#define JIT_P_sizemcode_DEFAULT	8
 #endif
 
 /* Optimization parameters and their defaults. Length is a char in octal! */
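Shrinking the default per-area machine-code size from 32 KB to 8 KB suits small NuttX RAM budgets; applications on larger boards can still raise it at runtime through the stock jit.opt interface, e.g. from embedding C code (the values here are illustrative):

    #include <lauxlib.h>
    #include <lualib.h>

    /* Restore a 32 KB mcode area on a RAM-rich board. */
    static void tune_mcode(lua_State *L)
    {
      (void)luaL_dostring(L, "jit.opt.start('sizemcode=32')");
    }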
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 163aada4..7ea1fe2f 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -45,6 +45,8 @@ void lj_mcode_sync(void *start, void *end)
   sys_icache_invalidate(start, (char *)end-(char *)start);
 #elif LJ_TARGET_PPC
   lj_vm_cachesync(start, end);
+#elif LJ_TARGET_NUTTX
+  up_invalidate_icache_all();
 #elif defined(__GNUC__) || defined(__clang__)
   __clear_cache(start, end);
 #else
@@ -86,6 +88,50 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
   return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
 }
 
+#elif LJ_TARGET_NUTTX
+
+#include <nuttx/config.h>
+#include <nuttx/mm/mm.h>
+
+static bool initialized = false;
+static struct mm_heap_s *g_mcode_heap;
+
+#define MCPROT_RW	0
+#define MCPROT_RX	0
+#define MCPROT_RWX	0
+
+static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+{
+  UNUSED(J); UNUSED(prot);
+
+  if (!initialized) {
+    static uint8_t buffer[CONFIG_LUAJIT_MCODE_SIZE]
+      locate_data(CONFIG_LUAJIT_MCODE_SECTION_NAME);
+    g_mcode_heap = mm_initialize("mcode", (void *)buffer,
+                                 CONFIG_LUAJIT_MCODE_SIZE);
+    initialized = true;
+  }
+
+  void *p = mm_malloc(g_mcode_heap, sz);
+  if (p == NULL) {
+    if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
+  }
+  return p;
+}
+
+static void mcode_free(jit_State *J, void *p, size_t sz)
+{
+  UNUSED(J); UNUSED(sz);
+  mm_free(g_mcode_heap, p);
+}
+
+static int mcode_setprot(void *p, size_t sz, int prot)
+{
+  UNUSED(p); UNUSED(sz); UNUSED(prot);
+  return 0;
+}
+
 #elif LJ_TARGET_POSIX
 
 #include <sys/mman.h>
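The NuttX allocator above carves all trace machine code out of one statically placed buffer, which keeps traces at a fixed, instruction-bus-reachable address and within Thumb-2 branch range of each other. Where NuttX's mm heap is not wanted, the same idea reduces to a fixed-buffer arena; a minimal sketch (names and sizes are illustrative, not part of the patch):

    #include <stddef.h>
    #include <stdint.h>

    /* Simplest possible stand-in for the dedicated mcode heap: a bump
       allocator over a statically placed buffer. No free(), by design. */
    static uint8_t mcode_arena[8 * 1024] __attribute__((aligned(8)));
    static size_t mcode_top;

    static void *mcode_bump_alloc(size_t sz)
    {
      sz = (sz + 7u) & ~(size_t)7u;  /* keep 8-byte alignment */
      if (mcode_top + sz > sizeof(mcode_arena)) return NULL;
      void *p = &mcode_arena[mcode_top];
      mcode_top += sz;
      return p;
    }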
diff --git a/src/lj_target.h b/src/lj_target.h
index 19716928..8cee29ea 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -137,7 +137,11 @@ typedef uint32_t RegCost;
 #if LJ_TARGET_X86ORX64
 #include "lj_target_x86.h"
 #elif LJ_TARGET_ARM
+#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__)
+#include "lj_target_armv7m.h"
+#else
 #include "lj_target_arm.h"
+#endif
 #elif LJ_TARGET_ARM64
 #include "lj_target_arm64.h"
 #elif LJ_TARGET_PPC
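For reference, GCC and Clang predefine __ARM_ARCH_7M__ for Cortex-M3 and __ARM_ARCH_7EM__ for Cortex-M4/M7 targets. A quick way to confirm which branch of the dispatch above a given toolchain invocation will take is a throwaway translation unit like this (illustrative probe only):

    #if defined(__ARM_ARCH_7EM__)
    #error "ARMv7E-M (Cortex-M4/M7): lj_target_armv7m.h will be used"
    #elif defined(__ARM_ARCH_7M__)
    #error "ARMv7-M (Cortex-M3): lj_target_armv7m.h will be used"
    #else
    #error "classic ARM: lj_target_arm.h will be used"
    #endif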
diff --git a/src/lj_target_armv7m.h b/src/lj_target_armv7m.h
new file mode 100755
index 00000000..5dc6d488
--- /dev/null
+++ b/src/lj_target_armv7m.h
@@ -0,0 +1,315 @@
+/*
+** Definitions for ARMv7-M CPUs.
+** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_ARMV7M_H
+#define _LJ_TARGET_ARMV7M_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+  _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \
+  _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC)
+#if LJ_SOFTFP
+#define FPRDEF(_)
+#else
+#define FPRDEF(_) \
+  _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
+  _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15)
+#endif
+#define VRIDDEF(_)
+
+#define RIDENUM(name)	RID_##name,
+
+enum {
+  GPRDEF(RIDENUM)	/* General-purpose registers (GPRs). */
+  FPRDEF(RIDENUM)	/* Floating-point registers (FPRs). */
+  RID_MAX,
+  RID_TMP = RID_LR,
+
+  /* Calling conventions. */
+  RID_RET = RID_R0,
+  RID_RETLO = RID_R0,
+  RID_RETHI = RID_R1,
+#if LJ_SOFTFP
+  RID_FPRET = RID_R0,
+#else
+  RID_FPRET = RID_D0,
+#endif
+
+  /* These definitions must match with the *.dasc file(s): */
+  RID_BASE = RID_R9,		/* Interpreter BASE. */
+  RID_LPC = RID_R6,		/* Interpreter PC. */
+  RID_DISPATCH = RID_R7,	/* Interpreter DISPATCH table. */
+  RID_LREG = RID_R8,		/* Interpreter L. */
+
+  /* Register ranges [min, max) and number of registers. */
+  RID_MIN_GPR = RID_R0,
+  RID_MAX_GPR = RID_PC + 1,
+  RID_MIN_FPR = RID_MAX_GPR,
+#if LJ_SOFTFP
+  RID_MAX_FPR = RID_MIN_FPR,
+#else
+  RID_MAX_FPR = RID_D15 + 1,
+#endif
+  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF	RID_NUM_GPR
+#define RID_MIN_KREF	RID_R0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except sp, lr and pc. */
+#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_R12+1))
+#define RSET_GPREVEN \
+  (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \
+   RID2RSET(RID_R8)|RID2RSET(RID_R10))
+#define RSET_GPRODD \
+  (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \
+   RID2RSET(RID_R9)|RID2RSET(RID_R11))
+#if LJ_SOFTFP
+#define RSET_FPR	0
+#else
+#define RSET_FPR	(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
+#endif
+#define RSET_ALL	(RSET_GPR|RSET_FPR)
+#define RSET_INIT	RSET_ALL
+
+/* ABI-specific register sets. lr is an implicit scratch register. */
+#define RSET_SCRATCH_GPR_	(RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12))
+#ifdef __APPLE__
+#define RSET_SCRATCH_GPR	(RSET_SCRATCH_GPR_|RID2RSET(RID_R9))
+#else
+#define RSET_SCRATCH_GPR	RSET_SCRATCH_GPR_
+#endif
+#if LJ_SOFTFP
+#define RSET_SCRATCH_FPR	0
+#else
+#define RSET_SCRATCH_FPR	(RSET_RANGE(RID_D0, RID_D7+1))
+#endif
+#define RSET_SCRATCH	(RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR	RID_R0
+#define REGARG_LASTGPR	RID_R3
+#define REGARG_NUMGPR	4
+#if LJ_ABI_SOFTFP
+#define REGARG_FIRSTFPR	0
+#define REGARG_LASTFPR	0
+#define REGARG_NUMFPR	0
+#else
+#define REGARG_FIRSTFPR	RID_D0
+#define REGARG_LASTFPR	RID_D7
+#define REGARG_NUMFPR	8
+#endif
+
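The RSET_* definitions above build on LuaJIT's generic register-set helpers from lj_target.h, where a RegSet is a 32-bit bitmask with one bit per register id. A condensed restatement of those helpers, shown here only to make the set algebra above readable (sketch; compare the upstream macros):

    #include <stdint.h>

    typedef uint32_t RegSet;                     /* one bit per register id */

    #define RID2RSET(r)        (((RegSet)1) << (r))
    /* All ids in [lo, hi): set bits lo..hi-1. */
    #define RSET_RANGE(lo, hi) ((RID2RSET(hi) - 1) - (RID2RSET(lo) - 1))

    #define rset_test(rs, r)   (((rs) >> (r)) & 1)
    #define rset_set(rs, r)    ((rs) |= RID2RSET(r))
    #define rset_clear(rs, r)  ((rs) &= ~RID2RSET(r))

So RSET_GPR above is the contiguous run r0..r12, RSET_SCRATCH_GPR is the EABI caller-saved subset {r0..r3, r12}, and rset_exclude()/ra_allock() seen earlier operate on these masks.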
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the *.dasc file(s).
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+*/
+#define SPS_FIXED	2
+#define SPS_FIRST	2
+
+#define SPOFS_TMP	0
+
+#define sps_scale(slot)	(4 * (int32_t)(slot))
+#define sps_align(slot)	(((slot) - SPS_FIXED + 1) & ~1)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+#if !LJ_SOFTFP
+  lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
+#endif
+  int32_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
+  int32_t spill[256];		/* Spill slots. */
+} ExitState;
+
+/* PC after instruction that caused an exit. Used to find the trace number. */
+#define EXITSTATE_PCREG	RID_PC
+/* Highest exit + 1 indicates stack check. */
+#define EXITSTATE_CHECKEXIT	1
+
+#define EXITSTUB_SPACING	4
+#define EXITSTUBS_PER_GROUP	32
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* Instruction fields. */
+#define ARMF_CC(ai, cc)	((ai) | ((cc) << 6))
+#define ARMF_N(r)	((r)<<0)
+#define ARMF_T(r)	((r)<<28)
+#define ARMF_D(r)	((r)<<24)
+#define ARMF_M(r)	((r)<<16)
+#define ARMF_M2(r)	((r)<<19)	/* BLXr */
+#define ARMF_SH(sh, n)	(((sh)<<20)|(((n)&0x3)<<22)|((((n)>>2)&0x7)<<28))
+#define ARMF_LSL(n)	(((n)&0x3)<<20)
+#define ARMF_RSH(sh, r)	(0xf0000000|((sh)<<5)|ARMF_M(r))
+
+/* Instruction compositing. */
+#define ARMC_K12(arg1, arg2)	(((arg1)^ARMI_K12)| \
+				 (((arg2)&0xff)<<16)| \
+				 (((arg2)&0x700)<<20)| \
+				 (((arg2)&0x800)>>1))
+#define ARMC_B(arg)	((((arg)&0x7ff)<<16)| \
+			 (((arg)&0x1f800)>>11)| \
+			 (((arg)&0x20000)<<12)| \
+			 (((arg)&0x40000)<<9)| \
+			 (((arg)&0x80000)>>9))
+#define ARMC_BL(arg)	((((arg)&0x7ff)<<16)| \
+			 (((arg)&0x1ff800)>>11)| \
+			 (((~(((arg)&0x200000)>>21)&0x1)^((((arg)&0x800000)>>23)&0x1))<<27)| \
+			 (((~(((arg)&0x400000)>>22)&0x1)^((((arg)&0x800000)>>23)&0x1))<<29)| \
+			 (((((arg)&0x800000)>>23)&0x1)<<10))
+#define ARMC_BL_READ(ins)	(((((ins)&0x07ff0000u)>>16))| \
+				 (((ins)&0x000003ffu)<<11)| \
+				 (((~((((ins)&0x08000000u)>>27)^(((ins)&0x00000400u)>>10)))&0x1)<<21)| \
+				 (((~((((ins)&0x20000000u)>>29)^(((ins)&0x00000400u)>>10)))&0x1)<<22)| \
+				 ((((ins)&0x00000400u)>>10)<<23))
+#define ARMI_IT(cc)	*--as->mcp = (0xbf08bf00u|(((cc)&0xf)<<20))
+
+
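A detail worth calling out: this port stores each 32-bit Thumb-2 instruction with its two halfwords swapped (the first halfword of the instruction stream lives in bits 16..31 of the word), which explains the otherwise surprising shift counts in the ARMC_K12/ARMC_B/ARMC_BL compositing macros above. As a sketch, ARMC_K12 places the i:imm3:imm8 fields of a 12-bit modified immediate like this (illustrative C; field names follow the ARMv7-M ARM):

    #include <stdint.h>

    /* k12 = i:imm3:imm8 (bit 11 = i, bits 10..8 = imm3, bits 7..0 = imm8). */
    static uint32_t armc_k12(uint32_t ins, uint32_t k12)
    {
      return ins                        /* ARMC_K12 also XORs in ARMI_K12 */
           | ((k12 & 0x0ffu) << 16)     /* imm8 -> bits 16..23 (2nd halfword) */
           | ((k12 & 0x700u) << 20)     /* imm3 -> bits 28..30 (2nd halfword) */
           | ((k12 & 0x800u) >> 1);     /* i    -> bit 10 (1st halfword)      */
    }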
+typedef enum ARMIns {
+  ARMI_CCAL = 0x000003c0,
+  ARMI_S = 0x00000010,
+  ARMI_K12 = 0x00001a00,
+
+  ARMI_LS_W = 0x01000000,
+  ARMI_LS_U = 0x02000000,
+  ARMI_LS_P = 0x04000000,
+  ARMI_LS_1 = 0x08000000,
+  ARMI_LS_I = 0x00000080,
+  ARMI_LSX_W = 0x00000020,
+  ARMI_LSX_U = 0x00000080,
+  ARMI_LSX_P = 0x00000100,
+
+  ARMI_AND = 0x0000ea00,
+  ARMI_EOR = 0x0000ea80,
+  ARMI_SUB = 0x0000eba0,
+  ARMI_RSB = 0x0000ebc0,
+  ARMI_ADD = 0x0000eb00,
+  ARMI_ADC = 0x0000eb40,
+  ARMI_SBC = 0x0000eb60,
+  /* ARMI_RSC = 0xe0e00000, */
+  ARMI_TST = 0x0f00ea10,
+  ARMI_TEQ = 0x0f00ea90,
+  ARMI_CMP = 0x0f00ebb0,
+  ARMI_CMN = 0x0f00eb10,
+  ARMI_ORR = 0x0000ea40,
+  ARMI_MOV = 0x0000ea4f,
+  ARMI_BIC = 0x0000ea20,
+  ARMI_MVN = 0x0000ea6f,
+  ARMI_NOP = 0xbf00bf00,
+
+  ARMI_MUL = 0xf000fb00,
+  ARMI_SMULL = 0x0000fb80,
+
+  ARMI_LDR = 0x0000f850,
+  ARMI_LDRB = 0x0000f810,
+  ARMI_LDRH = 0x0000f830,
+  ARMI_LDRSB = 0x0000f910,
+  ARMI_LDRSH = 0x0000f930,
+  ARMI_LDRD = 0x0000e850,
+  ARMI_STR = 0x0000f840,
+  ARMI_STRB = 0x0000f800,
+  ARMI_STRH = 0x0000f820,
+  ARMI_STRD = 0x0000e840,
+  ARMI_PUSH = 0x0000e92d,
+
+  ARMI_B = 0x8000f000,
+  ARMI_B_T4 = 0x9000f000,
+  ARMI_BL = 0xd000f000,
+  ARMI_BLXr = 0x4780bf00,
+
+  /* ARMv6 */
+  ARMI_REV = 0xf080fa90,
+  ARMI_SXTB = 0xf080fa4f,
+  ARMI_SXTH = 0xf080fa0f,
+  ARMI_UXTB = 0xf080fa5f,
+  ARMI_UXTH = 0xf080fa1f,
+
+  /* ARMv6T2 */
+  ARMI_MOVW = 0x0000f240,
+  ARMI_MOVT = 0x0000f2c0,
+
+  /* VFP */
+  ARMI_VMOV_D = 0x0b40eeb0,
+  ARMI_VMOV_S = 0x0a40eeb0,
+  ARMI_VMOVI_D = 0x0b00eeb0,
+
+  ARMI_VMOV_R_S = 0x0a10ee10,
+  ARMI_VMOV_S_R = 0x0a10ee00,
+  ARMI_VMOV_RR_D = 0x0b10ec50,
+  ARMI_VMOV_D_RR = 0x0b10ec40,
+
+  ARMI_VADD_D = 0x0b00ee30,
+  ARMI_VSUB_D = 0x0b40ee30,
+  ARMI_VMUL_D = 0x0b00ee20,
+  ARMI_VMLA_D = 0x0b00ee00,
+  ARMI_VMLS_D = 0x0b40ee00,
+  ARMI_VNMLS_D = 0x0b00ee10,
+  ARMI_VDIV_D = 0x0b00ee80,
+
+  ARMI_VABS_D = 0x0bc0eeb0,
+  ARMI_VNEG_D = 0x0b40eeb1,
+  ARMI_VSQRT_D = 0x0bc0eeb1,
+
+  ARMI_VCMP_D = 0x0b40eeb4,
+  ARMI_VCMPZ_D = 0x0b40eeb5,
+
+  ARMI_VMRS = 0xfa10eef1,
+
+  ARMI_VCVT_S32_F32 = 0x0ac0eebd,
+  ARMI_VCVT_S32_F64 = 0x0bc0eebd,
+  ARMI_VCVT_U32_F32 = 0x0ac0eebc,
+  ARMI_VCVT_U32_F64 = 0x0bc0eebc,
+  ARMI_VCVT_F32_S32 = 0x0ac0eeb8,
+  ARMI_VCVT_F64_S32 = 0x0bc0eeb8,
+  ARMI_VCVT_F32_U32 = 0x0a40eeb8,
+  ARMI_VCVT_F64_U32 = 0x0b40eeb8,
+  ARMI_VCVT_F32_F64 = 0x0bc0eeb7,
+  ARMI_VCVT_F64_F32 = 0x0ac0eeb7,
+
+  ARMI_VLDR_S = 0x0a00ed10,
+  ARMI_VLDR_D = 0x0b00ed10,
+  ARMI_VSTR_S = 0x0a00ed00,
+  ARMI_VSTR_D = 0x0b00ed00,
+} ARMIns;
+
+typedef enum ARMShift {
+  ARMSH_LSL, ARMSH_LSR, ARMSH_ASR, ARMSH_ROR
+} ARMShift;
+
+/* ARM condition codes. */
+typedef enum ARMCC {
+  CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
+  CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
+  CC_HS = CC_CS, CC_LO = CC_CC
+} ARMCC;
+
+#endif
diff --git a/src/vm_armv7m.dasc b/src/vm_armv7m.dasc
new file mode 100755
index 00000000..13266007
--- /dev/null
+++ b/src/vm_armv7m.dasc
@@ -0,0 +1,4901 @@
+|// Low-level VM code for ARMv7-M CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h
+|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch armv7m
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|// The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|.macro ldrd_i, rt, rt2, rn, rm
+|  add rt, rn, rm
+|  ldm rt, {rt, rt2}
+|.endmacro
+|.macro ldrd_iw, rt, rt2, rn, rm
+|  add rn, rn, rm
+|  ldrd rt, rt2, [rn]
+|.endmacro
+|
+|.macro ldrdlo_i, rt, rt2, rn, rm
+|  itt lo
+|  addlo rt, rn, rm
+|  ldmlo rt, {rt, rt2}
+|.endmacro
+|.macro ldrdlo_iw, rt, rt2, rn, rm
+|  itt lo
+|  addlo rn, rn, rm
+|  ldrdlo rt, rt2, [rn]
+|.endmacro
+|
+|.macro strd_i, rt, rt2, rn, rm
+|  add rn, rn, rm
+|  strd rt, rt2, [rn]
+|  sub rn, rn, rm
+|.endmacro
+|
+|.macro strdne_i, rt, rt2, rn, rm
+|  ittt ne
+|  addne rn, rn, rm
+|  strdne rt, rt2, [rn]
+|  subne rn, rn, rm
+|.endmacro
+|.macro strdls_i, rt, rt2, rn, rm
+|  ittt ls
+|  addls rn, rn, rm
+|  strdls rt, rt2, [rn]
+|  subls rn, rn, rm
+|.endmacro
+|.macro strdhi_i, rt, rt2, rn, rm
+|  ittt hi
+|  addhi rn, rn, rm
+|  strdhi rt, rt2, [rn]
+|  subhi rn, rn, rm
+|.endmacro
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define MASKR8, r4	// 255*8 constant for fast bytecode decoding.
+|.define KBASE, r5	// Constants of current Lua function.
+|.define PC, r6		// Next PC.
+|.define DISPATCH, r7	// Opcode dispatch table.
+|.define LREG, r8	// Register holding lua_State (also in SAVE_L).
+|
+|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+.
+|.define BASE, r9	// Base of current Lua stack frame.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA, r10	// Callee-save.
+|.define RC, r11	// Callee-save.
+|.define RB, r12
+|.define OP, r12	// Overlaps RB, must not be lr.
+|.define INS, lr
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, r0
+|.define CARG2, r1
+|.define CARG3, r2
+|.define CARG4, r3
+|
+|.define CRET1, r0
+|.define CRET2, r1
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.define SAVE_R4, [sp, #28]
+|.define CFRAME_SPACE, #28
+|.define SAVE_ERRF, [sp, #24]
+|.define SAVE_NRES, [sp, #20]
+|.define SAVE_CFRAME, [sp, #16]
+|.define SAVE_L, [sp, #12]
+|.define SAVE_PC, [sp, #8]
+|.define SAVE_MULTRES, [sp, #4]
+|.define ARG5, [sp]
+|
+|.define TMPDhi, [sp, #4]
+|.define TMPDlo, [sp]
+|.define TMPD, [sp]
+|.define TMPDp, sp
+|
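The SAVE_* slots above define the interpreter's C frame. Laid out as a struct, they look roughly like the following (illustrative only; the authoritative offsets are the CFRAME_OFS_* values in lj_frame.h, which this block must mirror):

    #include <stdint.h>

    typedef struct CFrameArmv7m {
      int32_t arg5;     /* [sp, #0]  ARG5 (aliases TMPDlo)         */
      int32_t multres;  /* [sp, #4]  SAVE_MULTRES (aliases TMPDhi) */
      int32_t savepc;   /* [sp, #8]  SAVE_PC     */
      int32_t L;        /* [sp, #12] SAVE_L      */
      int32_t cframe;   /* [sp, #16] SAVE_CFRAME */
      int32_t nres;     /* [sp, #20] SAVE_NRES   */
      int32_t errf;     /* [sp, #24] SAVE_ERRF   */
      int32_t r4;       /* [sp, #28] SAVE_R4 (FPU build only) */
    } CFrameArmv7m;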
+|.if FPU
+|.macro saveregs
+|  push {r5, r6, r7, r8, r9, r10, r11, lr}
+|  vpush {d8-d15}
+|  sub sp, sp, CFRAME_SPACE+4
+|  str r4, SAVE_R4
+|.endmacro
+|.macro restoreregs_ret
+|  ldr r4, SAVE_R4
+|  add sp, sp, CFRAME_SPACE+4
+|  vpop {d8-d15}
+|  pop {r5, r6, r7, r8, r9, r10, r11, pc}
+|.endmacro
+|.else
+|.macro saveregs
+|  push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+|  sub sp, sp, CFRAME_SPACE
+|.endmacro
+|.macro restoreregs_ret
+|  add sp, sp, CFRAME_SPACE
+|  pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+|.endmacro
+|.endif
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; bkpt #0; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC, #-8
+|.define FRAME_PC, #-4
+|
+|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro
+|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro
+|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro
+|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro
+|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro
+|
+|// Instruction fetch.
+|.macro ins_NEXT1
+|  ldrb OP, [PC]
+|.endmacro
+|.macro ins_NEXT2
+|  ldr INS, [PC], #4
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT3
+|  ldr OP, [DISPATCH, OP, lsl #2]
+|  decode_RA8 RA, INS
+|  decode_RD RC, INS
+|  bx OP
+|.endmacro
+|.macro ins_NEXT
+|  ins_NEXT1
+|  ins_NEXT2
+|  ins_NEXT3
+|.endmacro
+|
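For readers new to the interpreter core: ins_NEXT is the heart of the dispatch loop. In C terms it does roughly the following (illustrative sketch; names mirror the macros above, and the RA operand comes out pre-multiplied by 8 because MASKR8 holds 255*8):

    #include <stdint.h>

    typedef void (*OpHandler)(void);

    static void ins_next(const uint32_t **pcp, OpHandler *dispatch)
    {
      uint32_t ins = *(*pcp)++;                /* ldr INS, [PC], #4 */
      uint32_t op  = ins & 0xff;               /* decode_OP         */
      uint32_t ra8 = (ins >> 5) & (255u * 8);  /* decode_RA8: RA*8  */
      uint32_t rd  = ins >> 16;                /* decode_RD         */
      (void)ra8; (void)rd;                     /* consumed by the handler */
      dispatch[op]();  /* ldr OP, [DISPATCH, OP, lsl #2]; bx OP */
    }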
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|  .define ins_next1, ins_NEXT1
+|  .define ins_next2, ins_NEXT2
+|  .define ins_next3, ins_NEXT3
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  .macro ins_next
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next1
+|  .endmacro
+|  .macro ins_next2
+|  .endmacro
+|  .macro ins_next3
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Avoid register name substitution for field name.
+#define field_pc pc
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+|  ldr PC, LFUNC:CARG3->field_pc
+|  ldrb OP, [PC]	// STALL: load PC. early PC.
+|  ldr INS, [PC], #4
+|  ldr OP, [DISPATCH, OP, lsl #2]	// STALL: load OP. early OP.
+|  decode_RA8 RA, INS
+|  add RA, RA, BASE
+|  bx OP
+|.endmacro
+|
+|.macro ins_call
+|  // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+|  str PC, [BASE, FRAME_PC]
+|  ins_callt	// STALL: locked PC.
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to test operand types.
+|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro
+|.macro checktpeq, reg, tp; it eq; cmneq reg, #-tp; .endmacro
+|.macro checktpne, reg, tp; it ne; cmnne reg, #-tp; .endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro
+|
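The checktp family leans on LuaJIT's 32-bit tagged-value layout: the type tag is a small negative integer in the high word (LJ_TNIL is ~0u, i.e. -1, with further types at -2, -3, ...). "cmn reg, #-tp" adds the negated tag, so the Z flag is set exactly when the tag matches, in a single instruction; and because the tags are ordered, the unsigned conditions used elsewhere (bhs/bls against LJ_TISNUM) double as "is (not) a number" range checks. Roughly (sketch):

    #include <stdint.h>

    #define TAG_NIL   (-1)  /* ~0u, cf. LJ_TNIL */
    #define TAG_FALSE (-2)
    #define TAG_TRUE  (-3)
    /* ... further types are successively smaller negatives ... */

    static int tag_is(int32_t itype, int32_t tag)
    {
      /* cmn itype, #-tag: flags of itype + (-tag); Z set <=> itype == tag. */
      return itype + (-tag) == 0;
    }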
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta
+|  lsr CARG1, PC, #1
+|  and CARG1, CARG1, #126
+|  sub CARG1, CARG1, #-GG_DISP2HOT
+|  ldrh CARG2, [DISPATCH, CARG1]
+|  subs CARG2, CARG2, #delta
+|  strh CARG2, [DISPATCH, CARG1]
+|.endmacro
+|
+|.macro hotloop
+|  hotcheck HOTCOUNT_LOOP
+|  blo ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+|  hotcheck HOTCOUNT_CALL
+|  blo ->vm_hotcall
+|.endmacro
+|
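hotcheck implements LuaJIT's hot-spot detection: a small array of 16-bit counters sits at GG_DISP2HOT relative to DISPATCH, indexed by a cheap hash of the bytecode PC. Every loop iteration or call decrements one counter and branches to the trace recorder when it underflows (the blo after subs fires on the unsigned borrow). In C, approximately (sketch; the 64-slot table matches the "and ..., #126" halfword indexing above):

    #include <stdint.h>

    #define HOTCOUNT_SLOTS 64

    static uint16_t hotcount[HOTCOUNT_SLOTS];

    /* Returns nonzero when the counter underflows => start trace recording. */
    static int hotcheck(uintptr_t pc, uint16_t delta)
    {
      /* (pc >> 1) & 126 picks a halfword slot: 64 entries, PC-hashed. */
      uint16_t *slot = &hotcount[(pc >> 2) & (HOTCOUNT_SLOTS - 1)];
      uint16_t old = *slot;
      *slot = (uint16_t)(old - delta);
      return old < delta;  /* the asm's 'subs; blo' borrow condition */
    }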
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg
+|  push {r12}
+|  sub r12, DISPATCH, #-DISPATCH_GL(vmstate)
+|  str reg, [r12]
+|  pop {r12}
+|.endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp
+|  sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain)
+|  ldr tmp, [tmp]
+|  str tmp, tab->gclist
+|  sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain)
+|  bic mark, mark, #LJ_GC_BLACK	// black2gray(tab)
+|  str tab, [tmp]
+|  strb mark, tab->marked
+|.endmacro
+|
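barrierback is the backward write barrier of LuaJIT's incremental GC: when a black table acquires a new reference it is flipped back to gray and queued on gc.grayagain, so the collector rescans it instead of missing the new edge. The same logic in C, as a self-contained sketch with stand-in types (compare lj_gc_barrierback() in the LuaJIT sources):

    #include <stdint.h>

    #define GC_BLACK 0x04  /* stand-in for LJ_GC_BLACK */

    typedef struct Tab {
      uint8_t marked;      /* GC color bits */
      struct Tab *gclist;  /* link for gray lists */
    } Tab;

    typedef struct GCState { Tab *grayagain; } GCState;

    static void barrierback(GCState *gc, Tab *t)
    {
      t->gclist = gc->grayagain;        /* str tmp, tab->gclist */
      gc->grayagain = t;                /* str tab, [grayagain] */
      t->marked &= (uint8_t)~GC_BLACK;  /* black2gray(tab)      */
    }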
+|.macro .IOS, a, b
+|.if IOS
+|  a, b
+|.endif
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+  |.code_sub
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Return handling ----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_returnp:
+  |  // See vm_return. Also: RB = previous base.
+  |  tst PC, #FRAME_P
+  |  beq ->cont_dispatch
+  |
+  |  // Return from pcall or xpcall fast func.
+  |  ldr PC, [RB, FRAME_PC]	// Fetch PC of previous frame.
+  |  mvn CARG2, #~LJ_TTRUE
+  |  mov BASE, RB
+  |  // Prepending may overwrite the pcall frame, so do it at the end.
+  |  str CARG2, [RA, FRAME_PC]	// Prepend true to results.
+  |  sub RA, RA, #8
+  |
+  |->vm_returnc:
+  |  adds RC, RC, #8	// RC = (nresults+1)*8.
+  |  mov CRET1, #LUA_YIELD
+  |  beq ->vm_unwind_c_eh
+  |  str RC, SAVE_MULTRES
+  |  ands CARG1, PC, #FRAME_TYPE
+  |  beq ->BC_RET_Z	// Handle regular return to Lua.
+  |
+  |->vm_return:
+  |  // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+  |  // CARG1 = PC & FRAME_TYPE
+  |  bic RB, PC, #FRAME_TYPEP
+  |  cmp CARG1, #FRAME_C
+  |  sub RB, BASE, RB	// RB = previous base.
+  |  bne ->vm_returnp
+  |
+  |  str RB, L->base
+  |  ldr KBASE, SAVE_NRES
+  |  mv_vmstate CARG4, C
+  |  sub BASE, BASE, #8
+  |  subs CARG3, RC, #8
+  |  lsl KBASE, KBASE, #3	// KBASE = (nresults_wanted+1)*8
+  |  st_vmstate CARG4
+  |  beq >2
+  |1:
+  |  subs CARG3, CARG3, #8
+  |  ldrd CARG1, CARG2, [RA], #8
+  |  strd CARG1, CARG2, [BASE], #8
+  |  bne <1
+  |2:
+  |  cmp KBASE, RC	// More/less results wanted?
+  |  bne >6
+  |3:
+  |  str BASE, L->top	// Store new top.
+  |
+  |->vm_leave_cp:
+  |  ldr RC, SAVE_CFRAME	// Restore previous C frame.
+  |  mov CRET1, #0	// Ok return status for vm_pcall.
+  |  str RC, L->cframe
+  |
+  |->vm_leave_unw:
+  |  restoreregs_ret
+  |
+  |6:
+  |  blt >7	// Less results wanted?
+  |  // More results wanted. Check stack size and fill up results with nil.
+  |  ldr CARG3, L->maxstack
+  |  mvn CARG2, #~LJ_TNIL
+  |  cmp BASE, CARG3
+  |  bhs >8
+  |  str CARG2, [BASE, #4]
+  |  add RC, RC, #8
+  |  add BASE, BASE, #8
+  |  b <2
+  |
+  |7:	// Less results wanted.
+  |  sub CARG1, RC, KBASE
+  |  cmp KBASE, #0	// LUA_MULTRET+1 case?
+  |  it ne
+  |  subne BASE, BASE, CARG1	// Either keep top or shrink it.
+  |  b <3
+  |
+  |8:	// Corner case: need to grow stack for filling up results.
+  |  // This can happen if:
+  |  // - A C function grows the stack (a lot).
+  |  // - The GC shrinks the stack in between.
+  |  // - A return back from a lua_call() with (high) nresults adjustment.
+  |  str BASE, L->top	// Save current top held in BASE (yes).
+  |  lsr CARG2, KBASE, #3
+  |  mov CARG1, L
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->top	// Need the (realloced) L->top in BASE.
+  |  b <2
+  |
+  |->vm_unwind_c:	// Unwind C stack, return from vm_pcall.
+  |  // (void *cframe, int errcode)
+  |  mov sp, CARG1
+  |  mov CRET1, CARG2
+  |->vm_unwind_c_eh:	// Landing pad for external unwinder.
+  |  ldr L, SAVE_L
+  |  mv_vmstate CARG4, C
+  |  ldr GL:CARG3, L->glref
+  |  str CARG4, GL:CARG3->vmstate
+  |  b ->vm_leave_unw
+  |
+  |->vm_unwind_ff:	// Unwind C stack, return from ff pcall.
+  |  // (void *cframe)
+  |  bic CARG1, CARG1, #~CFRAME_RAWMASK	// Use two steps: bic sp is deprecated.
+  |  mov sp, CARG1
+  |->vm_unwind_ff_eh:	// Landing pad for external unwinder.
+  |  ldr L, SAVE_L
+  |  mov MASKR8, #255
+  |  mov RC, #16	// 2 results: false + error message.
+  |  lsl MASKR8, MASKR8, #3	// MASKR8 = 255*8.
+  |  ldr BASE, L->base
+  |  ldr DISPATCH, L->glref	// Setup pointer to dispatch table.
+  |  mvn CARG1, #~LJ_TFALSE
+  |  sub RA, BASE, #8	// Results start at BASE-8.
+  |  ldr PC, [BASE, FRAME_PC]	// Fetch PC of previous frame.
+  |  add DISPATCH, DISPATCH, #GG_G2DISP
+  |  mv_vmstate CARG2, INTERP
+  |  str CARG1, [BASE, #-4]	// Prepend false to error message.
+  |  st_vmstate CARG2
+  |  b ->vm_returnc
+  |
+  |->vm_unwind_ext:	// Complete external unwind.
+#if !LJ_NO_UNWIND
+  |  push {r0, r1, r2, lr}
+  |  bl extern _Unwind_Complete
+  |  ldr r0, [sp]
+  |  bl extern _Unwind_DeleteException
+  |  pop {r0, r1, r2, lr}
+  |  mov r0, r1
+  |  bx r2
+#endif
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Grow stack for calls -----------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_growstack_c:	// Grow stack for C function.
+  |  // CARG1 = L
+  |  mov CARG2, #LUA_MINSTACK
+  |  b >2
+  |
+  |->vm_growstack_l:	// Grow stack for Lua function.
+  |  // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+  |  add RC, BASE, RC
+  |  sub RA, RA, BASE
+  |  mov CARG1, L
+  |  str BASE, L->base
+  |  add PC, PC, #4	// Must point after first instruction.
+  |  str RC, L->top
+  |  lsr CARG2, RA, #3
+  |2:
+  |  // L->base = new base, L->top = top
+  |  str PC, SAVE_PC
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  ldr BASE, L->base
+  |  ldr RC, L->top
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+  |  sub NARGS8:RC, RC, BASE
+  |  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+  |  ins_callt	// Just retry the call.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Entry points into the assembler VM ---------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |->vm_resume:	// Setup C frame and resume thread.
+  |  // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+  |  saveregs
+  |  mov L, CARG1
+  |  ldr DISPATCH, L:CARG1->glref	// Setup pointer to dispatch table.
+  |  mov BASE, CARG2
+  |  add DISPATCH, DISPATCH, #GG_G2DISP
+  |  str L, SAVE_L
+  |  mov PC, #FRAME_CP
+  |  str CARG3, SAVE_NRES
+  |  add CARG2, sp, #CFRAME_RESUME
+  |  ldrb CARG1, L->status
+  |  str CARG3, SAVE_ERRF
+  |  str L, SAVE_PC	// Any value outside of bytecode is ok.
+  |  str CARG3, SAVE_CFRAME
+  |  cmp CARG1, #0
+  |  str CARG2, L->cframe
+  |  beq >3
+  |
+  |  // Resume after yield (like a return).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |  mov RA, BASE
+  |  ldr BASE, L->base
+  |  ldr CARG1, L->top
+  |  mov MASKR8, #255
+  |  strb CARG3, L->status
+  |  sub RC, CARG1, BASE
+  |  ldr PC, [BASE, FRAME_PC]
+  |  lsl MASKR8, MASKR8, #3	// MASKR8 = 255*8.
+  |  mv_vmstate CARG2, INTERP
+  |  add RC, RC, #8
+  |  ands CARG1, PC, #FRAME_TYPE
+  |  st_vmstate CARG2
+  |  str RC, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |->vm_pcall:	// Setup protected C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+  |  saveregs
+  |  mov PC, #FRAME_CP
+  |  str CARG4, SAVE_ERRF
+  |  b >1
+  |
+  |->vm_call:	// Setup C frame and enter VM.
+  |  // (lua_State *L, TValue *base, int nres1)
+  |  saveregs
+  |  mov PC, #FRAME_C
+  |
+  |1:	// Entry point for vm_pcall above (PC = ftype).
+  |  ldr RC, L:CARG1->cframe
+  |  str CARG3, SAVE_NRES
+  |  mov L, CARG1
+  |  str CARG1, SAVE_L
+  |  ldr DISPATCH, L->glref	// Setup pointer to dispatch table.
+  |  mov BASE, CARG2
+  |  str CARG1, SAVE_PC	// Any value outside of bytecode is ok.
+  |  str RC, SAVE_CFRAME
+  |  add DISPATCH, DISPATCH, #GG_G2DISP
+  |  str sp, L->cframe	// Add our C frame to cframe chain.
+  |
+  |3:	// Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |  ldr RB, L->base	// RB = old base (for vmeta_call).
+  |  ldr CARG1, L->top
+  |  mov MASKR8, #255
+  |  add PC, PC, BASE
+  |  lsl MASKR8, MASKR8, #3	// MASKR8 = 255*8.
+  |  sub PC, PC, RB	// PC = frame delta + frame type
+  |  mv_vmstate CARG2, INTERP
+  |  sub NARGS8:RC, CARG1, BASE
+  |  st_vmstate CARG2
+  |
+  |->vm_call_dispatch:
+  |  // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+  |  ldrd CARG3, CARG4, [BASE, FRAME_FUNC]
+  |  checkfunc CARG4, ->vmeta_call
+  |
+  |->vm_call_dispatch_f:
+  |  ins_call
+  |  // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+  |
+  |->vm_cpcall:	// Setup protected C frame, call C.
+  |  // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+  |  saveregs
+  |  mov L, CARG1
+  |  ldr RA, L:CARG1->stack
+  |  str CARG1, SAVE_L
+  |  ldr DISPATCH, L->glref	// Setup pointer to dispatch table.
+  |  ldr RB, L->top
+  |  str CARG1, SAVE_PC	// Any value outside of bytecode is ok.
+  |  ldr RC, L->cframe
+  |  add DISPATCH, DISPATCH, #GG_G2DISP
+  |  sub RA, RA, RB	// Compute -savestack(L, L->top).
+  |  mov RB, #0
+  |  str RA, SAVE_NRES	// Neg. delta means cframe w/o frame.
+  |  str RB, SAVE_ERRF	// No error function.
+  |  str RC, SAVE_CFRAME
+  |  str sp, L->cframe	// Add our C frame to cframe chain.
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |  blx CARG4	// (lua_State *L, lua_CFunction func, void *ud)
+  |  movs BASE, CRET1
+  |  mov PC, #FRAME_CP
+  |  bne <3	// Else continue with the call.
+  |  b ->vm_leave_cp	// No base? Just remove C frame.
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Metamethod handling ------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |//-- Continuation dispatch ----------------------------------------------
+  |
+  |->cont_dispatch:
+  |  // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+  |  ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+  |  ldr CARG1, [BASE, #-16]	// Get continuation.
+  |  mov CARG4, BASE
+  |  mov BASE, RB	// Restore caller BASE.
+  |.if FFI
+  |  cmp CARG1, #1
+  |.endif
+  |  ldr PC, [CARG4, #-12]	// Restore PC from [cont|PC].
+  |  mvn INS, #~LJ_TNIL
+  |  add CARG2, RA, RC
+  |  str INS, [CARG2, #-4]	// Ensure one valid arg.
+  |.if FFI
+  |  bls >1
+  |.endif
+  |  ldr CARG3, LFUNC:CARG3->field_pc
+  |  ldr KBASE, [CARG3, #PC2PROTO(k)]
+  |  // BASE = base, RA = resultptr, CARG4 = meta base
+  |  bx CARG1
+  |
+  |.if FFI
+  |1:
+  |  beq ->cont_ffi_callback	// cont = 1: return from FFI callback.
+  |  // cont = 0: tailcall from C function.
+  |  sub CARG4, CARG4, #16
+  |  sub RC, CARG4, BASE
+  |  b ->vm_call_tail
+  |.endif
+  |
+  |->cont_cat:	// RA = resultptr, CARG4 = meta base
+  |  ldr INS, [PC, #-4]
+  |  sub CARG2, CARG4, #16
+  |  ldrd CARG3, CARG4, [RA]
+  |  str BASE, L->base
+  |  decode_RB8 RC, INS
+  |  decode_RA8 RA, INS
+  |  add CARG1, BASE, RC
+  |  subs CARG1, CARG2, CARG1
+  |  itt ne
+  |  strdne CARG3, CARG4, [CARG2]
+  |  movne CARG3, CARG1
+  |  bne ->BC_CAT_Z
+  |  strd_i CARG3, CARG4, BASE, RA
+  |  b ->cont_nop
+  |
+  |//-- Table indexing metamethods -----------------------------------------
+  |
+  |->vmeta_tgets1:
+  |  add CARG2, BASE, RB
+  |  b >2
+  |
+  |->vmeta_tgets:
+  |  sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+  |  mvn CARG4, #~LJ_TTAB
+  |  str TAB:RB, [CARG2]
+  |  str CARG4, [CARG2, #4]
+  |2:
+  |  mvn CARG4, #~LJ_TSTR
+  |  str STR:RC, TMPDlo
+  |  str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tgetb:	// RC = index
+  |  decode_RB8 RB, INS
+  |  str RC, TMPDlo
+  |  mvn CARG4, #~LJ_TISNUM
+  |  add CARG2, BASE, RB
+  |  str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tgetv:
+  |  add CARG2, BASE, RB
+  |  add CARG3, BASE, RC
+  |1:
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_tget	// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  beq >3
+  |  ldrd CARG3, CARG4, [CRET1]
+  |  ins_next1
+  |  ins_next2
+  |  strd_i CARG3, CARG4, BASE, RA
+  |  ins_next3
+  |
+  |3:	// Call __index metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k
+  |  rsb CARG1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |  mov NARGS8:RC, #16	// 2 args for func(t, k).
+  |  str PC, [BASE, #-12]	// [cont|PC]
+  |  add PC, CARG1, BASE
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]	// Guaranteed to be a function here.
+  |  b ->vm_call_dispatch_f
+  |
+  |->vmeta_tgetr:
+  |  .IOS mov RC, BASE
+  |  bl extern lj_tab_getinth	// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  .IOS mov BASE, RC
+  |  cmp CRET1, #0
+  |  ite ne
+  |  ldrdne CARG1, CARG2, [CRET1]
+  |  mvneq CARG2, #~LJ_TNIL
+  |  b ->BC_TGETR_Z
+  |
+  |//-----------------------------------------------------------------------
+  |
+  |->vmeta_tsets1:
+  |  add CARG2, BASE, RB
+  |  b >2
+  |
+  |->vmeta_tsets:
+  |  sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv)
+  |  mvn CARG4, #~LJ_TTAB
+  |  str TAB:RB, [CARG2]
+  |  str CARG4, [CARG2, #4]
+  |2:
+  |  mvn CARG4, #~LJ_TSTR
+  |  str STR:RC, TMPDlo
+  |  str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tsetb:	// RC = index
+  |  decode_RB8 RB, INS
+  |  str RC, TMPDlo
+  |  mvn CARG4, #~LJ_TISNUM
+  |  add CARG2, BASE, RB
+  |  str CARG4, TMPDhi
+  |  mov CARG3, TMPDp
+  |  b >1
+  |
+  |->vmeta_tsetv:
+  |  add CARG2, BASE, RB
+  |  add CARG3, BASE, RC
+  |1:
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_tset	// (lua_State *L, TValue *o, TValue *k)
+  |  // Returns TValue * (finished) or NULL (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  ldrd_i CARG3, CARG4, BASE, RA
+  |  beq >3
+  |  ins_next1
+  |  // NOBARRIER: lj_meta_tset ensures the table is not black.
+  |  strd CARG3, CARG4, [CRET1]
+  |  ins_next2
+  |  ins_next3
+  |
+  |3:	// Call __newindex metamethod.
+  |  // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+  |  rsb CARG1, BASE, #FRAME_CONT
+  |  ldr BASE, L->top
+  |  mov NARGS8:RC, #24	// 3 args for func(t, k, v).
+  |  strd CARG3, CARG4, [BASE, #16]	// Copy value to third argument.
+  |  str PC, [BASE, #-12]	// [cont|PC]
+  |  add PC, CARG1, BASE
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]	// Guaranteed to be a function here.
+  |  b ->vm_call_dispatch_f
+  |
+  |->vmeta_tsetr:
+  |  str BASE, L->base
+  |  .IOS mov RC, BASE
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_tab_setinth	// (lua_State *L, GCtab *t, int32_t key)
+  |  // Returns TValue *.
+  |  .IOS mov BASE, RC
+  |  b ->BC_TSETR_Z
+  |
+  |//-- Comparison metamethods ---------------------------------------------
+  |
+  |->vmeta_comp:
+  |  mov CARG1, L
+  |  sub PC, PC, #4
+  |  mov CARG2, RA
+  |  str BASE, L->base
+  |  mov CARG3, RC
+  |  str PC, SAVE_PC
+  |  decode_OP CARG4, INS
+  |  bl extern lj_meta_comp	// (lua_State *L, TValue *o1, *o2, int op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |3:
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #1
+  |  bhi ->vmeta_binop
+  |4:
+  |  ldrh RB, [PC, #2]
+  |  add PC, PC, #4
+  |  add RB, PC, RB, lsl #2
+  |  it hs
+  |  subhs PC, RB, #0x20000
+  |->cont_nop:
+  |  ins_next
+  |
+  |->cont_ra:	// RA = resultptr
+  |  ldr INS, [PC, #-4]
+  |  ldrd CARG1, CARG2, [RA]
+  |  decode_RA8 CARG3, INS
+  |  strd_i CARG1, CARG2, BASE, CARG3
+  |  b ->cont_nop
+  |
+  |->cont_condt:	// RA = resultptr
+  |  ldr CARG2, [RA, #4]
+  |  mvn CARG1, #~LJ_TTRUE
+  |  cmp CARG1, CARG2	// Branch if result is true.
+  |  b <4
+  |
+  |->cont_condf:	// RA = resultptr
+  |  ldr CARG2, [RA, #4]
+  |  checktp CARG2, LJ_TFALSE	// Branch if result is false.
+  |  b <4
+  |
+  |->vmeta_equal:
+  |  // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+  |  sub PC, PC, #4
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal	// (lua_State *L, GCobj *o1, *o2, int ne)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |
+  |->vmeta_equal_cd:
+  |.if FFI
+  |  sub PC, PC, #4
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  mov CARG2, INS
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_equal_cd	// (lua_State *L, BCIns op)
+  |  // Returns 0/1 or TValue * (metamethod).
+  |  b <3
+  |.endif
+  |
+  |->vmeta_istype:
+  |  sub PC, PC, #4
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  lsr CARG2, RA, #3
+  |  mov CARG3, RC
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_istype	// (lua_State *L, BCReg ra, BCReg tp)
+  |  .IOS ldr BASE, L->base
+  |  b ->cont_nop
+  |
+  |//-- Arithmetic metamethods ---------------------------------------------
+  |
+  |->vmeta_arith_vn:
+  |  decode_RB8 RB, INS
+  |  decode_RC8 RC, INS
+  |  add CARG3, BASE, RB
+  |  add CARG4, KBASE, RC
+  |  b >1
+  |
+  |->vmeta_arith_nv:
+  |  decode_RB8 RB, INS
+  |  decode_RC8 RC, INS
+  |  add CARG4, BASE, RB
+  |  add CARG3, KBASE, RC
+  |  b >1
+  |
+  |->vmeta_unm:
+  |  ldr INS, [PC, #-8]
+  |  sub PC, PC, #4
+  |  add CARG3, BASE, RC
+  |  add CARG4, BASE, RC
+  |  b >1
+  |
+  |->vmeta_arith_vv:
+  |  decode_RB8 RB, INS
+  |  decode_RC8 RC, INS
+  |  add CARG3, BASE, RB
+  |  add CARG4, BASE, RC
+  |1:
+  |  decode_OP OP, INS
+  |  add CARG2, BASE, RA
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  str OP, ARG5
+  |  bl extern lj_meta_arith	// (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+  |  // Returns NULL (finished) or TValue * (metamethod).
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  beq ->cont_nop
+  |
+  |  // Call metamethod for binary op.
+  |->vmeta_binop:
+  |  // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+  |  sub CARG2, CRET1, BASE
+  |  str PC, [CRET1, #-12]	// [cont|PC]
+  |  add PC, CARG2, #FRAME_CONT
+  |  mov BASE, CRET1
+  |  mov NARGS8:RC, #16	// 2 args for func(o1, o2).
+  |  b ->vm_call_dispatch
+  |
+  |->vmeta_len:
+  |  add CARG2, BASE, RC
+  |  str BASE, L->base
+  |  mov CARG1, L
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_len	// (lua_State *L, TValue *o)
+  |  // Returns NULL (retry) or TValue * (metamethod base).
+  |  .IOS ldr BASE, L->base
+#if LJ_52
+  |  cmp CRET1, #0
+  |  bne ->vmeta_binop	// Binop call for compatibility.
+  |  ldr TAB:CARG1, [BASE, RC]
+  |  b ->BC_LEN_Z
+#else
+  |  b ->vmeta_binop	// Binop call for compatibility.
+#endif
+  |
+  |//-- Call metamethod ----------------------------------------------------
+  |
+  |->vmeta_call:	// Resolve and call __call metamethod.
+  |  // RB = old base, BASE = new base, RC = nargs*8
+  |  mov CARG1, L
+  |  str RB, L->base	// This is the callers base!
+  |  sub CARG2, BASE, #8
+  |  str PC, SAVE_PC
+  |  add CARG3, BASE, NARGS8:RC
+  |  .IOS mov RA, BASE
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  .IOS mov BASE, RA
+  |  ldr LFUNC:CARG3, [BASE, FRAME_FUNC]	// Guaranteed to be a function here.
+  |  add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  ins_call
+  |
+  |->vmeta_callt:	// Resolve __call for BC_CALLT.
+  |  // BASE = old base, RA = new base, RC = nargs*8
+  |  mov CARG1, L
+  |  str BASE, L->base
+  |  sub CARG2, RA, #8
+  |  str PC, SAVE_PC
+  |  add CARG3, RA, NARGS8:RC
+  |  bl extern lj_meta_call	// (lua_State *L, TValue *func, TValue *top)
+  |  .IOS ldr BASE, L->base
+  |  ldr LFUNC:CARG3, [RA, FRAME_FUNC]	// Guaranteed to be a function here.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  add NARGS8:RC, NARGS8:RC, #8	// Got one more argument now.
+  |  b ->BC_CALLT2_Z
+  |
+  |//-- Argument coercion for 'for' statement ------------------------------
+  |
+  |->vmeta_for:
+  |  mov CARG1, L
+  |  str BASE, L->base
+  |  mov CARG2, RA
+  |  str PC, SAVE_PC
+  |  bl extern lj_meta_for	// (lua_State *L, TValue *base)
+  |  .IOS ldr BASE, L->base
+  |.if JIT
+  |  ldrb OP, [PC, #-4]
+  |.endif
+  |  ldr INS, [PC, #-4]
+  |.if JIT
+  |  cmp OP, #BC_JFORI
+  |.endif
+  |  decode_RA8 RA, INS
+  |  decode_RD RC, INS
+  |.if JIT
+  |  beq =>BC_JFORI
+  |.endif
+  |  b =>BC_FORI
+  |
+  |//-----------------------------------------------------------------------
+  |//-- Fast functions -----------------------------------------------------
+  |//-----------------------------------------------------------------------
+  |
+  |.macro .ffunc, name
+  |->ff_ .. name:
+  |.endmacro
+  |
+  |.macro .ffunc_1, name
+  |->ff_ .. name:
+  |  ldrd CARG1, CARG2, [BASE]
+  |  cmp NARGS8:RC, #8
+  |  blo ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_2, name
+  |->ff_ .. name:
+  |  ldrd CARG1, CARG2, [BASE]
+  |  ldrd CARG3, CARG4, [BASE, #8]
+  |  cmp NARGS8:RC, #16
+  |  blo ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_n, name
+  |  .ffunc_1 name
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_nn, name
+  |  .ffunc_2 name
+  |  checktp CARG2, LJ_TISNUM
+  |  it lo
+  |  cmnlo CARG4, #-LJ_TISNUM
+  |  bhs ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_d, name
+  |  .ffunc name
+  |  ldr CARG2, [BASE, #4]
+  |  cmp NARGS8:RC, #8
+  |  vldr d0, [BASE]
+  |  blo ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  bhs ->fff_fallback
+  |.endmacro
+  |
+  |.macro .ffunc_dd, name
+  |  .ffunc name
+  |  ldr CARG2, [BASE, #4]
+  |  ldr CARG4, [BASE, #12]
+  |  cmp NARGS8:RC, #16
+  |  vldr d0, [BASE]
+  |  vldr d1, [BASE, #8]
+  |  blo ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  it lo
+  |  cmnlo CARG4, #-LJ_TISNUM
+  |  bhs ->fff_fallback
+  |.endmacro
+  |
+  |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+  |.macro ffgccheck
+  |  sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total)
+  |  ldr CARG1, [CARG1]
+  |  sub CARG2, DISPATCH, #-DISPATCH_GL(gc.threshold)
+  |  ldr CARG2, [CARG2]
+  |  cmp CARG1, CARG2
+  |  it ge
+  |  blge ->fff_gcstep
+  |.endmacro
+  |
+  |//-- Base library: checks -----------------------------------------------
+  |
+  |.ffunc_1 assert
+  |  checktp CARG2, LJ_TTRUE
+  |  bhi ->fff_fallback
+  |  ldr PC, [BASE, FRAME_PC]
+  |  strd CARG1, CARG2, [BASE, #-8]
+  |  mov RB, BASE
+  |  subs RA, NARGS8:RC, #8
+  |  add RC, NARGS8:RC, #8	// Compute (nresults+1)*8.
+  |  beq ->fff_res	// Done if exactly 1 argument.
+  |1:
+  |  ldrd CARG1, CARG2, [RB, #8]
+  |  subs RA, RA, #8
+  |  strd CARG1, CARG2, [RB], #8
+  |  bne <1
+  |  b ->fff_res
+  |
+  |.ffunc type
+  |  ldr CARG2, [BASE, #4]
+  |  cmp NARGS8:RC, #8
+  |  blo ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  it lo
+  |  mvnlo CARG2, #~LJ_TISNUM
+  |  rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1
+  |  lsl CARG4, CARG4, #3
+  |  ldrd_i CARG1, CARG2, CFUNC:CARG3, CARG4
+  |  b ->fff_restv
+  |
+  |//-- Base library: getters and setters ---------------------------------
+  |
+  |.ffunc_1 getmetatable
+  |  checktp CARG2, LJ_TTAB
+  |  it ne
+  |  cmnne CARG2, #-LJ_TUDATA
+  |  bne >6
+  |1:	// Field metatable must be at same offset for GCtab and GCudata!
+  |  ldr TAB:RB, TAB:CARG1->metatable
+  |2:
+  |  mvn CARG2, #~LJ_TNIL
+  |  ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])]
+  |  cmp TAB:RB, #0
+  |  beq ->fff_restv
+  |  ldr CARG3, TAB:RB->hmask
+  |  ldr CARG4, STR:RC->sid
+  |  ldr NODE:INS, TAB:RB->node
+  |  and CARG3, CARG3, CARG4	// idx = str->sid & tab->hmask
+  |  add CARG3, CARG3, CARG3, lsl #1
+  |  add NODE:INS, NODE:INS, CARG3, lsl #3	// node = tab->node + idx*3*8
+  |3:	// Rearranged logic, because we expect _not_ to find the key.
+  |  ldrd CARG3, CARG4, NODE:INS->key	// STALL: early NODE:INS.
+  |  ldrd CARG1, CARG2, NODE:INS->val
+  |  ldr NODE:INS, NODE:INS->next
+  |  checktp CARG4, LJ_TSTR
+  |  it eq
+  |  cmpeq CARG3, STR:RC
+  |  beq >5
+  |  cmp NODE:INS, #0
+  |  bne <3
+  |4:
+  |  mov CARG1, RB	// Use metatable as default result.
+  |  mvn CARG2, #~LJ_TTAB
+  |  b ->fff_restv
+  |5:
+  |  checktp CARG2, LJ_TNIL
+  |  bne ->fff_restv
+  |  b <4
+  |
+  |6:
+  |  checktp CARG2, LJ_TISNUM
+  |  ite hs
+  |  mvnhs CARG2, CARG2
+  |  movlo CARG2, #~LJ_TISNUM
+  |  add CARG4, DISPATCH, CARG2, lsl #2
+  |  ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+  |  b <2
+  |
+  |.ffunc_2 setmetatable
+  |  // Fast path: no mt for table yet and not clearing the mt.
+  |  checktp CARG2, LJ_TTAB
+  |  it eq
+  |  ldreq TAB:RB, TAB:CARG1->metatable
+  |  checktpeq CARG4, LJ_TTAB
+  |  it eq
+  |  ldrbeq CARG4, TAB:CARG1->marked
+  |  it eq
+  |  cmpeq TAB:RB, #0
+  |  bne ->fff_fallback
+  |  tst CARG4, #LJ_GC_BLACK	// isblack(table)
+  |  str TAB:CARG3, TAB:CARG1->metatable
+  |  beq ->fff_restv
+  |  barrierback TAB:CARG1, CARG4, CARG3
+  |  b ->fff_restv
+  |
+  |.ffunc rawget
+  |  ldrd CARG3, CARG4, [BASE]
+  |  cmp NARGS8:RC, #16
+  |  blo ->fff_fallback
+  |  mov CARG2, CARG3
+  |  checktab CARG4, ->fff_fallback
+  |  mov CARG1, L
+  |  add CARG3, BASE, #8
+  |  .IOS mov RA, BASE
+  |  bl extern lj_tab_get	// (lua_State *L, GCtab *t, cTValue *key)
+  |  // Returns cTValue *.
+  |  .IOS mov BASE, RA
+  |  ldrd CARG1, CARG2, [CRET1]
+  |  b ->fff_restv
+  |
+  |//-- Base library: conversions ------------------------------------------
+  |
+  |.ffunc tonumber
+  |  // Only handles the number case inline (without a base argument).
+  |  ldrd CARG1, CARG2, [BASE]
+  |  cmp NARGS8:RC, #8
+  |  bne ->fff_fallback
+  |  checktp CARG2, LJ_TISNUM
+  |  bls ->fff_restv
+  |  b ->fff_fallback
+  |
+  |.ffunc_1 tostring
+  |  // Only handles the string or number case inline.
+  |  checktp CARG2, LJ_TSTR
+  |  // A __tostring method in the string base metatable is ignored.
+  |  beq ->fff_restv
+  |  // Handle numbers inline, unless a number base metatable is present.
+  |  ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])]
+  |  str BASE, L->base
+  |  checktp CARG2, LJ_TISNUM
+  |  it ls
+  |  cmpls CARG4, #0
+  |  str PC, SAVE_PC	// Redundant (but a defined value).
+  |  bhi ->fff_fallback
+  |  ffgccheck
+  |  mov CARG1, L
+  |  mov CARG2, BASE
+  |  bl extern lj_strfmt_number	// (lua_State *L, cTValue *o)
+  |  // Returns GCstr *.
+  |  ldr BASE, L->base
+  |  mvn CARG2, #~LJ_TSTR
+  |  b ->fff_restv
+  |
+  |//-- Base library: iterators -------------------------------------------
+  |
+  |.ffunc_1 next
+  |  mvn CARG4, #~LJ_TNIL
+  |  checktab CARG2, ->fff_fallback
+  |  strd_i CARG3, CARG4, BASE, NARGS8:RC	// Set missing 2nd arg to nil.
+  |  ldr PC, [BASE, FRAME_PC]
+  |  add CARG2, BASE, #8
+  |  sub CARG3, BASE, #8
+  |  bl extern lj_tab_next	// (GCtab *t, cTValue *key, TValue *o)
+  |  // Returns 1=found, 0=end, -1=error.
+  |  .IOS ldr BASE, L->base
+  |  cmp CRET1, #0
+  |  mov RC, #(2+1)*8
+  |  bgt ->fff_res	// Found key/value.
+  |  bmi ->fff_fallback	// Invalid key.
+  |  // End of traversal: return nil.
+  |  mvn CRET2, #~LJ_TNIL
+  |  b ->fff_restv
+  |
+  |.ffunc_1 pairs
+  |  checktab CARG2, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:RB, TAB:CARG1->metatable
+#endif
+  |  ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+  |  ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cmp TAB:RB, #0
+  |  bne ->fff_fallback
+#endif
+  |  mvn CARG2, #~LJ_TNIL
+  |  mov RC, #(3+1)*8
+  |  strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8]
+  |  str CARG2, [BASE, #12]
+  |  b ->fff_res
+  |
+  |.ffunc_2 ipairs_aux
+  |  checktp CARG2, LJ_TTAB
+  |  checktpeq CARG4, LJ_TISNUM
+  |  bne ->fff_fallback
+  |  ldr RB, TAB:CARG1->asize
+  |  ldr RC, TAB:CARG1->array
+  |  add CARG3, CARG3, #1
+  |  ldr PC, [BASE, FRAME_PC]
+  |  cmp CARG3, RB
+  |  add RC, RC, CARG3, lsl #3
+  |  strd CARG3, CARG4, [BASE, #-8]
+  |  it lo
+  |  ldrdlo CARG1, CARG2, [RC]
+  |  mov RC, #(0+1)*8
+  |  bhs >2	// Not in array part?
+  |1:
+  |  checktp CARG2, LJ_TNIL
+  |  itt ne
+  |  movne RC, #(2+1)*8
+  |  strdne CARG1, CARG2, [BASE]
+  |  b ->fff_res
+  |2:	// Check for empty hash part first. Otherwise call C function.
+  |  ldr RB, TAB:CARG1->hmask
+  |  mov CARG2, CARG3
+  |  cmp RB, #0
+  |  beq ->fff_res
+  |  .IOS mov RA, BASE
+  |  bl extern lj_tab_getinth	// (GCtab *t, int32_t key)
+  |  // Returns cTValue * or NULL.
+  |  .IOS mov BASE, RA
+  |  cmp CRET1, #0
+  |  beq ->fff_res
+  |  ldrd CARG1, CARG2, [CRET1]
+  |  b <1
+  |
+  |.ffunc_1 ipairs
+  |  checktab CARG2, ->fff_fallback
+#if LJ_52
+  |  ldr TAB:RB, TAB:CARG1->metatable
+#endif
+  |  ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+  |  ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+  |  cmp TAB:RB, #0
+  |  bne ->fff_fallback
+#endif
+  |  mov CARG1, #0
+  |  mvn CARG2, #~LJ_TISNUM
+  |  mov RC, #(3+1)*8
+  |  strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8]
+  |  strd CARG1, CARG2, [BASE, #8]
+  |  b ->fff_res
+  |
+  |//-- Base library: catch errors ----------------------------------------
+  |
+  |.ffunc pcall
+  |  sub RA, DISPATCH, #-DISPATCH_GL(hookmask)
+  |  ldrb RA, [RA]
+  |  cmp NARGS8:RC, #8
+  |  blo ->fff_fallback
+  |  tst RA, #HOOK_ACTIVE	// Remember active hook before pcall.
+  |  mov RB, BASE
+  |  add BASE, BASE, #8
+  |  ite eq
+  |  moveq PC, #8+FRAME_PCALL
+  |  movne PC, #8+FRAME_PCALLH
+  |  sub NARGS8:RC, NARGS8:RC, #8
+  |  b ->vm_call_dispatch
+  |
+  |.ffunc_2 xpcall
+  |  sub RA, DISPATCH, #-DISPATCH_GL(hookmask)
+  |  ldrb RA, [RA]
+  |  checkfunc CARG4, ->fff_fallback	// Traceback must be a function.
+  |  mov RB, BASE
+  |  strd CARG1, CARG2, [BASE, #8]	// Swap function and traceback.
+  |  strd CARG3, CARG4, [BASE]
+  |  tst RA, #HOOK_ACTIVE	// Remember active hook before pcall.
+  |  add BASE, BASE, #16
+  |  ite eq
+  |  moveq PC, #16+FRAME_PCALL
+  |  movne PC, #16+FRAME_PCALLH
+  |  sub NARGS8:RC, NARGS8:RC, #16
+  |  b ->vm_call_dispatch
+  |
+  |//-- Coroutine library --------------------------------------------------
+  |
+  |.macro coroutine_resume_wrap, resume
+  |.if resume
+  |.ffunc_1 coroutine_resume
+  |  checktp CARG2, LJ_TTHREAD
+  |  bne ->fff_fallback
+  |.else
+  |.ffunc coroutine_wrap_aux
+  |  ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+  |.endif
+  |  ldr PC, [BASE, FRAME_PC]
+  |  str BASE, L->base
+  |  ldr CARG2, L:CARG1->top
+  |  ldrb RA, L:CARG1->status
+  |  ldr RB, L:CARG1->base
+  |  add CARG3, CARG2, NARGS8:RC
+  |  add CARG4, CARG2, RA
+  |  str PC, SAVE_PC
+  |  cmp CARG4, RB
+  |  beq ->fff_fallback
+  |  ldr CARG4, L:CARG1->maxstack
+  |  ldr RB, L:CARG1->cframe
+  |  cmp RA, #LUA_YIELD
+  |  it ls
+  |  cmpls CARG3, CARG4
+  |  it ls
+  |  cmpls RB, #0
+  |  bhi ->fff_fallback
+  |1:
+  |.if resume
+  |  sub CARG3, CARG3, #8	// Keep resumed thread in stack for GC.
+  |  add BASE, BASE, #8
+  |  sub NARGS8:RC, NARGS8:RC, #8
+  |.endif
+  |  str CARG3, L:CARG1->top
+  |  str BASE, L->top
+  |2:	// Move args to coroutine.
+  |  ldrd_i CARG3, CARG4, BASE, RB
+  |  cmp RB, NARGS8:RC
+  |  strdne_i CARG3, CARG4, CARG2, RB
+  |  add RB, RB, #8
+  |  bne <2
+  |
+  |  mov CARG3, #0
+  |  mov L:RA, L:CARG1
+  |  mov CARG4, #0
+  |  bl ->vm_resume	// (lua_State *L, TValue *base, 0, 0)
+  |  // Returns thread status.
+  |4:
+  |  ldr CARG3, L:RA->base
+  |  mv_vmstate CARG2, INTERP
+  |  ldr CARG4, L:RA->top
+  |  cmp CRET1, #LUA_YIELD
+  |  ldr BASE, L->base
+  |  str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+  |  st_vmstate CARG2
+  |  bhi >8
+  |  subs RC, CARG4, CARG3
+  |  ldr CARG1, L->maxstack
+  |  add CARG2, BASE, RC
+  |  beq >6	// No results?
+  |  cmp CARG2, CARG1
+  |  mov RB, #0
+  |  bhi >9	// Need to grow stack?
+  |
+  |  sub CARG4, RC, #8
+  |  str CARG3, L:RA->top	// Clear coroutine stack.
+  |5:	// Move results from coroutine.
+  |  ldrd_i CARG1, CARG2, CARG3, RB
+  |  cmp RB, CARG4
+  |  strd_i CARG1, CARG2, BASE, RB
+  |  add RB, RB, #8
+  |  bne <5
+  |6:
+  |.if resume
+  |  mvn CARG3, #~LJ_TTRUE
+  |  add RC, RC, #16
+  |7:
+  |  str CARG3, [BASE, #-4]	// Prepend true/false to results.
+  |  sub RA, BASE, #8
+  |.else
+  |  mov RA, BASE
+  |  add RC, RC, #8
+  |.endif
+  |  ands CARG1, PC, #FRAME_TYPE
+  |  str PC, SAVE_PC
+  |  str RC, SAVE_MULTRES
+  |  beq ->BC_RET_Z
+  |  b ->vm_return
+  |
+  |8:	// Coroutine returned with error (at co->top-1).
+  |.if resume
+  |  ldrd CARG1, CARG2, [CARG4, #-8]!
+  |  mvn CARG3, #~LJ_TFALSE
+  |  mov RC, #(2+1)*8
+  |  str CARG4, L:RA->top	// Remove error from coroutine stack.
+  |  strd CARG1, CARG2, [BASE]	// Copy error message.
+  |  b <7
+  |.else
+  |  mov CARG1, L
+  |  mov CARG2, L:RA
+  |  bl extern lj_ffh_coroutine_wrap_err	// (lua_State *L, lua_State *co)
+  |  // Never returns.
+  |.endif
+  |
+  |9:	// Handle stack expansion on return from yield.
+  |  mov CARG1, L
+  |  lsr CARG2, RC, #3
+  |  bl extern lj_state_growstack	// (lua_State *L, int n)
+  |  mov CRET1, #0
+  |  b <4
+  |.endmacro
+  |
+  |  coroutine_resume_wrap 1	// coroutine.resume
+  |  coroutine_resume_wrap 0	// coroutine.wrap
+  |
+  |.ffunc coroutine_yield
+  |  ldr CARG1, L->cframe
+  |  add CARG2, BASE, NARGS8:RC
+  |  str BASE, L->base
+  |  tst CARG1, #CFRAME_RESUME
+  |  str CARG2, L->top
+  |  mov CRET1, #LUA_YIELD
+  |  mov CARG3, #0
+  |  beq ->fff_fallback
+  |  str CARG3, L->cframe
+  |  strb CRET1, L->status
+  |  b ->vm_leave_unw
+  |
+  |//-- Math library -------------------------------------------------------
+  |
+  |.macro math_round, func
+  |  .ffunc_1 math_ .. func
+  |  checktp CARG2, LJ_TISNUM
+  |  beq ->fff_restv
+  |  bhi ->fff_fallback
+  |  // Round FP value and normalize result.
+  |  lsl CARG3, CARG2, #1
+  |  adds RB, CARG3, #0x00200000
+  |  bpl >2	// |x| < 1?
+  |  mvn CARG4, #0x3e0
+  |  subs RB, CARG4, RB, asr #21
+  |  lsl CARG4, CARG2, #11
+  |  lsl CARG3, CARG1, #11
+  |  orr CARG4, CARG4, #0x80000000
+  |  rsb INS, RB, #32
+  |  orr CARG4, CARG4, CARG1, lsr #21
+  |  bls >3	// |x| >= 2^31?
+  |  lsl CARG1, CARG4, INS
+  |  orr CARG3, CARG3, CARG1
+  |  lsr CARG1, CARG4, RB
+  |.if "func" == "floor"
+  |  tst CARG3, CARG2, asr #31
+  |  it ne
+  |  addne CARG1, CARG1, #1
+  |.else
+  |  bics CARG3, CARG3, CARG2, asr #31
+  |  it ne
+  |  addsne CARG1, CARG1, #1
+  |  it vs
+  |  ldrdvs CARG1, CARG2, >9
+  |  bvs ->fff_restv
+  |.endif
+  |  cmp CARG2, #0
+  |  it lt
+  |  rsblt CARG1, CARG1, #0
+  |1:
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |2:	// |x| < 1
+  |  bcs ->fff_restv	// |x| is not finite.
+  |  orr CARG3, CARG3, CARG1	// ztest = abs(hi) | lo
+  |.if "func" == "floor"
+  |  tst CARG3, CARG2, asr #31	// return (ztest & sign) == 0 ? 0 : -1
+  |  ite eq
+  |  moveq CARG1, #0
+  |  mvnne CARG1, #0
+  |.else
+  |  bics CARG3, CARG3, CARG2, asr #31	// return (ztest & ~sign) == 0 ? 0 : 1
+  |  ite eq
+  |  moveq CARG1, #0
+  |  movne CARG1, #1
+  |.endif
+  |  mvn CARG2, #~LJ_TISNUM
+  |  b ->fff_restv
+  |
+  |3:	// |x| >= 2^31. Check for x == -(2^31).
+  |  it eq
+  |  cmpeq CARG4, #0x80000000
+  |.if "func" == "floor"
+  |  it eq
+  |  cmpeq CARG3, #0
+  |.endif
+  |  bne >4
+  |  cmp CARG2, #0
+  |  it mi
+  |  movmi CARG1, #0x80000000
+  |  bmi <1
+  |4:
+  |  bl ->vm_..func.._sf
+  |  b ->fff_restv
+  |.endmacro
+  |
+  |  math_round floor
+  |  math_round ceil
+  |
+  |.align 8
+  |9:
+  |  .long 0x00000000, 0x000041e0	// 2^31; hi word 0x41e00000 stored with halfwords swapped (jturnsek).
+ |
|
|
+ |.ffunc_1 math_abs
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bhi ->fff_fallback
|
|
+ | it ne
|
|
+ | bicne CARG2, CARG2, #0x80000000
|
|
+ | bne ->fff_restv
|
|
+ | cmp CARG1, #0
|
|
+ | it lt
|
|
+ | rsbslt CARG1, CARG1, #0
|
|
+ | it vs
|
|
+ | ldrdvs CARG1, CARG2, <9
|
|
+ | // Fallthrough.
|
|
+ |
|
|
+ |->fff_restv:
|
|
+ | // CARG1, CARG2 = TValue result.
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | strd CARG1, CARG2, [BASE, #-8]
|
|
+ |->fff_res1:
|
|
+ | // PC = return.
|
|
+ | mov RC, #(1+1)*8
|
|
+ |->fff_res:
|
|
+ | // RC = (nresults+1)*8, PC = return.
|
|
+ | ands CARG1, PC, #FRAME_TYPE
|
|
+ | it eq
|
|
+ | ldreq INS, [PC, #-4]
|
|
+ | str RC, SAVE_MULTRES
|
|
+ | sub RA, BASE, #8
|
|
+ | bne ->vm_return
|
|
+ | decode_RB8 RB, INS
|
|
+ |5:
|
|
+ | cmp RB, RC // More results expected?
|
|
+ | bhi >6
|
|
+ | decode_RA8 CARG1, INS
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
|
|
+ | sub BASE, RA, CARG1
|
|
+ | ins_next3
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | add CARG2, RA, RC
|
|
+ | mvn CARG1, #~LJ_TNIL
|
|
+ | add RC, RC, #8
|
|
+ | str CARG1, [CARG2, #-4]
|
|
+ | b <5
|
|
+ |
|
|
+ |.macro math_extern, func
|
|
+ |.if HFABI
|
|
+ | .ffunc_d math_ .. func
|
|
+ |.else
|
|
+ | .ffunc_n math_ .. func
|
|
+ |.endif
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern func
|
|
+ | .IOS mov BASE, RA
|
|
+ |.if HFABI
|
|
+ | b ->fff_resd
|
|
+ |.else
|
|
+ | b ->fff_restv
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro math_extern2, func
|
|
+ |.if HFABI
|
|
+ | .ffunc_dd math_ .. func
|
|
+ |.else
|
|
+ | .ffunc_nn math_ .. func
|
|
+ |.endif
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern func
|
|
+ | .IOS mov BASE, RA
|
|
+ |.if HFABI
|
|
+ | b ->fff_resd
|
|
+ |.else
|
|
+ | b ->fff_restv
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.if FPU
|
|
+ | .ffunc_d math_sqrt
|
|
+ | vsqrt.f64 d0, d0
|
|
+ |->fff_resd:
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | vstr d0, [BASE, #-8]
|
|
+ | b ->fff_res1
|
|
+ |.else
|
|
+ | math_extern sqrt
|
|
+ |.endif
|
|
+ |
|
|
+ |.ffunc math_log
|
|
+ |.if HFABI
|
|
+ | ldr CARG2, [BASE, #4]
|
|
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
|
|
+ | vldr d0, [BASE]
|
|
+ | bne ->fff_fallback
|
|
+ |.else
|
|
+ | ldrd CARG1, CARG2, [BASE]
|
|
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
|
|
+ | bne ->fff_fallback
|
|
+ |.endif
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bhs ->fff_fallback
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern log
|
|
+ | .IOS mov BASE, RA
|
|
+ |.if HFABI
|
|
+ | b ->fff_resd
|
|
+ |.else
|
|
+ | b ->fff_restv
|
|
+ |.endif
|
|
+ |
|
|
+ | math_extern log10
|
|
+ | math_extern exp
|
|
+ | math_extern sin
|
|
+ | math_extern cos
|
|
+ | math_extern tan
|
|
+ | math_extern asin
|
|
+ | math_extern acos
|
|
+ | math_extern atan
|
|
+ | math_extern sinh
|
|
+ | math_extern cosh
|
|
+ | math_extern tanh
|
|
+ | math_extern2 pow
|
|
+ | math_extern2 atan2
|
|
+ | math_extern2 fmod
|
|
+ |
|
|
+ |.if HFABI
|
|
+ | .ffunc math_ldexp
|
|
+ | ldr CARG4, [BASE, #4]
|
|
+ | ldrd CARG1, CARG2, [BASE, #8]
|
|
+ | cmp NARGS8:RC, #16
|
|
+ | blo ->fff_fallback
|
|
+ | vldr d0, [BASE]
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bhs ->fff_fallback
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bne ->fff_fallback
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern ldexp // (double x, int exp)
|
|
+ | .IOS mov BASE, RA
|
|
+ | b ->fff_resd
|
|
+ |.else
|
|
+ |.ffunc_2 math_ldexp
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bhs ->fff_fallback
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bne ->fff_fallback
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern ldexp // (double x, int exp)
|
|
+ | .IOS mov BASE, RA
|
|
+ | b ->fff_restv
|
|
+ |.endif
|
|
+ |
|
|
+ |.if HFABI
|
|
+ |.ffunc_d math_frexp
|
|
+ | mov CARG1, sp
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern frexp
|
|
+ | .IOS mov BASE, RA
|
|
+ | ldr CARG3, [sp]
|
|
+ | mvn CARG4, #~LJ_TISNUM
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | vstr d0, [BASE, #-8]
|
|
+ | mov RC, #(2+1)*8
|
|
+ | strd CARG3, CARG4, [BASE]
|
|
+ | b ->fff_res
|
|
+ |.else
|
|
+ |.ffunc_n math_frexp
|
|
+ | mov CARG3, sp
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern frexp
|
|
+ | .IOS mov BASE, RA
|
|
+ | ldr CARG3, [sp]
|
|
+ | mvn CARG4, #~LJ_TISNUM
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | strd CARG1, CARG2, [BASE, #-8]
|
|
+ | mov RC, #(2+1)*8
|
|
+ | strd CARG3, CARG4, [BASE]
|
|
+ | b ->fff_res
|
|
+ |.endif
|
|
+ |
|
|
+ |.if HFABI
|
|
+ |.ffunc_d math_modf
|
|
+ | sub CARG1, BASE, #8
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern modf
|
|
+ | .IOS mov BASE, RA
|
|
+ | mov RC, #(2+1)*8
|
|
+ | vstr d0, [BASE]
|
|
+ | b ->fff_res
|
|
+ |.else
|
|
+ |.ffunc_n math_modf
|
|
+ | sub CARG3, BASE, #8
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | .IOS mov RA, BASE
|
|
+ | bl extern modf
|
|
+ | .IOS mov BASE, RA
|
|
+ | mov RC, #(2+1)*8
|
|
+ | strd CARG1, CARG2, [BASE]
|
|
+ | b ->fff_res
|
|
+ |.endif
|
|
+ |
|
|
+ |.macro math_minmax, name, cond, fcond
|
|
+ |.if FPU
|
|
+ | .ffunc_1 name
|
|
+ | add RB, BASE, RC
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | add RA, BASE, #8
|
|
+ | bne >4
|
|
+ |1: // Handle integers.
|
|
+ | ldrd CARG3, CARG4, [RA]
|
|
+ | cmp RA, RB
|
|
+ | bhs ->fff_restv
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bne >3
|
|
+ | cmp CARG1, CARG3
|
|
+ | add RA, RA, #8
|
|
+ | it cond
|
|
+ | mov..cond CARG1, CARG3
|
|
+ | b <1
|
|
+ |3: // Convert intermediate result to number and continue below.
|
|
+ | vmov s4, CARG1
|
|
+ | bhi ->fff_fallback
|
|
+ | vldr d1, [RA]
|
|
+ | vcvt.f64.s32 d0, s4
|
|
+ | b >6
|
|
+ |
|
|
+ |4:
|
|
+ | vldr d0, [BASE]
|
|
+ | bhi ->fff_fallback
|
|
+ |5: // Handle numbers.
|
|
+ | ldrd CARG3, CARG4, [RA]
|
|
+ | vldr d1, [RA]
|
|
+ | cmp RA, RB
|
|
+ | bhs ->fff_resd
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bhs >7
|
|
+ |6:
|
|
+ | vcmp.f64 d0, d1
|
|
+ | vmrs
|
|
+ | add RA, RA, #8
|
|
+ | it fcond
|
|
+ | vmov..fcond.f64 d0, d1
|
|
+ | b <5
|
|
+ |7: // Convert integer to number and continue above.
|
|
+ | vmov s4, CARG3
|
|
+ | bhi ->fff_fallback
|
|
+ | vcvt.f64.s32 d1, s4
|
|
+ | b <6
|
|
+ |
|
|
+ |.else
|
|
+ |
|
|
+ | .ffunc_1 name
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | mov RA, #8
|
|
+ | bne >4
|
|
+ |1: // Handle integers.
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | cmp RA, RC
|
|
+ | bhs ->fff_restv
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bne >3
|
|
+ | cmp CARG1, CARG3
|
|
+ | add RA, RA, #8
|
|
+ | it cond
|
|
+ | mov..cond CARG1, CARG3
|
|
+ | b <1
|
|
+ |3: // Convert intermediate result to number and continue below.
|
|
+ | bhi ->fff_fallback
|
|
+ | bl extern __aeabi_i2d
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | b >6
|
|
+ |
|
|
+ |4:
|
|
+ | bhi ->fff_fallback
|
|
+ |5: // Handle numbers.
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | cmp RA, RC
|
|
+ | bhs ->fff_restv
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bhs >7
|
|
+ |6:
|
|
+ | bl extern __aeabi_cdcmple
|
|
+ | add RA, RA, #8
|
|
+ | mov..fcond CARG1, CARG3
|
|
+ | mov..fcond CARG2, CARG4
|
|
+ | b <5
|
|
+ |7: // Convert integer to number and continue above.
|
|
+ | bhi ->fff_fallback
|
|
+ | strd CARG1, CARG2, TMPD
|
|
+ | mov CARG1, CARG3
|
|
+ | bl extern __aeabi_i2d
|
|
+ | ldrd CARG3, CARG4, TMPD
|
|
+ | b <6
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ | math_minmax math_min, gt, pl
|
|
+ | math_minmax math_max, lt, le
|
|
+ |
|
|
+ |//-- String library -----------------------------------------------------
|
|
+ |
|
|
+ |.ffunc string_byte // Only handle the 1-arg case here.
|
|
+ | ldrd CARG1, CARG2, [BASE]
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | cmp NARGS8:RC, #8
|
|
+ | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument.
|
|
+ | bne ->fff_fallback
|
|
+ | ldr CARG3, STR:CARG1->len
|
|
+ | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end).
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | cmp CARG3, #0
|
|
+ | ite eq
|
|
+ | moveq RC, #(0+1)*8
|
|
+ | movne RC, #(1+1)*8
|
|
+ | strd CARG1, CARG2, [BASE, #-8]
|
|
+ | b ->fff_res
|
|
+ |
|
|
+ |.ffunc string_char // Only handle the 1-arg case here.
|
|
+ | ffgccheck
|
|
+ | ldrd CARG1, CARG2, [BASE]
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | cmp NARGS8:RC, #8 // Need exactly 1 argument.
|
|
+ | checktpeq CARG2, LJ_TISNUM
|
|
+ | it eq
|
|
+ | bicseq CARG4, CARG1, #255
|
|
+ | mov CARG3, #1
|
|
+ | bne ->fff_fallback
|
|
+ | str CARG1, TMPD
|
|
+ | mov CARG2, TMPDp // Points to stack. Little-endian.
|
|
+ |->fff_newstr:
|
|
+ | // CARG2 = str, CARG3 = len.
|
|
+ | str BASE, L->base
|
|
+ | mov CARG1, L
|
|
+ | str PC, SAVE_PC
|
|
+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
|
|
+ |->fff_resstr:
|
|
+ | // Returns GCstr *.
|
|
+ | ldr BASE, L->base
|
|
+ | mvn CARG2, #~LJ_TSTR
|
|
+ | b ->fff_restv
|
|
+ |
|
|
+ |.ffunc string_sub
|
|
+ | ffgccheck
|
|
+ | ldrd CARG1, CARG2, [BASE]
|
|
+ | ldrd CARG3, CARG4, [BASE, #16]
|
|
+ | cmp NARGS8:RC, #16
|
|
+ | mvn RB, #0
|
|
+ | beq >1
|
|
+ | blo ->fff_fallback
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | mov RB, CARG3
|
|
+ | bne ->fff_fallback
|
|
+ |1:
|
|
+ | ldrd CARG3, CARG4, [BASE, #8]
|
|
+ | checktp CARG2, LJ_TSTR
|
|
+ | it eq
|
|
+ | ldreq CARG2, STR:CARG1->len
|
|
+ | checktpeq CARG4, LJ_TISNUM
|
|
+ | bne ->fff_fallback
|
|
+ | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end
|
|
+ | add CARG4, CARG2, #1
|
|
+ | cmp CARG3, #0 // if (start < 0) start += len+1
|
|
+ | it lt
|
|
+ | addlt CARG3, CARG3, CARG4
|
|
+ | cmp CARG3, #1 // if (start < 1) start = 1
|
|
+ | it lt
|
|
+ | movlt CARG3, #1
|
|
+ | cmp RB, #0 // if (end < 0) end += len+1
|
|
+ | it lt
|
|
+ | addlt RB, RB, CARG4
|
|
+ | bic RB, RB, RB, asr #31 // if (end < 0) end = 0
|
|
+ | cmp RB, CARG2 // if (end > len) end = len
|
|
+ | add CARG1, STR:CARG1, #sizeof(GCstr)-1
|
|
+ | it gt
|
|
+ | movgt RB, CARG2
|
|
+ | add CARG2, CARG1, CARG3
|
|
+ | subs CARG3, RB, CARG3 // len = end - start
|
|
+ | add CARG3, CARG3, #1 // len += 1
|
|
+ | bge ->fff_newstr
|
|
+ |->fff_emptystr:
|
|
+ | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty)
|
|
+ | mvn CARG2, #~LJ_TSTR
|
|
+ | b ->fff_restv
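The inline comments above spell out the index clamping for string.sub (the two-argument form presets end = -1 via mvn RB, #0). The same logic in plain C, as a sketch with hypothetical names:

    #include <stdint.h>

    /* Clamp 1-based Lua substring indices; n <= 0 means the empty string. */
    static void sub_range(int32_t len, int32_t start, int32_t end,
                          int32_t *ofs, int32_t *n)
    {
      if (start < 0) start += len + 1;  /* negative index counts from the end */
      if (start < 1) start = 1;
      if (end < 0) end += len + 1;
      if (end < 0) end = 0;
      if (end > len) end = len;
      *ofs = start - 1;                 /* byte offset into the string data */
      *n = end - start + 1;
    }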
|
|
+ |
|
|
+ |.macro ffstring_op, name
|
|
+ | .ffunc string_ .. name
|
|
+ | ffgccheck
|
|
+ | ldr CARG3, [BASE, #4]
|
|
+ | cmp NARGS8:RC, #8
|
|
+ | ldr STR:CARG2, [BASE]
|
|
+ | blo ->fff_fallback
|
|
+ | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
|
|
+ | checkstr CARG3, ->fff_fallback
|
|
+ | ldr CARG4, SBUF:CARG1->b
|
|
+ | str BASE, L->base
|
|
+ | str PC, SAVE_PC
|
|
+ | str L, SBUF:CARG1->L
|
|
+ | str CARG4, SBUF:CARG1->w
|
|
+ | bl extern lj_buf_putstr_ .. name
|
|
+ | bl extern lj_buf_tostr
|
|
+ | b ->fff_resstr
|
|
+ |.endmacro
|
|
+ |
|
|
+ |ffstring_op reverse
|
|
+ |ffstring_op lower
|
|
+ |ffstring_op upper
|
|
+ |
|
|
+ |//-- Bit library --------------------------------------------------------
|
|
+ |
|
|
+ |// FP number to bit conversion for soft-float. Clobbers r0-r3.
|
|
+ |->vm_tobit_fb:
|
|
+ | bhi ->fff_fallback
|
|
+ |->vm_tobit:
|
|
+ | lsl RB, CARG2, #1
|
|
+ | adds RB, RB, #0x00200000
|
|
+ | ittt pl
|
|
+ | movpl CARG1, #0 // |x| < 1?
|
|
+ | bxpl lr
|
|
+ | mvn CARG4, #0x3e0
|
|
+ | subs RB, CARG4, RB, asr #21
|
|
+ | bmi >1 // |x| >= 2^32?
|
|
+ | lsl CARG4, CARG2, #11
|
|
+ | orr CARG4, CARG4, #0x80000000
|
|
+ | orr CARG4, CARG4, CARG1, lsr #21
|
|
+ | cmp CARG2, #0
|
|
+ | lsr CARG1, CARG4, RB
|
|
+ | it lt
|
|
+ | rsblt CARG1, CARG1, #0
|
|
+ | bx lr
|
|
+ |1:
|
|
+ | add RB, RB, #21
|
|
+ | lsr CARG4, CARG1, RB
|
|
+ | rsb RB, RB, #20
|
|
+ | lsl CARG1, CARG2, #12
|
|
+ | cmp CARG2, #0
|
|
+ | lsl CARG1, CARG1, RB
|
|
+ | orr CARG1, CARG4, CARG1
|
|
+ | it lt
|
|
+ | rsblt CARG1, CARG1, #0
|
|
+ | bx lr
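As a reference for the shift arithmetic above, here is a portable C sketch (not the LuaJIT helper itself; the name is made up, and non-finite inputs are not treated specially) of folding a double into a 32-bit integer the way bit.tobit() does, i.e. truncate the magnitude, wrap modulo 2^32, then apply the sign:

    #include <stdint.h>
    #include <string.h>

    static int32_t tobit_model(double x)
    {
      uint64_t b; memcpy(&b, &x, 8);
      int exp = (int)((b >> 52) & 0x7ff) - 1023;
      if (exp < 0) return 0;                  /* |x| < 1 */
      if (exp >= 84) return 0;                /* low 32 bits all shifted out */
      uint64_t m = (b & 0xfffffffffffffULL) | (1ULL << 52);
      uint32_t r = exp <= 52 ? (uint32_t)(m >> (52 - exp))
                             : (uint32_t)(m << (exp - 52));
      return (int32_t)((int64_t)b < 0 ? 0u - r : r);  /* wrap, apply sign */
    }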
|
|
+ |
|
|
+ |.macro .ffunc_bit, name
|
|
+ | .ffunc_1 bit_..name
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | it ne
|
|
+ | blne ->vm_tobit_fb
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit tobit
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | b ->fff_restv
|
|
+ |
|
|
+ |.macro .ffunc_bit_op, name, ins
|
|
+ | .ffunc_bit name
|
|
+ | mov CARG3, CARG1
|
|
+ | mov RA, #8
|
|
+ |1:
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ | cmp RA, NARGS8:RC
|
|
+ | add RA, RA, #8
|
|
+ | bge >2
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | it ne
|
|
+ | blne ->vm_tobit_fb
|
|
+ | ins CARG3, CARG3, CARG1
|
|
+ | b <1
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit_op band, and
|
|
+ |.ffunc_bit_op bor, orr
|
|
+ |.ffunc_bit_op bxor, eor
|
|
+ |
|
|
+ |2:
|
|
+ | mvn CARG4, #~LJ_TISNUM
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | strd CARG3, CARG4, [BASE, #-8]
|
|
+ | b ->fff_res1
|
|
+ |
|
|
+ |.ffunc_bit bswap
|
|
+ | eor CARG3, CARG1, CARG1, ror #16
|
|
+ | bic CARG3, CARG3, #0x00ff0000
|
|
+ | ror CARG1, CARG1, #8
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | eor CARG1, CARG1, CARG3, lsr #8
|
|
+ | b ->fff_restv
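The bswap sequence above is the classic four-instruction ARM byte swap from before the REV instruction existed. The same computation in C, comment-matched to the assembly:

    #include <stdint.h>

    static uint32_t bswap_model(uint32_t x)
    {
      uint32_t t = x ^ ((x >> 16) | (x << 16)); /* eor t, x, x, ror #16 */
      t &= ~0x00ff0000u;                        /* bic t, t, #0x00ff0000 */
      x = (x >> 8) | (x << 24);                 /* ror x, x, #8 */
      return x ^ (t >> 8);                      /* eor x, x, t, lsr #8 */
    }

For example, 0xAABBCCDD becomes 0xDDCCBBAA.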
|
|
+ |
|
|
+ |.ffunc_bit bnot
|
|
+ | mvn CARG1, CARG1
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | b ->fff_restv
|
|
+ |
|
|
+ |.macro .ffunc_bit_sh, name, ins, shmod
|
|
+ | .ffunc bit_..name
|
|
+ | ldrd CARG1, CARG2, [BASE, #8]
|
|
+ | cmp NARGS8:RC, #16
|
|
+ | blo ->fff_fallback
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | it ne
|
|
+ | blne ->vm_tobit_fb
|
|
+ |.if shmod == 0
|
|
+ | and RA, CARG1, #31
|
|
+ |.else
|
|
+ | rsb RA, CARG1, #0
|
|
+ |.endif
|
|
+ | ldrd CARG1, CARG2, [BASE]
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | it ne
|
|
+ | blne ->vm_tobit_fb
|
|
+ | ins CARG1, CARG1, RA
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | b ->fff_restv
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.ffunc_bit_sh lshift, lsl, 0
|
|
+ |.ffunc_bit_sh rshift, lsr, 0
|
|
+ |.ffunc_bit_sh arshift, asr, 0
|
|
+ |.ffunc_bit_sh rol, ror, 1
|
|
+ |.ffunc_bit_sh ror, ror, 0
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->fff_fallback: // Call fast function fallback handler.
|
|
+ | // BASE = new base, RC = nargs*8
|
|
+ | ldr CARG3, [BASE, FRAME_FUNC]
|
|
+ | ldr CARG2, L->maxstack
|
|
+ | add CARG1, BASE, NARGS8:RC
|
|
+ | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC.
|
|
+ | str CARG1, L->top
|
|
+ | ldr CARG3, CFUNC:CARG3->f
|
|
+ | str BASE, L->base
|
|
+ | add CARG1, CARG1, #8*LUA_MINSTACK
|
|
+ | str PC, SAVE_PC // Redundant (but a defined value).
|
|
+ | cmp CARG1, CARG2
|
|
+ | mov CARG1, L
|
|
+ | bhi >5 // Need to grow stack.
|
|
+ | blx CARG3 // (lua_State *L)
|
|
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
|
|
+ | ldr BASE, L->base
|
|
+ | cmp CRET1, #0
|
|
+ | lsl RC, CRET1, #3
|
|
+ | sub RA, BASE, #8
|
|
+ | bgt ->fff_res // Returned nresults+1?
|
|
+ |1: // Returned 0 or -1: retry fast path.
|
|
+ | ldr CARG1, L->top
|
|
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
|
|
+ | sub NARGS8:RC, CARG1, BASE
|
|
+ | bne ->vm_call_tail // Returned -1?
|
|
+ | ins_callt // Returned 0: retry fast path.
|
|
+ |
|
|
+ |// Reconstruct previous base for vmeta_call during tailcall.
|
|
+ |->vm_call_tail:
|
|
+ | ands CARG1, PC, #FRAME_TYPE
|
|
+ | bic CARG2, PC, #FRAME_TYPEP
|
|
+ | ittt eq
|
|
+ | ldreq INS, [PC, #-4]
|
|
+ | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8.
|
|
+ | addeq CARG2, CARG2, #8
|
|
+ | sub RB, BASE, CARG2
|
|
+ | b ->vm_call_dispatch // Resolve again for tailcall.
|
|
+ |
|
|
+ |5: // Grow stack for fallback handler.
|
|
+ | mov CARG2, #LUA_MINSTACK
|
|
+ | bl extern lj_state_growstack // (lua_State *L, int n)
|
|
+ | ldr BASE, L->base
|
|
+ | cmp CARG1, CARG1 // Set zero-flag to force retry.
|
|
+ | b <1
|
|
+ |
|
|
+ |->fff_gcstep: // Call GC step function.
|
|
+ | // BASE = new base, RC = nargs*8
|
|
+ | mov RA, lr
|
|
+ | str BASE, L->base
|
|
+ | add CARG2, BASE, NARGS8:RC
|
|
+ | str PC, SAVE_PC // Redundant (but a defined value).
|
|
+ | str CARG2, L->top
|
|
+ | mov CARG1, L
|
|
+ | bl extern lj_gc_step // (lua_State *L)
|
|
+ | ldr BASE, L->base
|
|
+ | mov lr, RA // Help return address predictor.
|
|
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
|
|
+ | bx lr
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Special dispatch targets -------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_record: // Dispatch target for recording phase.
|
|
+ |.if JIT
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
|
|
+ | ldrb CARG1, [CARG1]
|
|
+ | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
|
|
+ | bne >5
|
|
+ | // Decrement the hookcount for consistency, but always do the call.
|
|
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount)
|
|
+ | ldr CARG2, [CARG2]
|
|
+ | tst CARG1, #HOOK_ACTIVE
|
|
+ | bne >1
|
|
+ | sub CARG2, CARG2, #1
|
|
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookcount)
|
|
+ | it ne
|
|
+ | strne CARG2, [CARG1]
|
|
+ | b >1
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_rethook: // Dispatch target for return hooks.
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
|
|
+ | ldrb CARG1, [CARG1]
|
|
+ | tst CARG1, #HOOK_ACTIVE // Hook already active?
|
|
+ | beq >1
|
|
+ |5: // Re-dispatch to static ins.
|
|
+ | decode_OP OP, INS
|
|
+ | add OP, DISPATCH, OP, lsl #2
|
|
+ | ldr pc, [OP, #GG_DISP2STATIC]
|
|
+ |
|
|
+ |->vm_inshook: // Dispatch target for instr/line hooks.
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask)
|
|
+ | ldrb CARG1, [CARG1]
|
|
+ | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount)
|
|
+ | ldr CARG2, [CARG2]
|
|
+ | tst CARG1, #HOOK_ACTIVE // Hook already active?
|
|
+ | bne <5
|
|
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
|
|
+ | beq <5
|
|
+ | subs CARG2, CARG2, #1
|
|
+ | sub CARG3, DISPATCH, #-DISPATCH_GL(hookcount)
|
|
+ | str CARG2, [CARG3]
|
|
+ | beq >1
|
|
+ | tst CARG1, #LUA_MASKLINE
|
|
+ | beq <5
|
|
+ |1:
|
|
+ | mov CARG1, L
|
|
+ | str BASE, L->base
|
|
+ | mov CARG2, PC
|
|
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
|
|
+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
|
|
+ |3:
|
|
+ | ldr BASE, L->base
|
|
+ |4: // Re-dispatch to static ins.
|
|
+ | ldrb OP, [PC, #-4]
|
|
+ | ldr INS, [PC, #-4]
|
|
+ | add OP, DISPATCH, OP, lsl #2
|
|
+ | ldr OP, [OP, #GG_DISP2STATIC]
|
|
+ | decode_RA8 RA, INS
|
|
+ | decode_RD RC, INS
|
|
+ | bx OP
|
|
+ |
|
|
+ |->cont_hook: // Continue from hook yield.
|
|
+ | ldr CARG1, [CARG4, #-24]
|
|
+ | add PC, PC, #4
|
|
+ | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins.
|
|
+ | b <4
|
|
+ |
|
|
+ |->vm_hotloop: // Hot loop counter underflow.
|
|
+ |.if JIT
|
|
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
|
|
+ | sub CARG1, DISPATCH, #-GG_DISP2J
|
|
+ | str PC, SAVE_PC
|
|
+ | ldr CARG3, LFUNC:CARG3->field_pc
|
|
+ | mov CARG2, PC
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
|
|
+ | str L, [RB]
|
|
+ | ldrb CARG3, [CARG3, #PC2PROTO(framesize)]
|
|
+ | str BASE, L->base
|
|
+ | add CARG3, BASE, CARG3, lsl #3
|
|
+ | str CARG3, L->top
|
|
+ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
|
|
+ | b <3
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_callhook: // Dispatch target for call hooks.
|
|
+ | mov CARG2, PC
|
|
+ |.if JIT
|
|
+ | b >1
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_hotcall: // Hot call counter underflow.
|
|
+ |.if JIT
|
|
+ | orr CARG2, PC, #1
|
|
+ |1:
|
|
+ |.endif
|
|
+ | add CARG4, BASE, RC
|
|
+ | str PC, SAVE_PC
|
|
+ | mov CARG1, L
|
|
+ | str BASE, L->base
|
|
+ | sub RA, RA, BASE
|
|
+ | str CARG4, L->top
|
|
+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
|
|
+ | // Returns ASMFunction.
|
|
+ | ldr BASE, L->base
|
|
+ | ldr CARG4, L->top
|
|
+ | mov CARG2, #0
|
|
+ | add RA, BASE, RA
|
|
+ | sub NARGS8:RC, CARG4, BASE
|
|
+ | str CARG2, SAVE_PC // Invalidate for subsequent line hook.
|
|
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
|
|
+ | ldr INS, [PC, #-4]
|
|
+ | bx CRET1
|
|
+ |
|
|
+ |->cont_stitch: // Trace stitching.
|
|
+ |.if JIT
|
|
+ | // RA = resultptr, CARG4 = meta base
|
|
+ | ldr RB, SAVE_MULTRES
|
|
+ | ldr INS, [PC, #-4]
|
|
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
|
|
+ | subs RB, RB, #8
|
|
+ | decode_RA8 RC, INS // Call base.
|
|
+ | beq >2
|
|
+ |1: // Move results down.
|
|
+ | ldrd CARG1, CARG2, [RA]
|
|
+ | add RA, RA, #8
|
|
+ | subs RB, RB, #8
|
|
+ | strd_i CARG1, CARG2, BASE, RC
|
|
+ | add RC, RC, #8
|
|
+ | bne <1
|
|
+ |2:
|
|
+ | decode_RA8 RA, INS
|
|
+ | decode_RB8 RB, INS
|
|
+ | add RA, RA, RB
|
|
+ |3:
|
|
+ | cmp RA, RC
|
|
+ | mvn CARG2, #~LJ_TNIL
|
|
+ | bhi >9 // More results wanted?
|
|
+ |
|
|
+ | ldrh RA, TRACE:CARG3->traceno
|
|
+ | ldrh RC, TRACE:CARG3->link
|
|
+ | cmp RC, RA
|
|
+ | beq ->cont_nop // Blacklisted.
|
|
+ | cmp RC, #0
|
|
+ | bne =>BC_JLOOP // Jump to stitched trace.
|
|
+ |
|
|
+ | // Stitch a new trace to the previous trace.
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(exitno)
|
|
+ | str RA, [RB]
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
|
|
+ | str L, [RB]
|
|
+ | str BASE, L->base
|
|
+ | sub CARG1, DISPATCH, #-GG_DISP2J
|
|
+ | mov CARG2, PC
|
|
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
|
|
+ | ldr BASE, L->base
|
|
+ | b ->cont_nop
|
|
+ |
|
|
+ |9: // Fill up results with nil.
|
|
+ | strd_i CARG1, CARG2, BASE, RC
|
|
+ | add RC, RC, #8
|
|
+ | b <3
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_profhook: // Dispatch target for profiler hook.
|
|
+#if LJ_HASPROFILE
|
|
+ | mov CARG1, L
|
|
+ | str BASE, L->base
|
|
+ | mov CARG2, PC
|
|
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
|
|
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
|
|
+ | ldr BASE, L->base
|
|
+ | sub PC, PC, #4
|
|
+ | b ->cont_nop
|
|
+#endif
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Trace exit handler -------------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |->vm_exit_handler:
|
|
+ |.if JIT
|
|
+ | sub sp, sp, #12
|
|
+ | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12}
|
|
+ | ldr CARG1, [sp, #64] // Load original value of lr.
|
|
+ | ldr DISPATCH, [lr, #-1] // Load DISPATCH.
|
|
+ | add CARG3, sp, #64 // Recompute original value of sp.
|
|
+ | mv_vmstate CARG4, EXIT
|
|
+ | str CARG3, [sp, #52] // Store sp in RID_SP
|
|
+ | st_vmstate CARG4
|
|
+ | ldr CARG4, [CARG1, #-5]! // Get exit instruction.
|
|
+ | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC.
|
|
+ | str CARG1, [sp, #60]
|
|
+ |.if FPU
|
|
+ | vpush {d0-d15}
|
|
+ |.endif
|
|
+ | .long 0xf3432180 // SBFX CARG2, CARG4, #10, #1
|
|
+ | .long 0xf36321d4 // BFI CARG2, CARG4, #11, #10
|
|
+ | lsr CARG4, CARG4, #16
|
|
+ | .long 0xf363010a // BFI CARG2, CARG4, #0, #11
|
|
+ | add CARG1, CARG1, CARG2, lsl #1
|
|
+ | ldr CARG2, [lr, #3] // Load exit stub group offset.
|
|
+ | sub CARG1, CARG1, lr
|
|
+ | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
|
|
+ | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
|
|
+ | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(exitno)
|
|
+ | str CARG1, [RB]
|
|
+ | mov CARG4, #0
|
|
+ | str BASE, L->base
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(L)
|
|
+ | str L, [RB]
|
|
+ | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
|
|
+ | sub CARG1, DISPATCH, #-GG_DISP2J
|
|
+ | mov CARG2, sp
|
|
+ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
|
|
+ | // Returns MULTRES (unscaled) or negated error code.
|
|
+ | ldr CARG2, L->cframe
|
|
+ | ldr BASE, L->base
|
|
+ | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated.
|
|
+ | mov sp, CARG2
|
|
+ | ldr PC, SAVE_PC // Get SAVE_PC.
|
|
+ | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
|
|
+ | b >1
|
|
+ |.endif
|
|
+ |->vm_exit_interp:
|
|
+ | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set.
|
|
+ |.if JIT
|
|
+ | ldr L, SAVE_L
|
|
+ |1:
|
|
+ | cmp CARG1, #0
|
|
+ | blt >9 // Check for error from exit.
|
|
+ | lsl RC, CARG1, #3
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | str RC, SAVE_MULTRES
|
|
+ | mov CARG3, #0
|
|
+ | str BASE, L->base
|
|
+ | ldr CARG2, LFUNC:CARG2->field_pc
|
|
+ | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
|
|
+ | mv_vmstate CARG4, INTERP
|
|
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
|
|
+ | // Modified copy of ins_next which handles function header dispatch, too.
|
|
+ | ldrb OP, [PC]
|
|
+ | mov MASKR8, #255
|
|
+ | ldr INS, [PC], #4
|
|
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
|
|
+ | st_vmstate CARG4
|
|
+ | cmp OP, #BC_FUNCC+2 // Fast function?
|
|
+ | bhs >4
|
|
+ |2:
|
|
+ | cmp OP, #BC_FUNCF // Function header?
|
|
+ | ldr OP, [DISPATCH, OP, lsl #2]
|
|
+ | decode_RA8 RA, INS
|
|
+ | iteee lo
|
|
+ | lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
|
|
+ | subhs RC, RC, #8
|
|
+ | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
|
|
+ | ldrhs CARG3, [BASE, FRAME_FUNC]
|
|
+ | bx OP
|
|
+ |
|
|
+ |4: // Check frame below fast function.
|
|
+ | ldr CARG1, [BASE, FRAME_PC]
|
|
+ | ands CARG2, CARG1, #FRAME_TYPE
|
|
+ | bne <2 // Trace stitching continuation?
|
|
+ | // Otherwise set KBASE for Lua function below fast function.
|
|
+ | ldr CARG3, [CARG1, #-4]
|
|
+ | decode_RA8 CARG1, CARG3
|
|
+ | sub CARG2, BASE, CARG1
|
|
+ | ldr LFUNC:CARG3, [CARG2, #-16]
|
|
+ | ldr CARG3, LFUNC:CARG3->field_pc
|
|
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
|
|
+ | b <2
|
|
+ |
|
|
+ |9: // Rethrow error from the right C frame.
|
|
+ | mov CARG1, L
|
|
+ | bl extern lj_err_run // (lua_State *L)
|
|
+ |.endif
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Math helper functions ----------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |// FP value rounding. Called from JIT code.
|
|
+ |//
|
|
+ |// double lj_vm_floor/ceil/trunc(double x);
|
|
+ |.macro vm_round, func, hf
|
|
+ |.if hf == 1
|
|
+ | vmov CARG1, CARG2, d0
|
|
+ |.endif
|
|
+ | lsl CARG3, CARG2, #1
|
|
+ | adds RB, CARG3, #0x00200000
|
|
+ | bpl >2 // |x| < 1?
|
|
+ | mvn CARG4, #0x3cc
|
|
+ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
|
|
+ | itt lo
|
|
+ | bxlo lr // |x| >= 2^52: done.
|
|
+ | mvn CARG4, #1
|
|
+ | lsl CARG4, CARG4, RB
|
|
+ | bic CARG3, CARG1, CARG4 // ztest = lo & ~lomask
|
|
+ | and CARG1, CARG1, CARG4 // lo &= lomask
|
|
+ | subs RB, RB, #32
|
|
+ | mvn CARG4, #1
|
|
+ | itttt pl
|
|
+ | lslpl CARG4, CARG4, RB
|
|
+ | bicpl CARG4, CARG2, CARG4 // |x| <= 2^20: ztest |= hi & ~himask
|
|
+ | orrpl CARG3, CARG3, CARG4
|
|
+ | mvnpl CARG4, #1
|
|
+ | itt pl
|
|
+ | lslpl CARG4, CARG4, RB
|
|
+ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
|
|
+ | mvn CARG4, #1
|
|
+ |.if "func" == "floor"
|
|
+ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
|
|
+ |.else
|
|
+ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
|
|
+ |.endif
|
|
+ |.if hf == 1
|
|
+ | it eq
|
|
+ | vmoveq d0, CARG1, CARG2
|
|
+ |.endif
|
|
+ | itt eq
|
|
+ | bxeq lr // iszero: done.
|
|
+ | mvn CARG4, #1
|
|
+ | cmp RB, #0
|
|
+ | ite pl
|
|
+ | lslpl CARG3, CARG4, RB
|
|
+ | mvnmi CARG3, #0
|
|
+ | add RB, RB, #32
|
|
+ | lsl CARG4, CARG4, RB
|
|
+ | subs CARG1, CARG1, CARG4 // lo = lo-lomask
|
|
+ | mvn CARG4, #1
|
|
+ | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry
|
|
+ |.if hf == 1
|
|
+ | vmov d0, CARG1, CARG2
|
|
+ |.endif
|
|
+ | bx lr
|
|
+ |
|
|
+ |2: // |x| < 1:
|
|
+ | itt cs
|
|
+ | bxcs lr // |x| is not finite.
|
|
+ | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo
|
|
+ |.if "func" == "floor"
|
|
+ | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0)
|
|
+ |.else
|
|
+ | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0)
|
|
+ |.endif
|
|
+ | mov CARG1, #0 // lo = 0
|
|
+ | and CARG2, CARG2, #0x80000000
|
|
+ | itt ne
|
|
+ | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0)
|
|
+ | orrne CARG2, CARG2, CARG4
|
|
+ |.if hf == 1
|
|
+ | vmov d0, CARG1, CARG2
|
|
+ |.endif
|
|
+ | bx lr
|
|
+ |.endmacro
|
|
+ |
|
|
+ |9:
|
|
+ | .long 0x00003ff0 // hiword(+1.0) jturnsek: swapped halfwords!
|
|
+ |
|
|
+ |->vm_floor:
|
|
+ |.if HFABI
|
|
+ | vm_round floor, 1
|
|
+ |.endif
|
|
+ |->vm_floor_sf:
|
|
+ | vm_round floor, 0
|
|
+ |
|
|
+ |->vm_ceil:
|
|
+ |.if HFABI
|
|
+ | vm_round ceil, 1
|
|
+ |.endif
|
|
+ |->vm_ceil_sf:
|
|
+ | vm_round ceil, 0
|
|
+ |
|
|
+ |.macro vm_trunc, hf
|
|
+ |.if JIT
|
|
+ |.if hf == 1
|
|
+ | vmov CARG1, CARG2, d0
|
|
+ |.endif
|
|
+ | lsl CARG3, CARG2, #1
|
|
+ | adds RB, CARG3, #0x00200000
|
|
+ | itt pl
|
|
+ | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0.
|
|
+ | movpl CARG1, #0
|
|
+ |.if hf == 1
|
|
+ | it pl
|
|
+ | vmovpl d0, CARG1, CARG2
|
|
+ |.endif
|
|
+ | itt pl
|
|
+ | bxpl lr
|
|
+ | mvn CARG4, #0x3cc
|
|
+ | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0.
|
|
+ | itt lo
|
|
+ | bxlo lr // |x| >= 2^52: already done.
|
|
+ | mvn CARG4, #1
|
|
+ | lsl CARG4, CARG4, RB
|
|
+ | and CARG1, CARG1, CARG4 // lo &= lomask
|
|
+ | subs RB, RB, #32
|
|
+ | mvn CARG4, #1
|
|
+ | itt pl
|
|
+ | lsl CARG4, CARG4, RB
|
|
+ | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask
|
|
+ |.if hf == 1
|
|
+ | vmov d0, CARG1, CARG2
|
|
+ |.endif
|
|
+ | bx lr
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |->vm_trunc:
|
|
+ |.if HFABI
|
|
+ | vm_trunc 1
|
|
+ |.endif
|
|
+ |->vm_trunc_sf:
|
|
+ | vm_trunc 0
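Truncation is the simplest of the three roundings: clearing the low (52 - exp) mantissa bits rounds the magnitude toward zero in place, with no adjustment step. A C sketch over the raw bits (hypothetical name):

    #include <stdint.h>
    #include <string.h>

    static double trunc_model(double x)
    {
      uint64_t b; memcpy(&b, &x, 8);
      int exp = (int)((b >> 52) & 0x7ff) - 1023;
      if (exp < 0)
        b &= 1ULL << 63;                  /* |x| < 1: keep only the sign */
      else if (exp < 52)
        b &= ~((1ULL << (52 - exp)) - 1); /* clear the fractional bits */
      /* exp >= 52: already integral (or inf/NaN), left unchanged */
      memcpy(&x, &b, 8);
      return x;
    }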
|
|
+ |
|
|
+ | // double lj_vm_mod(double dividend, double divisor);
|
|
+ |->vm_mod:
|
|
+ |.if FPU
|
|
+ | // Special calling convention. Also, RC (r11) is not preserved.
|
|
+ | vdiv.f64 d0, d6, d7
|
|
+ | mov RC, lr
|
|
+ | vmov CARG1, CARG2, d0
|
|
+ | bl ->vm_floor_sf
|
|
+ | vmov d0, CARG1, CARG2
|
|
+ | vmul.f64 d0, d0, d7
|
|
+ | mov lr, RC
|
|
+ | vsub.f64 d6, d6, d0
|
|
+ | bx lr
|
|
+ |.else
|
|
+ | push {r0, r1, r2, r3, r4, lr}
|
|
+ | bl extern __aeabi_ddiv
|
|
+ | bl ->vm_floor_sf
|
|
+ | ldrd CARG3, CARG4, [sp, #8]
|
|
+ | bl extern __aeabi_dmul
|
|
+ | ldrd CARG3, CARG4, [sp]
|
|
+ | eor CARG2, CARG2, #0x80000000
|
|
+ | bl extern __aeabi_dadd
|
|
+ | add sp, sp, #20
|
|
+ | pop {pc}
|
|
+ |.endif
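Both branches compute the same thing: Lua's floored modulo, d - floor(d/s)*s, assembled from the divide, the vm_floor_sf helper, a multiply and a subtract (the soft-float path negates the product with an eor on the sign bit before the final add). In C:

    #include <math.h>

    /* Floored modulo, as computed by ->vm_mod above. */
    static double mod_model(double d, double s)
    {
      return d - floor(d / s) * s;
    }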
|
|
+ |
|
|
+ | // int lj_vm_modi(int dividend, int divisor);
|
|
+ |->vm_modi:
|
|
+ | ands RB, CARG1, #0x80000000
|
|
+ | it mi
|
|
+ | rsbmi CARG1, CARG1, #0 // a = |dividend|
|
|
+ | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor).
|
|
+ | cmp CARG2, #0
|
|
+ | it mi
|
|
+ | rsbmi CARG2, CARG2, #0 // b = |divisor|
|
|
+ | subs CARG4, CARG2, #1
|
|
+ | ite ne
|
|
+ | cmpne CARG1, CARG2
|
|
+ | moveq CARG1, #0 // if (b == 1 || a == b) a = 0
|
|
+ | it hi
|
|
+ | tsthi CARG2, CARG4
|
|
+ | it eq
|
|
+ | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1
|
|
+ | bls >1
|
|
+ | // Use repeated subtraction to get the remainder.
|
|
+ | clz CARG3, CARG1
|
|
+ | clz CARG4, CARG2
|
|
+ | sub CARG4, CARG4, CARG3
|
|
+ | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*12
|
|
+ | it ne
|
|
+ | .long 0xe8dff002 // tbbne [pc, CARG3] // Duff's device.
|
|
+ | .long 0xb8bec4ca, 0xa0a6acb2, 0x888e949a, 0x70767c82 // TBB table (part1)
|
|
+ | .long 0x585e646a, 0x40464c52, 0x282e343a, 0x10161c22 // TBB table (part2)
|
|
+ {
|
|
+ int i;
|
|
+ for (i = 31; i >= 0; i--) {
|
|
+ | cmp CARG1, CARG2, lsl #i
|
|
+ | it hs
|
|
+ | subhs CARG1, CARG1, CARG2, lsl #i
|
|
+ }
|
|
+ }
|
|
+ |1:
|
|
+ | cmp CARG1, #0
|
|
+ | it ne
|
|
+ | cmpne RB, #0
|
|
+ | it mi
|
|
+ | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b
|
|
+ | eors CARG2, CARG1, RB, lsl #1
|
|
+ | it mi
|
|
+ | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y
|
|
+ | bx lr
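The TBB table above is a computed entry into 32 unrolled shift-and-subtract steps (the Duff's device noted in the comment). Functionally the whole routine reduces to this C sketch (made-up names; __builtin_clz is the GCC/Clang builtin standing in for the clz instruction; a zero divisor is excluded by the bytecode fast path):

    #include <stdint.h>

    /* Shift-and-subtract remainder for x > y >= 2; one step per TBB entry. */
    static uint32_t urem_shiftsub(uint32_t x, uint32_t y)
    {
      for (int i = __builtin_clz(y) - __builtin_clz(x); i >= 0; i--)
        if (x >= (y << i)) x -= y << i;
      return x;
    }

    /* Lua-style modulo on int32: the result takes the sign of the divisor. */
    static int32_t modi_model(int32_t a, int32_t b)
    {
      uint32_t x = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;  /* |dividend| */
      uint32_t y = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;  /* |divisor| */
      uint32_t r;
      if (y == 1 || x == y) r = 0;
      else if ((y & (y - 1)) == 0) r = x & (y - 1);  /* power-of-two divisor */
      else if (x < y) r = x;
      else r = urem_shiftsub(x, y);
      if (r != 0 && (a ^ b) < 0) r = y - r;  /* signs differ: fold remainder */
      return (int32_t)(b < 0 ? 0u - r : r);
    }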
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- Miscellaneous functions --------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |.define NEXT_TAB, TAB:CARG1
|
|
+ |.define NEXT_RES, CARG1
|
|
+ |.define NEXT_IDX, CARG2
|
|
+ |.define NEXT_TMP0, CARG3
|
|
+ |.define NEXT_TMP1, CARG4
|
|
+ |.define NEXT_LIM, r12
|
|
+ |.define NEXT_RES_PTR, sp
|
|
+ |.define NEXT_RES_VAL, [sp]
|
|
+ |.define NEXT_RES_KEY_I, [sp, #8]
|
|
+ |.define NEXT_RES_KEY_IT, [sp, #12]
|
|
+ |
|
|
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
|
|
+ |// Next idx returned in CRET2.
|
|
+ |->vm_next:
|
|
+ |.if JIT
|
|
+ | ldr NEXT_TMP0, NEXT_TAB->array
|
|
+ | ldr NEXT_LIM, NEXT_TAB->asize
|
|
+ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3
|
|
+ |1: // Traverse array part.
|
|
+ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM
|
|
+ | bhs >5
|
|
+ | ldr NEXT_TMP1, [NEXT_TMP0, #4]
|
|
+ | str NEXT_IDX, NEXT_RES_KEY_I
|
|
+ | add NEXT_TMP0, NEXT_TMP0, #8
|
|
+ | add NEXT_IDX, NEXT_IDX, #1
|
|
+ | checktp NEXT_TMP1, LJ_TNIL
|
|
+ | beq <1 // Skip holes in array part.
|
|
+ | ldr NEXT_TMP0, [NEXT_TMP0, #-8]
|
|
+ | mov NEXT_RES, NEXT_RES_PTR
|
|
+ | strd NEXT_TMP0, NEXT_TMP1, NEXT_RES_VAL
|
|
+ | mvn NEXT_TMP0, #~LJ_TISNUM
|
|
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
|
|
+ | bx lr
|
|
+ |
|
|
+ |5: // Traverse hash part.
|
|
+ | ldr NEXT_TMP0, NEXT_TAB->hmask
|
|
+ | ldr NODE:NEXT_RES, NEXT_TAB->node
|
|
+ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1
|
|
+ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0
|
|
+ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3
|
|
+ |6:
|
|
+ | cmp NEXT_IDX, NEXT_LIM
|
|
+ | bhi >9
|
|
+ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it
|
|
+ | checktp NEXT_TMP1, LJ_TNIL
|
|
+ | add NEXT_IDX, NEXT_IDX, #1
|
|
+ | itt ne
|
|
+ | bxne lr
|
|
+ | // Skip holes in hash part.
|
|
+ | add NEXT_RES, NEXT_RES, #sizeof(Node)
|
|
+ | b <6
|
|
+ |
|
|
+ |9: // End of iteration. Set the key to nil (not the value).
|
|
+ | mvn NEXT_TMP0, #0
|
|
+ | mov NEXT_RES, NEXT_RES_PTR
|
|
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
|
|
+ | bx lr
|
|
+ |.endif
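The traversal order is: array slots from idx up to asize, then hash nodes up to asize + hmask, skipping nil values, with ~0 written to the key tag to signal the end. A simplified C sketch with made-up types (the real GCtab/Node layouts differ):

    #include <stdint.h>

    typedef struct { int isnil; double key, val; } Slot;  /* hypothetical */
    typedef struct {
      Slot *array; uint32_t asize;   /* array part; the key is the index */
      Slot *node;  uint32_t hmask;   /* hash part; node count is hmask+1 */
    } Tab;

    /* Returns the next iterator index (idx+1), or 0 when done. */
    static uint32_t next_model(const Tab *t, uint32_t idx,
                               double *key, double *val)
    {
      for (; idx < t->asize; idx++)               /* traverse array part */
        if (!t->array[idx].isnil) {
          *key = (double)idx;
          *val = t->array[idx].val;
          return idx + 1;
        }
      for (; idx <= t->asize + t->hmask; idx++) { /* traverse hash part */
        const Slot *n = &t->node[idx - t->asize];
        if (!n->isnil) {                          /* skip holes */
          *key = n->key;
          *val = n->val;
          return idx + 1;
        }
      }
      return 0;                                   /* end of iteration */
    }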
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |//-- FFI helper functions -----------------------------------------------
|
|
+ |//-----------------------------------------------------------------------
|
|
+ |
|
|
+ |// Handler for callback functions.
|
|
+ |// Saveregs already performed. Callback slot number in [sp], g in r12.
|
|
+ |->vm_ffi_callback:
|
|
+ |.if FFI
|
|
+ |.type CTSTATE, CTState, PC
|
|
+ | ldr CTSTATE, GL:r12->ctype_state
|
|
+ | add DISPATCH, r12, #GG_G2DISP
|
|
+ |.if FPU
|
|
+ | str r4, SAVE_R4
|
|
+ | add r4, sp, CFRAME_SPACE+4+8*8
|
|
+ | vstmdb r4!, {d8-d15}
|
|
+ |.endif
|
|
+ |.if HFABI
|
|
+ | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8])
|
|
+ |.endif
|
|
+ | strd CARG3, CARG4, CTSTATE->cb.gpr[2]
|
|
+ | strd CARG1, CARG2, CTSTATE->cb.gpr[0]
|
|
+ |.if HFABI
|
|
+ | vstmdb r12!, {d0-d7}
|
|
+ |.endif
|
|
+ | ldr CARG4, [sp]
|
|
+ | add CARG3, sp, #CFRAME_SIZE
|
|
+ | mov CARG1, CTSTATE
|
|
+ | lsr CARG4, CARG4, #3
|
|
+ | str CARG3, CTSTATE->cb.stack
|
|
+ | mov CARG2, sp
|
|
+ | str CARG4, CTSTATE->cb.slot
|
|
+ | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
|
|
+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
|
|
+ | // Returns lua_State *.
|
|
+ | ldr BASE, L:CRET1->base
|
|
+ | mv_vmstate CARG2, INTERP
|
|
+ | ldr RC, L:CRET1->top
|
|
+ | mov MASKR8, #255
|
|
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
|
|
+ | mov L, CRET1
|
|
+ | sub RC, RC, BASE
|
|
+ | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
|
|
+ | st_vmstate CARG2
|
|
+ | ins_callt
|
|
+ |.endif
|
|
+ |
|
|
+ |->cont_ffi_callback: // Return from FFI callback.
|
|
+ |.if FFI
|
|
+ | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)]
|
|
+ | str BASE, L->base
|
|
+ | str CARG4, L->top
|
|
+ | str L, CTSTATE->L
|
|
+ | mov CARG1, CTSTATE
|
|
+ | mov CARG2, RA
|
|
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
|
|
+ | ldrd CARG1, CARG2, CTSTATE->cb.gpr[0]
|
|
+ |.if HFABI
|
|
+ | vldr d0, CTSTATE->cb.fpr[0]
|
|
+ |.endif
|
|
+ | b ->vm_leave_unw
|
|
+ |.endif
|
|
+ |
|
|
+ |->vm_ffi_call: // Call C function via FFI.
|
|
+ | // Caveat: needs special frame unwinding, see below.
|
|
+ |.if FFI
|
|
+ | .type CCSTATE, CCallState, r4
|
|
+ | push {CCSTATE, r5, r11, lr}
|
|
+ | mov CCSTATE, CARG1
|
|
+ | ldr CARG1, CCSTATE:CARG1->spadj
|
|
+ | ldrb CARG2, CCSTATE->nsp
|
|
+ | add CARG3, CCSTATE, #offsetof(CCallState, stack)
|
|
+ |.if HFABI
|
|
+ | add RB, CCSTATE, #offsetof(CCallState, fpr[0])
|
|
+ |.endif
|
|
+ | mov r11, sp
|
|
+ | sub sp, sp, CARG1 // Readjust stack.
|
|
+ | subs CARG2, CARG2, #1
|
|
+ |.if HFABI
|
|
+ | vldm RB, {d0-d7}
|
|
+ |.endif
|
|
+ | ldr RB, CCSTATE->func
|
|
+ | bmi >2
|
|
+ |1: // Copy stack slots.
|
|
+ | ldr CARG4, [CARG3, CARG2, lsl #2]
|
|
+ | str CARG4, [sp, CARG2, lsl #2]
|
|
+ | subs CARG2, CARG2, #1
|
|
+ | bpl <1
|
|
+ |2:
|
|
+ | ldrd CARG1, CARG2, CCSTATE->gpr[0]
|
|
+ | ldrd CARG3, CARG4, CCSTATE->gpr[2]
|
|
+ | blx RB
|
|
+ | mov sp, r11
|
|
+ |.if HFABI
|
|
+ | add r12, CCSTATE, #offsetof(CCallState, fpr[4])
|
|
+ |.endif
|
|
+ | strd CRET1, CRET2, CCSTATE->gpr[0]
|
|
+ |.if HFABI
|
|
+ | vstmdb r12!, {d0-d3}
|
|
+ |.endif
|
|
+ | pop {CCSTATE, r5, r11, pc}
|
|
+ |.endif
|
|
+ |// Note: vm_ffi_call must be the last function in this object file!
|
|
+ |
|
|
+ |//-----------------------------------------------------------------------
|
|
+}
|
|
+
|
|
+/* Generate the code for a single instruction. */
|
|
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|
+{
|
|
+ int vk = 0;
|
|
+ |=>defop:
|
|
+
|
|
+ switch (op) {
|
|
+
|
|
+ /* -- Comparison ops ---------------------------------------------------- */
|
|
+
|
|
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
|
|
+
|
|
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
|
|
+ | // RA = src1*8, RC = src2, JMP with RC = target
|
|
+ | lsl RC, RC, #3
|
|
+ | ldrd_iw CARG1, CARG2, RA, BASE
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | ldrd_iw CARG3, CARG4, RC, BASE
|
|
+ | add PC, PC, #4
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bne >3
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bne >4
|
|
+ | cmp CARG1, CARG3
|
|
+ if (op == BC_ISLT) {
|
|
+ | it lt
|
|
+ | sublt PC, RB, #0x20000
|
|
+ } else if (op == BC_ISGE) {
|
|
+ | it ge
|
|
+ | subge PC, RB, #0x20000
|
|
+ } else if (op == BC_ISLE) {
|
|
+ | it le
|
|
+ | suble PC, RB, #0x20000
|
|
+ } else {
|
|
+ | it gt
|
|
+ | subgt PC, RB, #0x20000
|
|
+ }
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |3: // CARG1, CARG2 is not an integer.
|
|
+ |.if FPU
|
|
+ | vldr d0, [RA]
|
|
+ | bhi ->vmeta_comp
|
|
+ | // d0 is a number.
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | vldr d1, [RC]
|
|
+ | blo >5
|
|
+ | bhi ->vmeta_comp
|
|
+ | // d0 is a number, CARG3 is an integer.
|
|
+ | vmov s4, CARG3
|
|
+ | vcvt.f64.s32 d1, s4
|
|
+ | b >5
|
|
+ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer.
|
|
+ | vldr d1, [RC]
|
|
+ | bhi ->vmeta_comp
|
|
+ | // CARG1 is an integer, d1 is a number.
|
|
+ | vmov s4, CARG1
|
|
+ | vcvt.f64.s32 d0, s4
|
|
+ |5: // d0 and d1 are numbers.
|
|
+ | vcmp.f64 d0, d1
|
|
+ | vmrs
|
|
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|
|
+ if (op == BC_ISLT) {
|
|
+ | it lo
|
|
+ | sublo PC, RB, #0x20000
|
|
+ } else if (op == BC_ISGE) {
|
|
+ | it hs
|
|
+ | subhs PC, RB, #0x20000
|
|
+ } else if (op == BC_ISLE) {
|
|
+ | it ls
|
|
+ | subls PC, RB, #0x20000
|
|
+ } else {
|
|
+ | it hi
|
|
+ | subhi PC, RB, #0x20000
|
|
+ }
|
|
+ | b <1
|
|
+ |.else
|
|
+ | bhi ->vmeta_comp
|
|
+ | // CARG1, CARG2 is a number.
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | it lo
|
|
+ | movlo RA, RB // Save RB.
|
|
+ | blo >5
|
|
+ | bhi ->vmeta_comp
|
|
+ | // CARG1, CARG2 is a number, CARG3 is an integer.
|
|
+ | mov CARG1, CARG3
|
|
+ | mov RC, RA
|
|
+ | mov RA, RB // Save RB.
|
|
+ | bl extern __aeabi_i2d
|
|
+ | mov CARG3, CARG1
|
|
+ | mov CARG4, CARG2
|
|
+ | ldrd CARG1, CARG2, [RC] // Restore first operand.
|
|
+ | b >5
|
|
+ |4: // CARG1 is an integer, CARG3, CARG4 is not an integer.
|
|
+ | bhi ->vmeta_comp
|
|
+ | // CARG1 is an integer, CARG3, CARG4 is a number.
|
|
+ | mov RA, RB // Save RB.
|
|
+ | bl extern __aeabi_i2d
|
|
+ | ldrd CARG3, CARG4, [RC] // Restore second operand.
|
|
+ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers.
|
|
+ | bl extern __aeabi_cdcmple
|
|
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|
|
+ if (op == BC_ISLT) {
|
|
+ | it lo
|
|
+ | sublo PC, RA, #0x20000
|
|
+ } else if (op == BC_ISGE) {
|
|
+ | it hs
|
|
+ | subhs PC, RA, #0x20000
|
|
+ } else if (op == BC_ISLE) {
|
|
+ | it ls
|
|
+ | subls PC, RA, #0x20000
|
|
+ } else {
|
|
+ | it hi
|
|
+ | subhi PC, RA, #0x20000
|
|
+ }
|
|
+ | b <1
|
|
+ |.endif
|
|
+ break;
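The condition codes chosen above encode IEEE-754 comparison semantics: with a NaN operand every ordered comparison is false, so ISGE/ISGT, which branch on the logical negation of LT/LE, must also branch when the operands are unordered (hs/hi are true after an unordered vcmp, lo/ls are not). In C terms:

    /* Branch predicates for the four comparison ops. */
    static int islt(double a, double b) { return a < b; }
    static int isge(double a, double b) { return !(a < b); }  /* true for NaN */
    static int isle(double a, double b) { return a <= b; }
    static int isgt(double a, double b) { return !(a <= b); } /* true for NaN */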
|
|
+
|
|
+ case BC_ISEQV: case BC_ISNEV:
|
|
+ vk = op == BC_ISEQV;
|
|
+ | // RA = src1*8, RC = src2, JMP with RC = target
|
|
+ | lsl RC, RC, #3
|
|
+ | ldrd_iw CARG1, CARG2, RA, BASE
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | ldrd_iw CARG3, CARG4, RC, BASE
|
|
+ | add PC, PC, #4
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | it ls
|
|
+ | cmnls CARG4, #-LJ_TISNUM
|
|
+ if (vk) {
|
|
+ | bls ->BC_ISEQN_Z
|
|
+ } else {
|
|
+ | bls ->BC_ISNEN_Z
|
|
+ }
|
|
+ | // Either or both types are not numbers.
|
|
+ |.if FFI
|
|
+ | checktp CARG2, LJ_TCDATA
|
|
+ | checktpne CARG4, LJ_TCDATA
|
|
+ | beq ->vmeta_equal_cd
|
|
+ |.endif
|
|
+ | cmp CARG2, CARG4 // Compare types.
|
|
+ | bne >2 // Not the same type?
|
|
+ | checktp CARG2, LJ_TISPRI
|
|
+ | bhs >1 // Same type and primitive type?
|
|
+ |
|
|
+ | // Same types and not a primitive type. Compare GCobj or pvalue.
|
|
+ | cmp CARG1, CARG3
|
|
+ if (vk) {
|
|
+ | bne >3 // Different GCobjs or pvalues?
|
|
+ |1: // Branch if same.
|
|
+ | sub PC, RB, #0x20000
|
|
+ |2: // Different.
|
|
+ | ins_next
|
|
+ |3:
|
|
+ | checktp CARG2, LJ_TISTABUD
|
|
+ | bhi <2 // Different objects and not table/ud?
|
|
+ } else {
|
|
+ | beq >1 // Same GCobjs or pvalues?
|
|
+ | checktp CARG2, LJ_TISTABUD
|
|
+ | bhi >2 // Different objects and not table/ud?
|
|
+ }
|
|
+ | // Different tables or userdatas. Need to check __eq metamethod.
|
|
+ | // Field metatable must be at same offset for GCtab and GCudata!
|
|
+ | ldr TAB:RA, TAB:CARG1->metatable
|
|
+ | cmp TAB:RA, #0
|
|
+ if (vk) {
|
|
+ | beq <2 // No metatable?
|
|
+ } else {
|
|
+ | beq >2 // No metatable?
|
|
+ }
|
|
+ | ldrb RA, TAB:RA->nomm
|
|
+ | mov CARG4, #1-vk // ne = 0 or 1.
|
|
+ | mov CARG2, CARG1
|
|
+ | tst RA, #1<<MM_eq
|
|
+ | beq ->vmeta_equal // 'no __eq' flag not set?
|
|
+ if (vk) {
|
|
+ | b <2
|
|
+ } else {
|
|
+ |2: // Branch if different.
|
|
+ | sub PC, RB, #0x20000
|
|
+ |1: // Same.
|
|
+ | ins_next
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQS: case BC_ISNES:
|
|
+ vk = op == BC_ISEQS;
|
|
+ | // RA = src*8, RC = str_const (~), JMP with RC = target
|
|
+ | mvn RC, RC
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | ldr STR:CARG3, [KBASE, RC, lsl #2]
|
|
+ | add PC, PC, #4
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ | checktp CARG2, LJ_TSTR
|
|
+ |.if FFI
|
|
+ | bne >7
|
|
+ | cmp CARG1, CARG3
|
|
+ |.else
|
|
+ | it eq
|
|
+ | cmpeq CARG1, CARG3
|
|
+ |.endif
|
|
+ if (vk) {
|
|
+ | it eq
|
|
+ | subeq PC, RB, #0x20000
|
|
+ |1:
|
|
+ } else {
|
|
+ |1:
|
|
+ | it ne
|
|
+ | subne PC, RB, #0x20000
|
|
+ }
|
|
+ | ins_next
|
|
+ |
|
|
+ |.if FFI
|
|
+ |7:
|
|
+ | checktp CARG2, LJ_TCDATA
|
|
+ | bne <1
|
|
+ | b ->vmeta_equal_cd
|
|
+ |.endif
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQN: case BC_ISNEN:
|
|
+ vk = op == BC_ISEQN;
|
|
+ | // RA = src*8, RC = num_const (~), JMP with RC = target
|
|
+ | lsl RC, RC, #3
|
|
+ | ldrd_iw CARG1, CARG2, RA, BASE
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | ldrd_iw CARG3, CARG4, RC, KBASE
|
|
+ | add PC, PC, #4
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ if (vk) {
|
|
+ |->BC_ISEQN_Z:
|
|
+ } else {
|
|
+ |->BC_ISNEN_Z:
|
|
+ }
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bne >3
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | bne >4
|
|
+ | cmp CARG1, CARG3
|
|
+ if (vk) {
|
|
+ | it eq
|
|
+ | subeq PC, RB, #0x20000
|
|
+ |1:
|
|
+ } else {
|
|
+ |1:
|
|
+ | it ne
|
|
+ | subne PC, RB, #0x20000
|
|
+ }
|
|
+ |2:
|
|
+ | ins_next
|
|
+ |
|
|
+ |3: // CARG1, CARG2 is not an integer.
|
|
+ |.if FFI
|
|
+ | bhi >7
|
|
+ |.else
|
|
+ if (!vk) {
|
|
+ | it hi
|
|
+ | subhi PC, RB, #0x20000
|
|
+ }
|
|
+ | bhi <2
|
|
+ |.endif
|
|
+ |.if FPU
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | vmov s4, CARG3
|
|
+ | vldr d0, [RA]
|
|
+ | ite lo
|
|
+ | vldrlo d1, [RC]
|
|
+ | vcvths.f64.s32 d1, s4
|
|
+ | b >5
|
|
+ |4: // CARG1 is an integer, d1 is a number.
|
|
+ | vmov s4, CARG1
|
|
+ | vldr d1, [RC]
|
|
+ | vcvt.f64.s32 d0, s4
|
|
+ |5: // d0 and d1 are numbers.
|
|
+ | vcmp.f64 d0, d1
|
|
+ | vmrs
|
|
+ if (vk) {
|
|
+ | it eq
|
|
+ | subeq PC, RB, #0x20000
|
|
+ } else {
|
|
+ | it ne
|
|
+ | subne PC, RB, #0x20000
|
|
+ }
|
|
+ | b <2
|
|
+ |.else
|
|
+ | // CARG1, CARG2 is a number.
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | it lo
|
|
+ | movlo RA, RB // Save RB.
|
|
+ | blo >5
|
|
+ | // CARG1, CARG2 is a number, CARG3 is an integer.
|
|
+ | mov CARG1, CARG3
|
|
+ | mov RC, RA
|
|
+ |4: // CARG1 is an integer, CARG3, CARG4 is a number.
|
|
+ | mov RA, RB // Save RB.
|
|
+ | bl extern __aeabi_i2d
|
|
+ | ldrd CARG3, CARG4, [RC] // Restore other operand.
|
|
+ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers.
|
|
+ | bl extern __aeabi_cdcmpeq
|
|
+ if (vk) {
|
|
+ | it eq
|
|
+ | subeq PC, RA, #0x20000
|
|
+ } else {
|
|
+ | it ne
|
|
+ | subne PC, RA, #0x20000
|
|
+ }
|
|
+ | b <2
|
|
+ |.endif
|
|
+ |
|
|
+ |.if FFI
|
|
+ |7:
|
|
+ | checktp CARG2, LJ_TCDATA
|
|
+ | bne <1
|
|
+ | b ->vmeta_equal_cd
|
|
+ |.endif
|
|
+ break;
|
|
+
|
|
+ case BC_ISEQP: case BC_ISNEP:
|
|
+ vk = op == BC_ISEQP;
|
|
+ | // RA = src*8, RC = primitive_type (~), JMP with RC = target
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | add PC, PC, #4
|
|
+ | mvn RC, RC
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ |.if FFI
|
|
+ | checktp CARG2, LJ_TCDATA
|
|
+ | beq ->vmeta_equal_cd
|
|
+ |.endif
|
|
+ | cmp CARG2, RC
|
|
+ if (vk) {
|
|
+ | it eq
|
|
+ | subeq PC, RB, #0x20000
|
|
+ } else {
|
|
+ | it ne
|
|
+ | subne PC, RB, #0x20000
|
|
+ }
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Unary test and copy ops ------------------------------------------- */
|
|
+
|
|
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
|
|
+ | // RA = dst*8 or unused, RC = src, JMP with RC = target
|
|
+ | add RC, BASE, RC, lsl #3
|
|
+ | ldrh RB, [PC, #2]
|
|
+ | ldrd CARG1, CARG2, [RC]
|
|
+ | add PC, PC, #4
|
|
+ | add RB, PC, RB, lsl #2
|
|
+ | checktp CARG2, LJ_TTRUE
|
|
+ if (op == BC_ISTC || op == BC_IST) {
|
|
+ | it ls
|
|
+ | subls PC, RB, #0x20000
|
|
+ if (op == BC_ISTC) {
|
|
+ | it ls
|
|
+ | strdls_i CARG1, CARG2, BASE, RA
|
|
+ }
|
|
+ } else {
|
|
+ | it hi
|
|
+ | subhi PC, RB, #0x20000
|
|
+ if (op == BC_ISFC) {
|
|
+ | it hi
|
|
+ | strdhi_i CARG1, CARG2, BASE, RA
|
|
+ }
|
|
+ }
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_ISTYPE:
|
|
+ | // RA = src*8, RC = -type
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next1
|
|
+ | cmn CARG2, RC
|
|
+ | ins_next2
|
|
+ | bne ->vmeta_istype
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_ISNUM:
|
|
+ | // RA = src*8, RC = -(TISNUM-1)
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next1
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | ins_next2
|
|
+ | bhs ->vmeta_istype
|
|
+ | ins_next3
|
|
+ break;
|
|
+
|
|
+ /* -- Unary ops --------------------------------------------------------- */
|
|
+
|
|
+ case BC_MOV:
|
|
+ | // RA = dst*8, RC = src
|
|
+ | lsl RC, RC, #3
|
|
+ | ins_next1
|
|
+ | ldrd_i CARG1, CARG2, BASE, RC
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_NOT:
|
|
+ | // RA = dst*8, RC = src
|
|
+ | add RC, BASE, RC, lsl #3
|
|
+ | ins_next1
|
|
+ | ldr CARG1, [RC, #4]
|
|
+ | add RA, BASE, RA
|
|
+ | ins_next2
|
|
+ | checktp CARG1, LJ_TTRUE
|
|
+ | ite ls
|
|
+ | mvnls CARG2, #~LJ_TFALSE
|
|
+ | mvnhi CARG2, #~LJ_TTRUE
|
|
+ | str CARG2, [RA, #4]
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_UNM:
|
|
+ | // RA = dst*8, RC = src
|
|
+ | lsl RC, RC, #3
|
|
+ | ldrd_i CARG1, CARG2, BASE, RC
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bhi ->vmeta_unm
|
|
+ | it ne
|
|
+ | eorne CARG2, CARG2, #0x80000000
|
|
+ | bne >5
|
|
+ | it eq
|
|
+ | rsbseq CARG1, CARG1, #0
|
|
+ | it vs
|
|
+ | ldrdvs CARG1, CARG2, >9
|
|
+ |5:
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |
|
|
+ |.align 8
|
|
+ |9:
|
|
+ | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swapped halfwords!
|
|
+ break;
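The integer path of BC_UNM negates with rsbs and watches the overflow flag: the only input that overflows is -2^31, whose negation does not fit in int32, so the result is widened to the double constant 2^31 loaded from the literal at 9: above. A C rendering with hypothetical names:

    #include <stdint.h>

    /* Returns 1 if the result stays an integer, 0 if it widened to a double. */
    static int unm_model(int32_t i, int32_t *iout, double *nout)
    {
      if (i == INT32_MIN) { *nout = 2147483648.0; return 0; }
      *iout = (int32_t)(0u - (uint32_t)i);  /* negate without overflow UB */
      return 1;
    }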
|
|
+ case BC_LEN:
|
|
+ | // RA = dst*8, RC = src
|
|
+ | lsl RC, RC, #3
|
|
+ | ldrd_i CARG1, CARG2, BASE, RC
|
|
+ | checkstr CARG2, >2
|
|
+ | ldr CARG1, STR:CARG1->len
|
|
+ |1:
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |2:
|
|
+ | checktab CARG2, ->vmeta_len
|
|
+#if LJ_52
|
|
+ | ldr TAB:CARG3, TAB:CARG1->metatable
|
|
+ | cmp TAB:CARG3, #0
|
|
+ | bne >9
|
|
+ |3:
|
|
+#endif
|
|
+ |->BC_LEN_Z:
|
|
+ | .IOS mov RC, BASE
|
|
+ | bl extern lj_tab_len // (GCtab *t)
|
|
+ | // Returns uint32_t (but less than 2^31).
|
|
+ | .IOS mov BASE, RC
|
|
+ | b <1
|
|
+#if LJ_52
|
|
+ |9:
|
|
+ | ldrb CARG4, TAB:CARG3->nomm
|
|
+ | tst CARG4, #1<<MM_len
|
|
+ | bne <3 // 'no __len' flag set: done.
|
|
+ | b ->vmeta_len
|
|
+#endif
|
|
+ break;
|
|
+
|
|
+ /* -- Binary ops -------------------------------------------------------- */
|
|
+
|
|
+ |.macro ins_arithcheck, cond, ncond, target
|
|
+ ||if (vk == 1) {
|
|
+ | cmn CARG4, #-LJ_TISNUM
|
|
+ | it cond
|
|
+ | cmn..cond CARG2, #-LJ_TISNUM
|
|
+ ||} else {
|
|
+ | cmn CARG2, #-LJ_TISNUM
|
|
+ | it cond
|
|
+ | cmn..cond CARG4, #-LJ_TISNUM
|
|
+ ||}
|
|
+ | b..ncond target
|
|
+ |.endmacro
|
|
+ |.macro ins_arithcheck_int, target
|
|
+ | ins_arithcheck eq, ne, target
|
|
+ |.endmacro
|
|
+ |.macro ins_arithcheck_num, target
|
|
+ | ins_arithcheck lo, hs, target
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithpre
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
|
|
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
|
|
+ ||switch (vk) {
|
|
+ ||case 0:
|
|
+ | .if FPU
|
|
+ | ldrd_iw CARG1, CARG2, RB, BASE
|
|
+ | ldrd_iw CARG3, CARG4, RC, KBASE
|
|
+ | .else
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | ldrd_i CARG3, CARG4, KBASE, RC
|
|
+ | .endif
|
|
+ || break;
|
|
+ ||case 1:
|
|
+ | .if FPU
|
|
+ | ldrd_iw CARG3, CARG4, RB, BASE
|
|
+ | ldrd_iw CARG1, CARG2, RC, KBASE
|
|
+ | .else
|
|
+ | ldrd_i CARG3, CARG4, BASE, RB
|
|
+ | ldrd_i CARG1, CARG2, KBASE, RC
|
|
+ | .endif
|
|
+ || break;
|
|
+ ||default:
|
|
+ | .if FPU
|
|
+ | ldrd_iw CARG1, CARG2, RB, BASE
|
|
+ | ldrd_iw CARG3, CARG4, RC, BASE
|
|
+ | .else
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | ldrd_i CARG3, CARG4, BASE, RC
|
|
+ | .endif
|
|
+ || break;
|
|
+ ||}
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithpre_fpu, reg1, reg2
|
|
+ |.if FPU
|
|
+ ||if (vk == 1) {
|
|
+ | vldr reg2, [RB]
|
|
+ | vldr reg1, [RC]
|
|
+ ||} else {
|
|
+ | vldr reg1, [RB]
|
|
+ | vldr reg2, [RC]
|
|
+ ||}
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithpost_fpu, reg
|
|
+ | ins_next1
|
|
+ | add RA, BASE, RA
|
|
+ | ins_next2
|
|
+ | vstr reg, [RA]
|
|
+ | ins_next3
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithfallback, ins
|
|
+ ||switch (vk) {
|
|
+ ||case 0:
|
|
+ | ins ->vmeta_arith_vn
|
|
+ || break;
|
|
+ ||case 1:
|
|
+ | ins ->vmeta_arith_nv
|
|
+ || break;
|
|
+ ||default:
|
|
+ | ins ->vmeta_arith_vv
|
|
+ || break;
|
|
+ ||}
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithdn, intins, fpins, fpcall
|
|
+ | ins_arithpre
|
|
+ |.if "intins" ~= "vm_modi" and not FPU
|
|
+ | ins_next1
|
|
+ |.endif
|
|
+ | ins_arithcheck_int >5
|
|
+ |.if "intins" == "smull"
|
|
+ | smull CARG1, RC, CARG3, CARG1
|
|
+ | cmp RC, CARG1, asr #31
|
|
+ | ins_arithfallback bne
|
|
+ |.elif "intins" == "vm_modi"
|
|
+ | movs CARG2, CARG3
|
|
+ | ins_arithfallback beq
|
|
+ | bl ->vm_modi
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ |.else
|
|
+ | intins CARG1, CARG1, CARG3
|
|
+ | ins_arithfallback bvs
|
|
+ |.endif
|
|
+ |4:
|
|
+ |.if "intins" == "vm_modi" or FPU
|
|
+ | ins_next1
|
|
+ |.endif
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |5: // FP variant.
|
|
+ | ins_arithpre_fpu d6, d7
|
|
+ | ins_arithfallback ins_arithcheck_num
|
|
+ |.if FPU
|
|
+ |.if "intins" == "vm_modi"
|
|
+ | bl fpcall
|
|
+ |.else
|
|
+ | fpins d6, d6, d7
|
|
+ |.endif
|
|
+ | ins_arithpost_fpu d6
|
|
+ |.else
|
|
+ | bl fpcall
|
|
+ |.if "intins" ~= "vm_modi"
|
|
+ | ins_next1
|
|
+ |.endif
|
|
+ | b <4
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+ |
|
|
+ |.macro ins_arithfp, fpins, fpcall
|
|
+ | ins_arithpre
|
|
+ |.if "fpins" ~= "extern" or HFABI
|
|
+ | ins_arithpre_fpu d0, d1
|
|
+ |.endif
|
|
+ | ins_arithfallback ins_arithcheck_num
|
|
+ |.if "fpins" == "extern"
|
|
+ | .IOS mov RC, BASE
|
|
+ | bl fpcall
|
|
+ | .IOS mov BASE, RC
|
|
+ |.elif FPU
|
|
+ | fpins d0, d0, d1
|
|
+ |.else
|
|
+ | bl fpcall
|
|
+ |.endif
|
|
+ |.if ("fpins" ~= "extern" or HFABI) and FPU
|
|
+ | ins_arithpost_fpu d0
|
|
+ |.else
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |.endif
|
|
+ |.endmacro
|
|
+
|
|
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
|
|
+ | ins_arithdn adds, vadd.f64, extern __aeabi_dadd
|
|
+ break;
|
|
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
|
|
+ | ins_arithdn subs, vsub.f64, extern __aeabi_dsub
|
|
+ break;
|
|
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
|
|
+ | ins_arithdn smull, vmul.f64, extern __aeabi_dmul
|
|
+ break;
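The smull variant of ins_arithdn checks multiply overflow by comparing the high word of the 64-bit product with the sign extension of the low word: the product fits in 32 bits iff hi == lo >> 31, and a mismatch takes the arith fallback. In C:

    #include <stdint.h>

    /* Checked 32-bit multiply, mirroring smull + cmp RC, CARG1, asr #31. */
    static int mul_ovf(int32_t a, int32_t b, int32_t *res)
    {
      int64_t p = (int64_t)a * b;
      if ((int32_t)(p >> 32) != (int32_t)p >> 31) return 1;  /* overflow */
      *res = (int32_t)p;
      return 0;
    }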
|
|
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
|
|
+ | ins_arithfp vdiv.f64, extern __aeabi_ddiv
|
|
+ break;
|
|
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
|
|
+ | ins_arithdn vm_modi, vm_mod, ->vm_mod
|
|
+ break;
|
|
+ case BC_POW:
|
|
+ | // NYI: (partial) integer arithmetic.
|
|
+ | ins_arithfp extern, extern pow
|
|
+ break;
|
|
+
|
|
+ case BC_CAT:
|
|
+ | decode_RB8 RC, INS
|
|
+ | decode_RC8 RB, INS
|
|
+ | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!)
|
|
+ | sub CARG3, RB, RC
|
|
+ | str BASE, L->base
|
|
+ | add CARG2, BASE, RB
|
|
+ |->BC_CAT_Z:
|
|
+ | // RA = dst*8, RC = src_start*8, CARG2 = top-1
|
|
+ | mov CARG1, L
|
|
+ | str PC, SAVE_PC
|
|
+ | lsr CARG3, CARG3, #3
|
|
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
|
|
+ | // Returns NULL (finished) or TValue * (metamethod).
|
|
+ | ldr BASE, L->base
|
|
+ | cmp CRET1, #0
|
|
+ | bne ->vmeta_binop
|
|
+ | ldrd_i CARG3, CARG4, BASE, RC
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG3, CARG4, BASE, RA // Copy result to RA.
|
|
+ | ins_next3
|
|
+ break;
|
|
+
|
|
+ /* -- Constant ops ------------------------------------------------------ */
|
|
+
|
|
+ case BC_KSTR:
|
|
+ | // RA = dst*8, RC = str_const (~)
|
|
+ | mvn RC, RC
|
|
+ | ins_next1
|
|
+ | ldr CARG1, [KBASE, RC, lsl #2]
|
|
+ | mvn CARG2, #~LJ_TSTR
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_KCDATA:
|
|
+ |.if FFI
|
|
+ | // RA = dst*8, RC = cdata_const (~)
|
|
+ | mvn RC, RC
|
|
+ | ins_next1
|
|
+ | ldr CARG1, [KBASE, RC, lsl #2]
|
|
+ | mvn CARG2, #~LJ_TCDATA
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |.endif
|
|
+ break;
|
|
+ case BC_KSHORT:
|
|
+ | // RA = dst*8, (RC = int16_literal)
|
|
+ | mov CARG1, INS, asr #16 // Refetch sign-extended reg.
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_KNUM:
|
|
+ | // RA = dst*8, RC = num_const
|
|
+ | lsl RC, RC, #3
|
|
+ | ins_next1
|
|
+ | ldrd_i CARG1, CARG2, KBASE, RC
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_KPRI:
|
|
+ | // RA = dst*8, RC = primitive_type (~)
|
|
+ | add RA, BASE, RA
|
|
+ | mvn RC, RC
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | str RC, [RA, #4]
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_KNIL:
|
|
+ | // RA = base*8, RC = end
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, BASE, RC, lsl #3
|
|
+ | mvn CARG1, #~LJ_TNIL
|
|
+ | str CARG1, [RA, #4]
|
|
+ | add RA, RA, #8
|
|
+ |1:
|
|
+ | str CARG1, [RA, #4]
|
|
+ | cmp RA, RC
|
|
+ | add RA, RA, #8
|
|
+ | blt <1
|
|
+ | ins_next_
|
|
+ break;
|
|
+
|
|
+ /* -- Upvalue and function ops ------------------------------------------ */
|
|
+
|
|
+ case BC_UGET:
|
|
+ | // RA = dst*8, RC = uvnum
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | lsl RC, RC, #2
|
|
+ | add RC, RC, #offsetof(GCfuncL, uvptr)
|
|
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RC]
|
|
+ | ldr CARG2, UPVAL:CARG2->v
|
|
+ | ldrd CARG3, CARG4, [CARG2]
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG3, CARG4, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_USETV:
|
|
+ | // RA = uvnum*8, RC = src
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | lsr RA, RA, #1
|
|
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
|
|
+ | lsl RC, RC, #3
|
|
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
|
|
+ | ldrd_i CARG3, CARG4, BASE, RC
|
|
+ | ldrb RB, UPVAL:CARG2->marked
|
|
+ | ldrb RC, UPVAL:CARG2->closed
|
|
+ | ldr CARG2, UPVAL:CARG2->v
|
|
+ | tst RB, #LJ_GC_BLACK // isblack(uv)
|
|
+ | add RB, CARG4, #-LJ_TISGCV
|
|
+ | it ne
|
|
+ | cmpne RC, #0
|
|
+ | strd CARG3, CARG4, [CARG2]
|
|
+ | bne >2 // Upvalue is closed and black?
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check if new value is collectable.
|
|
+ | cmn RB, #-(LJ_TNUMX - LJ_TISGCV)
|
|
+ | it hi
|
|
+ | ldrbhi RC, GCOBJ:CARG3->gch.marked
|
|
+ | bls <1 // tvisgcv(v)
|
|
+ | sub CARG1, DISPATCH, #-GG_DISP2G
|
|
+ | tst RC, #LJ_GC_WHITES
|
|
+ | // Crossed a write barrier. Move the barrier forward.
|
|
+ |.if IOS
|
|
+ | beq <1
|
|
+ | mov RC, BASE
|
|
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ | mov BASE, RC
|
|
+ |.else
|
|
+ | it ne
|
|
+ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ |.endif
|
|
+ | b <1
|
|
+ break;
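+ /* The it/itt + cmpne/ldrbhi pairs above are Thumb-2 IT blocks: ARMv7-M
+ ** has no freely predicated instructions, so every conditional op from
+ ** the ARM version needs an explicit 'it <cond>' prefix. The barrier
+ ** logic itself is, as a C sketch with the lj_gc.h predicates (v = the
+ ** stored TValue, g = the global state reached via GG_DISP2G):
+ **   if (isblack(obj2gco(uv)) && uv->closed &&
+ **       tvisgcv(v) && iswhite(gcV(v)))
+ **     lj_gc_barrieruv(g, uvval(uv));
+ */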
|
|
+ case BC_USETS:
|
|
+ | // RA = uvnum*8, RC = str_const (~)
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | lsr RA, RA, #1
|
|
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
|
|
+ | mvn RC, RC
|
|
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
|
|
+ | ldr STR:CARG3, [KBASE, RC, lsl #2]
|
|
+ | ldrb RB, UPVAL:CARG2->marked
|
|
+ | ldrb RC, UPVAL:CARG2->closed
|
|
+ | ldr CARG2, UPVAL:CARG2->v
|
|
+ | mvn CARG4, #~LJ_TSTR
|
|
+ | tst RB, #LJ_GC_BLACK // isblack(uv)
|
|
+ | ldrb RB, STR:CARG3->marked
|
|
+ | strd CARG3, CARG4, [CARG2]
|
|
+ | bne >2
|
|
+ |1:
|
|
+ | ins_next
|
|
+ |
|
|
+ |2: // Check if string is white and ensure upvalue is closed.
|
|
+ | tst RB, #LJ_GC_WHITES // iswhite(str)
|
|
+ | it ne
|
|
+ | cmpne RC, #0
|
|
+ | sub CARG1, DISPATCH, #-GG_DISP2G
|
|
+ | // Crossed a write barrier. Move the barrier forward.
|
|
+ |.if IOS
|
|
+ | beq <1
|
|
+ | mov RC, BASE
|
|
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ | mov BASE, RC
|
|
+ |.else
|
|
+ | it ne
|
|
+ | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv)
|
|
+ |.endif
|
|
+ | b <1
|
|
+ break;
|
|
+ case BC_USETN:
|
|
+ | // RA = uvnum*8, RC = num_const
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | lsr RA, RA, #1
|
|
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
|
|
+ | lsl RC, RC, #3
|
|
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
|
|
+ | ldrd_i CARG3, CARG4, KBASE, RC
|
|
+ | ldr CARG2, UPVAL:CARG2->v
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd CARG3, CARG4, [CARG2]
|
|
+ | ins_next3
|
|
+ break;
|
|
+ case BC_USETP:
|
|
+ | // RA = uvnum*8, RC = primitive_type (~)
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | lsr RA, RA, #1
|
|
+ | add RA, RA, #offsetof(GCfuncL, uvptr)
|
|
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA]
|
|
+ | mvn RC, RC
|
|
+ | ldr CARG2, UPVAL:CARG2->v
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | str RC, [CARG2, #4]
|
|
+ | ins_next3
|
|
+ break;
|
|
+
|
|
+ case BC_UCLO:
|
|
+ | // RA = level*8, RC = target
|
|
+ | ldr CARG3, L->openupval
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ | str BASE, L->base
|
|
+ | cmp CARG3, #0
|
|
+ | sub PC, RC, #0x20000
|
|
+ | beq >1
|
|
+ | mov CARG1, L
|
|
+ | add CARG2, BASE, RA
|
|
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
|
|
+ | ldr BASE, L->base
|
|
+ |1:
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_FNEW:
|
|
+ | // RA = dst*8, RC = proto_const (~) (holding function prototype)
|
|
+ | mvn RC, RC
|
|
+ | str BASE, L->base
|
|
+ | ldr CARG2, [KBASE, RC, lsl #2]
|
|
+ | str PC, SAVE_PC
|
|
+ | ldr CARG3, [BASE, FRAME_FUNC]
|
|
+ | mov CARG1, L
|
|
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
|
|
+ | bl extern lj_func_newL_gc
|
|
+ | // Returns GCfuncL *.
|
|
+ | ldr BASE, L->base
|
|
+ | mvn CARG2, #~LJ_TFUNC
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+
|
|
+ /* -- Table ops --------------------------------------------------------- */
|
|
+
|
|
+ case BC_TNEW:
|
|
+ case BC_TDUP:
|
|
+ | // RA = dst*8, RC = (hbits|asize) | tab_const (~)
|
|
+ if (op == BC_TDUP) {
|
|
+ | mvn RC, RC
|
|
+ }
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total)
|
|
+ | ldr CARG3, [CARG1]
|
|
+ | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.threshold)
|
|
+ | ldr CARG4, [CARG1]
|
|
+ | str BASE, L->base
|
|
+ | str PC, SAVE_PC
|
|
+ | cmp CARG3, CARG4
|
|
+ | mov CARG1, L
|
|
+ | bhs >5
|
|
+ |1:
|
|
+ if (op == BC_TNEW) {
|
|
+ | lsl CARG2, RC, #21
|
|
+ | lsr CARG3, RC, #11
|
|
+ | asr RC, CARG2, #21
|
|
+ | lsr CARG2, CARG2, #21
|
|
+ | cmn RC, #1
|
|
+ | it eq
|
|
+ | addeq CARG2, CARG2, #2
|
|
+ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
|
|
+ | // Returns GCtab *.
|
|
+ } else {
|
|
+ | ldr CARG2, [KBASE, RC, lsl #2]
|
|
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
|
|
+ | // Returns GCtab *.
|
|
+ }
|
|
+ | ldr BASE, L->base
|
|
+ | mvn CARG2, #~LJ_TTAB
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ |5:
|
|
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
|
|
+ | mov CARG1, L
|
|
+ | b <1
|
|
+ break;
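+ /* TNEW operand decode, sketched in C: the low 11 bits of RC hold the
+ ** array size and the upper bits the hash size (log2); the cmn/addeq
+ ** pair implements the 0x7ff escape value:
+ **   uint32_t asize = rc & 0x7ff, hbits = rc >> 11;
+ **   if (asize == 0x7ff) asize = 0x801;
+ **   lj_tab_new(L, asize, hbits);
+ */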
|
|
+
|
|
+ case BC_GGET:
|
|
+ | // RA = dst*8, RC = str_const (~)
|
|
+ case BC_GSET:
|
|
+ | // RA = src*8, RC = str_const (~)
|
|
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
|
|
+ | mvn RC, RC
|
|
+ | ldr TAB:CARG1, LFUNC:CARG2->env
|
|
+ | ldr STR:RC, [KBASE, RC, lsl #2]
|
|
+ if (op == BC_GGET) {
|
|
+ | b ->BC_TGETS_Z
|
|
+ } else {
|
|
+ | b ->BC_TSETS_Z
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case BC_TGETV:
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = dst*8, RB = table*8, RC = key*8
|
|
+ | ldrd_i TAB:CARG1, CARG2, BASE, RB
|
|
+ | ldrd_i CARG3, CARG4, BASE, RC
|
|
+ | checktab CARG2, ->vmeta_tgetv // STALL: load CARG1, CARG2.
|
|
+ | checktp CARG4, LJ_TISNUM // Integer key?
|
|
+ | it eq
|
|
+ | ldreq CARG4, TAB:CARG1->array
|
|
+ | it eq
|
|
+ | ldreq CARG2, TAB:CARG1->asize
|
|
+ | bne >9
|
|
+ |
|
|
+ | add CARG4, CARG4, CARG3, lsl #3
|
|
+ | cmp CARG3, CARG2 // In array part?
|
|
+ | it lo
|
|
+ | ldrdlo CARG3, CARG4, [CARG4]
|
|
+ | bhs ->vmeta_tgetv
|
|
+ | ins_next1 // Overwrites RB!
|
|
+ | checktp CARG4, LJ_TNIL
|
|
+ | beq >5
|
|
+ |1:
|
|
+ | ins_next2
|
|
+ | strd_i CARG3, CARG4, BASE, RA
|
|
+ | ins_next3
|
|
+ |
|
|
+ |5: // Check for __index if table value is nil.
|
|
+ | ldr TAB:CARG2, TAB:CARG1->metatable
|
|
+ | cmp TAB:CARG2, #0
|
|
+ | beq <1 // No metatable: done.
|
|
+ | ldrb CARG2, TAB:CARG2->nomm
|
|
+ | tst CARG2, #1<<MM_index
|
|
+ | bne <1 // 'no __index' flag set: done.
|
|
+ | decode_RB8 RB, INS // Restore RB.
|
|
+ | b ->vmeta_tgetv
|
|
+ |
|
|
+ |9:
|
|
+ | checktp CARG4, LJ_TSTR // String key?
|
|
+ | it eq
|
|
+ | moveq STR:RC, CARG3
|
|
+ | beq ->BC_TGETS_Z
|
|
+ | b ->vmeta_tgetv
|
|
+ break;
|
|
+ case BC_TGETS:
|
|
+ | decode_RB8 RB, INS
|
|
+ | and RC, RC, #255
|
|
+ | // RA = dst*8, RB = table*8, RC = str_const (~)
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | mvn RC, RC
|
|
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
|
|
+ | checktab CARG2, ->vmeta_tgets1
|
|
+ |->BC_TGETS_Z:
|
|
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
|
|
+ | ldr CARG3, TAB:CARG1->hmask
|
|
+ | ldr CARG4, STR:RC->sid
|
|
+ | ldr NODE:INS, TAB:CARG1->node
|
|
+ | mov TAB:RB, TAB:CARG1
|
|
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
|
|
+ | add CARG3, CARG3, CARG3, lsl #1
|
|
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
|
|
+ |1:
|
|
+ | ldrd CARG1, CARG2, NODE:INS->key // STALL: early NODE:INS.
|
|
+ | ldrd CARG3, CARG4, NODE:INS->val
|
|
+ | ldr NODE:INS, NODE:INS->next
|
|
+ | checktp CARG2, LJ_TSTR
|
|
+ | it eq
|
|
+ | cmpeq CARG1, STR:RC
|
|
+ | bne >4
|
|
+ | checktp CARG4, LJ_TNIL
|
|
+ | beq >5
|
|
+ |3:
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG3, CARG4, BASE, RA
|
|
+ | ins_next3
|
|
+ |
|
|
+ |4: // Follow hash chain.
|
|
+ | cmp NODE:INS, #0
|
|
+ | bne <1
|
|
+ | // End of hash chain: key not found, nil result.
|
|
+ |
|
|
+ |5: // Check for __index if table value is nil.
|
|
+ | ldr TAB:CARG1, TAB:RB->metatable
|
|
+ | mov CARG3, #0 // Optional clear of undef. value (during load stall).
|
|
+ | mvn CARG4, #~LJ_TNIL
|
|
+ | cmp TAB:CARG1, #0
|
|
+ | beq <3 // No metatable: done.
|
|
+ | ldrb CARG2, TAB:CARG1->nomm
|
|
+ | tst CARG2, #1<<MM_index
|
|
+ | bne <3 // 'no __index' flag set: done.
|
|
+ | b ->vmeta_tgets
|
|
+ break;
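+ /* C view of the hash-part lookup above (a sketch using the lj_obj.h
+ ** accessors): a Node is three 8-byte TValue-sized words, hence the
+ ** tab->node + idx*3*8 addressing.
+ **   Node *n = noderef(t->node) + (s->sid & t->hmask);
+ **   do {
+ **     if (tvisstr(&n->key) && strV(&n->key) == s)
+ **       return &n->val;
+ **   } while ((n = nextnode(n)) != NULL);
+ **   return niltv(L);
+ ** A nil result (or a miss) then falls through to the __index check.
+ */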
|
|
+ case BC_TGETB:
|
|
+ | decode_RB8 RB, INS
|
|
+ | and RC, RC, #255
|
|
+ | // RA = dst*8, RB = table*8, RC = index
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | checktab CARG2, ->vmeta_tgetb // STALL: load CARG1, CARG2.
|
|
+ | ldr CARG3, TAB:CARG1->asize
|
|
+ | ldr CARG4, TAB:CARG1->array
|
|
+ | lsl CARG2, RC, #3
|
|
+ | cmp RC, CARG3
|
|
+ | ldrdlo_i CARG3, CARG4, CARG4, CARG2
|
|
+ | bhs ->vmeta_tgetb
|
|
+ | ins_next1 // Overwrites RB!
|
|
+ | checktp CARG4, LJ_TNIL
|
|
+ | beq >5
|
|
+ |1:
|
|
+ | ins_next2
|
|
+ | strd_i CARG3, CARG4, BASE, RA
|
|
+ | ins_next3
|
|
+ |
|
|
+ |5: // Check for __index if table value is nil.
|
|
+ | ldr TAB:CARG2, TAB:CARG1->metatable
|
|
+ | cmp TAB:CARG2, #0
|
|
+ | beq <1 // No metatable: done.
|
|
+ | ldrb CARG2, TAB:CARG2->nomm
|
|
+ | tst CARG2, #1<<MM_index
|
|
+ | bne <1 // 'no __index' flag set: done.
|
|
+ | b ->vmeta_tgetb
|
|
+ break;
|
|
+ case BC_TGETR:
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = dst*8, RB = table*8, RC = key*8
|
|
+ | ldr TAB:CARG1, [BASE, RB]
|
|
+ | ldr CARG2, [BASE, RC]
|
|
+ | ldr CARG4, TAB:CARG1->array
|
|
+ | ldr CARG3, TAB:CARG1->asize
|
|
+ | add CARG4, CARG4, CARG2, lsl #3
|
|
+ | cmp CARG2, CARG3 // In array part?
|
|
+ | bhs ->vmeta_tgetr
|
|
+ | ldrd CARG1, CARG2, [CARG4]
|
|
+ |->BC_TGETR_Z:
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd_i CARG1, CARG2, BASE, RA
|
|
+ | ins_next3
|
|
+ break;
|
|
+
|
|
+ case BC_TSETV:
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = src*8, RB = table*8, RC = key*8
|
|
+ | ldrd_i TAB:CARG1, CARG2, BASE, RB
|
|
+ | ldrd_i CARG3, CARG4, BASE, RC
|
|
+ | checktab CARG2, ->vmeta_tsetv // STALL: load CARG1, CARG2.
|
|
+ | checktp CARG4, LJ_TISNUM // Integer key?
|
|
+ | it eq
|
|
+ | ldreq CARG2, TAB:CARG1->array
|
|
+ | it eq
|
|
+ | ldreq CARG4, TAB:CARG1->asize
|
|
+ | bne >9
|
|
+ |
|
|
+ | add CARG2, CARG2, CARG3, lsl #3
|
|
+ | cmp CARG3, CARG4 // In array part?
|
|
+ | it lo
|
|
+ | ldrlo INS, [CARG2, #4]
|
|
+ | bhs ->vmeta_tsetv
|
|
+ | ins_next1 // Overwrites RB!
|
|
+ | checktp INS, LJ_TNIL
|
|
+ | ldrb INS, TAB:CARG1->marked
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | beq >5
|
|
+ |1:
|
|
+ | tst INS, #LJ_GC_BLACK // isblack(table)
|
|
+ | strd CARG3, CARG4, [CARG2]
|
|
+ | bne >7
|
|
+ |2:
|
|
+ | ins_next2
|
|
+ | ins_next3
|
|
+ |
|
|
+ |5: // Check for __newindex if previous value is nil.
|
|
+ | ldr TAB:RA, TAB:CARG1->metatable
|
|
+ | cmp TAB:RA, #0
|
|
+ | beq <1 // No metatable: done.
|
|
+ | ldrb RA, TAB:RA->nomm
|
|
+ | tst RA, #1<<MM_newindex
|
|
+ | bne <1 // 'no __newindex' flag set: done.
|
|
+ | ldr INS, [PC, #-4] // Restore RA and RB.
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RA8 RA, INS
|
|
+ | b ->vmeta_tsetv
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG1, INS, CARG3
|
|
+ | b <2
|
|
+ |
|
|
+ |9:
|
|
+ | checktp CARG4, LJ_TSTR // String key?
|
|
+ | it eq
|
|
+ | moveq STR:RC, CARG3
|
|
+ | beq ->BC_TSETS_Z
|
|
+ | b ->vmeta_tsetv
|
|
+ break;
|
|
+ case BC_TSETS:
|
|
+ | decode_RB8 RB, INS
|
|
+ | and RC, RC, #255
|
|
+ | // RA = src*8, RB = table*8, RC = str_const (~)
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | mvn RC, RC
|
|
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
|
|
+ | checktab CARG2, ->vmeta_tsets1
|
|
+ |->BC_TSETS_Z:
|
|
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = src*8
|
|
+ | ldr CARG3, TAB:CARG1->hmask
|
|
+ | ldr CARG4, STR:RC->sid
|
|
+ | ldr NODE:INS, TAB:CARG1->node
|
|
+ | mov TAB:RB, TAB:CARG1
|
|
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
|
|
+ | add CARG3, CARG3, CARG3, lsl #1
|
|
+ | mov CARG4, #0
|
|
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
|
|
+ | strb CARG4, TAB:RB->nomm // Clear metamethod cache.
|
|
+ |1:
|
|
+ | ldrd CARG1, CARG2, NODE:INS->key
|
|
+ | ldr CARG4, NODE:INS->val.it
|
|
+ | ldr NODE:CARG3, NODE:INS->next
|
|
+ | checktp CARG2, LJ_TSTR
|
|
+ | it eq
|
|
+ | cmpeq CARG1, STR:RC
|
|
+ | bne >5
|
|
+ | ldrb CARG2, TAB:RB->marked
|
|
+ | checktp CARG4, LJ_TNIL // Key found, but nil value?
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | beq >4
|
|
+ |2:
|
|
+ | tst CARG2, #LJ_GC_BLACK // isblack(table)
|
|
+ | strd CARG3, CARG4, NODE:INS->val
|
|
+ | bne >7
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |4: // Check for __newindex if previous value is nil.
|
|
+ | ldr TAB:CARG1, TAB:RB->metatable
|
|
+ | cmp TAB:CARG1, #0
|
|
+ | beq <2 // No metatable: done.
|
|
+ | ldrb CARG1, TAB:CARG1->nomm
|
|
+ | tst CARG1, #1<<MM_newindex
|
|
+ | bne <2 // 'no __newindex' flag set: done.
|
|
+ | b ->vmeta_tsets
|
|
+ |
|
|
+ |5: // Follow hash chain.
|
|
+ | movs NODE:INS, NODE:CARG3
|
|
+ | bne <1
|
|
+ | // End of hash chain: key not found, add a new one.
|
|
+ |
|
|
+ | // But check for __newindex first.
|
|
+ | ldr TAB:CARG1, TAB:RB->metatable
|
|
+ | mov CARG3, TMPDp
|
|
+ | str PC, SAVE_PC
|
|
+ | cmp TAB:CARG1, #0 // No metatable: continue.
|
|
+ | str BASE, L->base
|
|
+ | it ne
|
|
+ | ldrbne CARG2, TAB:CARG1->nomm
|
|
+ | mov CARG1, L
|
|
+ | beq >6
|
|
+ | tst CARG2, #1<<MM_newindex
|
|
+ | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
|
|
+ |6:
|
|
+ | mvn CARG4, #~LJ_TSTR
|
|
+ | str STR:RC, TMPDlo
|
|
+ | mov CARG2, TAB:RB
|
|
+ | str CARG4, TMPDhi
|
|
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
|
|
+ | // Returns TValue *.
|
|
+ | ldr BASE, L->base
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | strd CARG3, CARG4, [CRET1]
|
|
+ | b <3 // No 2nd write barrier needed.
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:RB, CARG2, CARG3
|
|
+ | b <3
|
|
+ break;
|
|
+ case BC_TSETB:
|
|
+ | decode_RB8 RB, INS
|
|
+ | and RC, RC, #255
|
|
+ | // RA = src*8, RB = table*8, RC = index
|
|
+ | ldrd_i CARG1, CARG2, BASE, RB
|
|
+ | checktab CARG2, ->vmeta_tsetb // STALL: load CARG1, CARG2.
|
|
+ | ldr CARG3, TAB:CARG1->asize
|
|
+ | ldr RB, TAB:CARG1->array
|
|
+ | lsl CARG2, RC, #3
|
|
+ | cmp RC, CARG3
|
|
+ | ldrdlo_iw CARG3, CARG4, CARG2, RB
|
|
+ | bhs ->vmeta_tsetb
|
|
+ | ins_next1 // Overwrites RB!
|
|
+ | checktp CARG4, LJ_TNIL
|
|
+ | ldrb INS, TAB:CARG1->marked
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | beq >5
|
|
+ |1:
|
|
+ | tst INS, #LJ_GC_BLACK // isblack(table)
|
|
+ | strd CARG3, CARG4, [CARG2]
|
|
+ | bne >7
|
|
+ |2:
|
|
+ | ins_next2
|
|
+ | ins_next3
|
|
+ |
|
|
+ |5: // Check for __newindex if previous value is nil.
|
|
+ | ldr TAB:RA, TAB:CARG1->metatable
|
|
+ | cmp TAB:RA, #0
|
|
+ | beq <1 // No metatable: done.
|
|
+ | ldrb RA, TAB:RA->nomm
|
|
+ | tst RA, #1<<MM_newindex
|
|
+ | bne <1 // 'no __newindex' flag set: done.
|
|
+ | ldr INS, [PC, #-4] // Restore INS.
|
|
+ | decode_RA8 RA, INS
|
|
+ | b ->vmeta_tsetb
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG1, INS, CARG3
|
|
+ | b <2
|
|
+ break;
|
|
+ case BC_TSETR:
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = src*8, RB = table*8, RC = key*8
|
|
+ | ldr TAB:CARG2, [BASE, RB]
|
|
+ | ldr CARG3, [BASE, RC]
|
|
+ | ldrb INS, TAB:CARG2->marked
|
|
+ | ldr CARG1, TAB:CARG2->array
|
|
+ | ldr CARG4, TAB:CARG2->asize
|
|
+ | tst INS, #LJ_GC_BLACK // isblack(table)
|
|
+ | add CARG1, CARG1, CARG3, lsl #3
|
|
+ | bne >7
|
|
+ |2:
|
|
+ | cmp CARG3, CARG4 // In array part?
|
|
+ | bhs ->vmeta_tsetr
|
|
+ |->BC_TSETR_Z:
|
|
+ | ldrd_i CARG3, CARG4, BASE, RA
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd CARG3, CARG4, [CARG1]
|
|
+ | ins_next3
|
|
+ |
|
|
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG2, INS, RB
|
|
+ | b <2
|
|
+ break;
|
|
+
|
|
+ case BC_TSETM:
|
|
+ | // RA = base*8 (table at base-1), RC = num_const (start index)
|
|
+ | add RA, BASE, RA
|
|
+ |1:
|
|
+ | ldr RB, SAVE_MULTRES
|
|
+ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
|
|
+ | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
|
|
+ | subs RB, RB, #8
|
|
+ | ldr CARG4, TAB:CARG2->asize
|
|
+ | beq >4 // Nothing to copy?
|
|
+ | add CARG3, CARG1, RB, lsr #3
|
|
+ | cmp CARG3, CARG4
|
|
+ | ldr CARG4, TAB:CARG2->array
|
|
+ | add RB, RA, RB
|
|
+ | bhi >5
|
|
+ | add INS, CARG4, CARG1, lsl #3
|
|
+ | ldrb CARG1, TAB:CARG2->marked
|
|
+ |3: // Copy result slots to table.
|
|
+ | ldrd CARG3, CARG4, [RA], #8
|
|
+ | strd CARG3, CARG4, [INS], #8
|
|
+ | cmp RA, RB
|
|
+ | blo <3
|
|
+ | tst CARG1, #LJ_GC_BLACK // isblack(table)
|
|
+ | bne >7
|
|
+ |4:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Need to resize array part.
|
|
+ | str BASE, L->base
|
|
+ | mov CARG1, L
|
|
+ | str PC, SAVE_PC
|
|
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
|
|
+ | // Must not reallocate the stack.
|
|
+ | .IOS ldr BASE, L->base
|
|
+ | b <1
|
|
+ |
|
|
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
|
|
+ | barrierback TAB:CARG2, CARG1, CARG3
|
|
+ | b <4
|
|
+ break;
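+ /* TSETM in C terms (a sketch; copyTV/arrayslot from lj_obj.h): the
+ ** table sits in slot RA-1, k is the integer start index from the
+ ** number constant, and MULTRES holds (nresults+1)*8:
+ **   uint32_t n = (MULTRES >> 3) - 1;
+ **   if (k + n > t->asize) lj_tab_reasize(L, t, k + n);
+ **   while (n--) copyTV(L, arrayslot(t, k++), ra++);
+ ** followed by the usual back-barrier when t is black.
+ */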
|
|
+
|
|
+ /* -- Calls and vararg handling ----------------------------------------- */
|
|
+
|
|
+ case BC_CALLM:
|
|
+ | // RA = base*8, (RB = nresults+1,) RC = extra_nargs
|
|
+ | ldr CARG1, SAVE_MULTRES
|
|
+ | decode_RC8 NARGS8:RC, INS
|
|
+ | add NARGS8:RC, NARGS8:RC, CARG1
|
|
+ | b ->BC_CALL_Z
|
|
+ break;
|
|
+ case BC_CALL:
|
|
+ | decode_RC8 NARGS8:RC, INS
|
|
+ | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8
|
|
+ |->BC_CALL_Z:
|
|
+ | mov RB, BASE // Save old BASE for vmeta_call.
|
|
+ | ldrd_iw CARG3, CARG4, BASE, RA
|
|
+ | sub NARGS8:RC, NARGS8:RC, #8
|
|
+ | add BASE, BASE, #8
|
|
+ | checkfunc CARG4, ->vmeta_call
|
|
+ | ins_call
|
|
+ break;
|
|
+
|
|
+ case BC_CALLMT:
|
|
+ | // RA = base*8, (RB = 0,) RC = extra_nargs
|
|
+ | ldr CARG1, SAVE_MULTRES
|
|
+ | add NARGS8:RC, CARG1, RC, lsl #3
|
|
+ | b ->BC_CALLT1_Z
|
|
+ break;
|
|
+ case BC_CALLT:
|
|
+ | lsl NARGS8:RC, RC, #3
|
|
+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
|
|
+ |->BC_CALLT1_Z:
|
|
+ | ldrd_iw LFUNC:CARG3, CARG4, RA, BASE
|
|
+ | sub NARGS8:RC, NARGS8:RC, #8
|
|
+ | add RA, RA, #8
|
|
+ | checkfunc CARG4, ->vmeta_callt
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ |->BC_CALLT2_Z:
|
|
+ | mov RB, #0
|
|
+ | ldrb CARG4, LFUNC:CARG3->ffid
|
|
+ | tst PC, #FRAME_TYPE
|
|
+ | bne >7
|
|
+ |1:
|
|
+ | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
|
|
+ | cmp NARGS8:RC, #0
|
|
+ | beq >3
|
|
+ |2:
|
|
+ | ldrd_i CARG1, CARG2, RA, RB
|
|
+ | add INS, RB, #8
|
|
+ | cmp INS, NARGS8:RC
|
|
+ | strd_i CARG1, CARG2, BASE, RB
|
|
+ | mov RB, INS
|
|
+ | bne <2
|
|
+ |3:
|
|
+ | cmp CARG4, #1 // (> FF_C) Calling a fast function?
|
|
+ | bhi >5
|
|
+ |4:
|
|
+ | ins_callt
|
|
+ |
|
|
+ |5: // Tailcall to a fast function with a Lua frame below.
|
|
+ | ldr INS, [PC, #-4]
|
|
+ | decode_RA8 RA, INS
|
|
+ | sub CARG1, BASE, RA
|
|
+ | ldr LFUNC:CARG1, [CARG1, #-16]
|
|
+ | ldr CARG1, LFUNC:CARG1->field_pc
|
|
+ | ldr KBASE, [CARG1, #PC2PROTO(k)]
|
|
+ | b <4
|
|
+ |
|
|
+ |7: // Tailcall from a vararg function.
|
|
+ | eor PC, PC, #FRAME_VARG
|
|
+ | tst PC, #FRAME_TYPEP // Vararg frame below?
|
|
+ | it ne
|
|
+ | movne CARG4, #0 // Clear ffid if no Lua function below.
|
|
+ | bne <1
|
|
+ | sub BASE, BASE, PC
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | tst PC, #FRAME_TYPE
|
|
+ | it ne
|
|
+ | movne CARG4, #0 // Clear ffid if no Lua function below.
|
|
+ | b <1
|
|
+ break;
|
|
+
|
|
+ case BC_ITERC:
|
|
+ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
|
|
+ | add RA, BASE, RA
|
|
+ | mov RB, BASE // Save old BASE for vmeta_call.
|
|
+ | ldrd CARG3, CARG4, [RA, #-16]
|
|
+ | ldrd CARG1, CARG2, [RA, #-8]
|
|
+ | add BASE, RA, #8
|
|
+ | strd CARG3, CARG4, [RA, #8] // Copy state.
|
|
+ | strd CARG1, CARG2, [RA, #16] // Copy control var.
|
|
+ | // STALL: locked CARG3, CARG4.
|
|
+ | ldrd LFUNC:CARG3, CARG4, [RA, #-24]
|
|
+ | mov NARGS8:RC, #16 // Iterators get 2 arguments.
|
|
+ | // STALL: load CARG3, CARG4.
|
|
+ | strd LFUNC:CARG3, CARG4, [RA] // Copy callable.
|
|
+ | checkfunc CARG4, ->vmeta_call
|
|
+ | ins_call
|
|
+ break;
|
|
+
|
|
+ case BC_ITERN:
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
+ |->vm_IITERN:
|
|
+ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
|
|
+ | add RA, BASE, RA
|
|
+ | ldr TAB:RB, [RA, #-16]
|
|
+ | ldr CARG1, [RA, #-8] // Get index from control var.
|
|
+ | ldr INS, TAB:RB->asize
|
|
+ | ldr CARG2, TAB:RB->array
|
|
+ | add PC, PC, #4
|
|
+ |1: // Traverse array part.
|
|
+ | subs RC, CARG1, INS
|
|
+ | add CARG3, CARG2, CARG1, lsl #3
|
|
+ | bhs >5 // Index points after array part?
|
|
+ | ldrd CARG3, CARG4, [CARG3]
|
|
+ | checktp CARG4, LJ_TNIL
|
|
+ | it eq
|
|
+ | addeq CARG1, CARG1, #1 // Skip holes in array part.
|
|
+ | beq <1
|
|
+ | ldrh RC, [PC, #-2]
|
|
+ | mvn CARG2, #~LJ_TISNUM
|
|
+ | strd CARG3, CARG4, [RA, #8]
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ | add RB, CARG1, #1
|
|
+ | strd CARG1, CARG2, [RA]
|
|
+ | sub PC, RC, #0x20000
|
|
+ | str RB, [RA, #-8] // Update control var.
|
|
+ |3:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Traverse hash part.
|
|
+ | ldr CARG4, TAB:RB->hmask
|
|
+ | ldr NODE:RB, TAB:RB->node
|
|
+ |6:
|
|
+ | add CARG1, RC, RC, lsl #1
|
|
+ | cmp RC, CARG4 // End of iteration? Branch to ITERL+1.
|
|
+ | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
|
|
+ | bhi <3
|
|
+ | ldrd CARG1, CARG2, NODE:CARG3->val
|
|
+ | checktp CARG2, LJ_TNIL
|
|
+ | add RC, RC, #1
|
|
+ | beq <6 // Skip holes in hash part.
|
|
+ | ldrh RB, [PC, #-2]
|
|
+ | add RC, RC, INS
|
|
+ | ldrd CARG3, CARG4, NODE:CARG3->key
|
|
+ | str RC, [RA, #-8] // Update control var.
|
|
+ | strd CARG1, CARG2, [RA, #8]
|
|
+ | add RC, PC, RB, lsl #2
|
|
+ | sub PC, RC, #0x20000
|
|
+ | strd CARG3, CARG4, [RA]
|
|
+ | b <3
|
|
+ break;
|
|
+
|
|
+ case BC_ISNEXT:
|
|
+ | // RA = base*8, RC = target (points to ITERN)
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ | ldrd CFUNC:CARG1, CFUNC:CARG2, [RA, #-24]
|
|
+ | ldr CARG3, [RA, #-12]
|
|
+ | ldr CARG4, [RA, #-4]
|
|
+ | checktp CARG2, LJ_TFUNC
|
|
+ | it eq
|
|
+ | ldrbeq CARG1, CFUNC:CARG1->ffid
|
|
+ | checktpeq CARG3, LJ_TTAB
|
|
+ | checktpeq CARG4, LJ_TNIL
|
|
+ | it eq
|
|
+ | cmpeq CARG1, #FF_next_N
|
|
+ | it eq
|
|
+ | subeq PC, RC, #0x20000
|
|
+ | bne >5
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | mov CARG1, #0
|
|
+ | mvn CARG2, #~LJ_KEYINDEX
|
|
+ | strd CARG1, CARG2, [RA, #-8] // Initialize control var.
|
|
+ |1:
|
|
+ | ins_next3
|
|
+ |5: // Despecialize bytecode if any of the checks fail.
|
|
+ | mov CARG1, #BC_JMP
|
|
+ | mov OP, #BC_ITERC
|
|
+ | strb CARG1, [PC, #-4]
|
|
+ | sub PC, RC, #0x20000
|
|
+ |.if JIT
|
|
+ | ldrb CARG1, [PC]
|
|
+ | cmp CARG1, #BC_ITERN
|
|
+ | bne >6
|
|
+ |.endif
|
|
+ | strb OP, [PC] // Subsumes ins_next1.
|
|
+ | ins_next2
|
|
+ | b <1
|
|
+ |.if JIT
|
|
+ |6: // Unpatch JLOOP.
|
|
+ | sub CARG2, DISPATCH, #-DISPATCH_J(trace)
|
|
+ | ldr CARG1, [CARG2]
|
|
+ | ldrh CARG2, [PC, #2]
|
|
+ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
|
|
+ | // Subsumes ins_next1 and ins_next2.
|
|
+ | ldr INS, TRACE:CARG1->startins
|
|
+ | .long 0xf36c0e07 // BFI INS, OP, #0, #8
|
|
+ | str INS, [PC], #4
|
|
+ | b <1
|
|
+ |.endif
|
|
+ break;
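+ /* The despecialization above patches bytecode in place; in C terms:
+ **   ((uint8_t *)pc)[-4] = BC_JMP;    (the ISNEXT slot)
+ **   ((uint8_t *)pc)[0]  = BC_ITERC;  (the ITERN slot)
+ ** so the specialized ISNEXT/ITERN pair degrades to a plain JMP/ITERC
+ ** when any of the next()/pairs() shape checks fails; the JIT path
+ ** additionally unpatches a JLOOP already sitting in the ITERN slot.
+ */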
|
|
+
|
|
+ case BC_VARG:
|
|
+ | decode_RB8 RB, INS
|
|
+ | decode_RC8 RC, INS
|
|
+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
|
|
+ | ldr CARG1, [BASE, FRAME_PC]
|
|
+ | add RC, BASE, RC
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, RC, #FRAME_VARG
|
|
+ | add CARG4, RA, RB
|
|
+ | sub CARG3, BASE, #8 // CARG3 = vtop
|
|
+ | sub RC, RC, CARG1 // RC = vbase
|
|
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
|
|
+ | cmp RB, #0
|
|
+ | sub CARG1, CARG3, RC
|
|
+ | beq >5 // Copy all varargs?
|
|
+ | sub CARG4, CARG4, #16
|
|
+ |1: // Copy vararg slots to destination slots.
|
|
+ | cmp RC, CARG3
|
|
+ | ite lo
|
|
+ | ldrdlo CARG1, CARG2, [RC], #8
|
|
+ | mvnhs CARG2, #~LJ_TNIL
|
|
+ | cmp RA, CARG4
|
|
+ | strd CARG1, CARG2, [RA], #8
|
|
+ | blo <1
|
|
+ |2:
|
|
+ | ins_next
|
|
+ |
|
|
+ |5: // Copy all varargs.
|
|
+ | ldr CARG4, L->maxstack
|
|
+ | cmp CARG1, #0
|
|
+ | ite le
|
|
+ | movle RB, #8 // MULTRES = (0+1)*8
|
|
+ | addgt RB, CARG1, #8
|
|
+ | add CARG2, RA, CARG1
|
|
+ | str RB, SAVE_MULTRES
|
|
+ | ble <2
|
|
+ | cmp CARG2, CARG4
|
|
+ | bhi >7
|
|
+ |6:
|
|
+ | ldrd CARG1, CARG2, [RC], #8
|
|
+ | strd CARG1, CARG2, [RA], #8
|
|
+ | cmp RC, CARG3
|
|
+ | blo <6
|
|
+ | b <2
|
|
+ |
|
|
+ |7: // Grow stack for varargs.
|
|
+ | lsr CARG2, CARG1, #3
|
|
+ | str RA, L->top
|
|
+ | mov CARG1, L
|
|
+ | str BASE, L->base
|
|
+ | sub RC, RC, BASE // Need delta, because BASE may change.
|
|
+ | str PC, SAVE_PC
|
|
+ | sub RA, RA, BASE
|
|
+ | bl extern lj_state_growstack // (lua_State *L, int n)
|
|
+ | ldr BASE, L->base
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, BASE, RC
|
|
+ | sub CARG3, BASE, #8
|
|
+ | b <6
|
|
+ break;
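+ /* The VARG copy loops above, sketched in C: vararg slots beyond the
+ ** actual arguments read as nil.
+ **   while (dst < dstend)
+ **     copyTV(L, dst++, src < vtop ? src++ : niltv(L));
+ ** The 'ite lo' / ldrdlo / mvnhs triple is the Thumb-2 IT-block form of
+ ** the ARM version's conditional load-or-nil.
+ */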
|
|
+
|
|
+ /* -- Returns ----------------------------------------------------------- */
|
|
+
|
|
+ case BC_RETM:
|
|
+ | // RA = results*8, RC = extra results
|
|
+ | ldr CARG1, SAVE_MULTRES
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | add RA, BASE, RA
|
|
+ | add RC, CARG1, RC, lsl #3
|
|
+ | b ->BC_RETM_Z
|
|
+ break;
|
|
+
|
|
+ case BC_RET:
|
|
+ | // RA = results*8, RC = nresults+1
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | lsl RC, RC, #3
|
|
+ | add RA, BASE, RA
|
|
+ |->BC_RETM_Z:
|
|
+ | str RC, SAVE_MULTRES
|
|
+ |1:
|
|
+ | ands CARG1, PC, #FRAME_TYPE
|
|
+ | eor CARG2, PC, #FRAME_VARG
|
|
+ | bne ->BC_RETV2_Z
|
|
+ |
|
|
+ |->BC_RET_Z:
|
|
+ | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
|
|
+ | ldr INS, [PC, #-4]
|
|
+ | subs CARG4, RC, #8
|
|
+ | sub CARG3, BASE, #8
|
|
+ | beq >3
|
|
+ |2:
|
|
+ | ldrd CARG1, CARG2, [RA], #8
|
|
+ | add BASE, BASE, #8
|
|
+ | subs CARG4, CARG4, #8
|
|
+ | strd CARG1, CARG2, [BASE, #-16]
|
|
+ | bne <2
|
|
+ |3:
|
|
+ | decode_RA8 RA, INS
|
|
+ | sub CARG4, CARG3, RA
|
|
+ | decode_RB8 RB, INS
|
|
+ | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
|
|
+ |5:
|
|
+ | cmp RB, RC // More results expected?
|
|
+ | bhi >6
|
|
+ | mov BASE, CARG4
|
|
+ | ldr CARG2, LFUNC:CARG1->field_pc
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
|
|
+ | ins_next3
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | mvn CARG2, #~LJ_TNIL
|
|
+ | add BASE, BASE, #8
|
|
+ | add RC, RC, #8
|
|
+ | str CARG2, [BASE, #-12]
|
|
+ | b <5
|
|
+ |
|
|
+ |->BC_RETV1_Z: // Non-standard return case.
|
|
+ | add RA, BASE, RA
|
|
+ |->BC_RETV2_Z:
|
|
+ | tst CARG2, #FRAME_TYPEP
|
|
+ | bne ->vm_return
|
|
+ | // Return from vararg function: relocate BASE down.
|
|
+ | sub BASE, BASE, CARG2
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | b <1
|
|
+ break;
|
|
+
|
|
+ case BC_RET0: case BC_RET1:
|
|
+ | // RA = results*8, RC = nresults+1
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | lsl RC, RC, #3
|
|
+ | str RC, SAVE_MULTRES
|
|
+ | ands CARG1, PC, #FRAME_TYPE
|
|
+ | eor CARG2, PC, #FRAME_VARG
|
|
+ | it eq
|
|
+ | ldreq INS, [PC, #-4]
|
|
+ | bne ->BC_RETV1_Z
|
|
+ if (op == BC_RET1) {
|
|
+ | ldrd_i CARG1, CARG2, BASE, RA
|
|
+ }
|
|
+ | sub CARG4, BASE, #8
|
|
+ | decode_RA8 RA, INS
|
|
+ if (op == BC_RET1) {
|
|
+ | strd CARG1, CARG2, [CARG4]
|
|
+ }
|
|
+ | sub BASE, CARG4, RA
|
|
+ | decode_RB8 RB, INS
|
|
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
|
|
+ |5:
|
|
+ | cmp RB, RC
|
|
+ | bhi >6
|
|
+ | ldr CARG2, LFUNC:CARG1->field_pc
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
|
|
+ | ins_next3
|
|
+ |
|
|
+ |6: // Fill up results with nil.
|
|
+ | sub CARG2, CARG4, #4
|
|
+ | mvn CARG3, #~LJ_TNIL
|
|
+ | str CARG3, [CARG2, RC]
|
|
+ | add RC, RC, #8
|
|
+ | b <5
|
|
+ break;
|
|
+
|
|
+ /* -- Loops and branches ------------------------------------------------ */
|
|
+
|
|
+ |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
|
|
+ |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
|
|
+ |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
|
|
+ |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
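+ | // The four slot pairs above form the standard for-loop frame at RA:
+ | // internal index, stop value, step, and the externally visible copy
+ | // of the loop variable; each FOR_T* define names the type-tag word
+ | // (offset #4) of the corresponding slot.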
|
|
+
|
|
+ case BC_FORL:
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
+ | // Fall through. Assumes BC_IFORL follows.
|
|
+ break;
|
|
+
|
|
+ case BC_JFORI:
|
|
+ case BC_JFORL:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_FORI:
|
|
+ case BC_IFORL:
|
|
+ | // RA = base*8, RC = target (after end of loop or start of loop)
|
|
+ vk = (op == BC_IFORL || op == BC_JFORL);
|
|
+ | ldrd_iw CARG1, CARG2, RA, BASE
|
|
+ if (op != BC_JFORL) {
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ }
|
|
+ if (!vk) {
|
|
+ | ldrd CARG3, CARG4, FOR_STOP
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | ldr RB, FOR_TSTEP
|
|
+ | bne >5
|
|
+ | checktp CARG4, LJ_TISNUM
|
|
+ | ldr CARG4, FOR_STEP
|
|
+ | checktpeq RB, LJ_TISNUM
|
|
+ | bne ->vmeta_for
|
|
+ | cmp CARG4, #0
|
|
+ | blt >4
|
|
+ | cmp CARG1, CARG3
|
|
+ } else {
|
|
+ | ldrd CARG3, CARG4, FOR_STEP
|
|
+ | checktp CARG2, LJ_TISNUM
|
|
+ | bne >5
|
|
+ | adds CARG1, CARG1, CARG3
|
|
+ | ldr CARG4, FOR_STOP
|
|
+ if (op == BC_IFORL) {
|
|
+ | it vs
|
|
+ | addvs RC, PC, #0x20000 // Overflow: prevent branch.
|
|
+ } else {
|
|
+ | bvs >2 // Overflow: do not enter mcode.
|
|
+ }
|
|
+ | cmp CARG3, #0
|
|
+ | blt >4
|
|
+ | cmp CARG1, CARG4
|
|
+ }
|
|
+ |1:
|
|
+ if (op == BC_FORI) {
|
|
+ | it gt
|
|
+ | subgt PC, RC, #0x20000
|
|
+ } else if (op == BC_JFORI) {
|
|
+ | sub PC, RC, #0x20000
|
|
+ | it le
|
|
+ | ldrhle RC, [PC, #-2]
|
|
+ } else if (op == BC_IFORL) {
|
|
+ | it le
|
|
+ | suble PC, RC, #0x20000
|
|
+ }
|
|
+ if (vk) {
|
|
+ | strd CARG1, CARG2, FOR_IDX
|
|
+ }
|
|
+ |2:
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | strd CARG1, CARG2, FOR_EXT
|
|
+ if (op == BC_JFORI || op == BC_JFORL) {
|
|
+ | ble =>BC_JLOOP
|
|
+ }
|
|
+ |3:
|
|
+ | ins_next3
|
|
+ |
|
|
+ |4: // Invert check for negative step.
|
|
+ if (!vk) {
|
|
+ | cmp CARG3, CARG1
|
|
+ } else {
|
|
+ | cmp CARG4, CARG1
|
|
+ }
|
|
+ | b <1
|
|
+ |
|
|
+ |5: // FP loop.
|
|
+ if (!vk) {
|
|
+ | itt lo
|
|
+ | cmnlo CARG4, #-LJ_TISNUM
|
|
+ | cmnlo RB, #-LJ_TISNUM
|
|
+ | bhs ->vmeta_for
|
|
+ |.if FPU
|
|
+ | vldr d0, FOR_IDX
|
|
+ | vldr d1, FOR_STOP
|
|
+ | cmp RB, #0
|
|
+ | vstr d0, FOR_EXT
|
|
+ |.else
|
|
+ | cmp RB, #0
|
|
+ | strd CARG1, CARG2, FOR_EXT
|
|
+ | blt >8
|
|
+ |.endif
|
|
+ } else {
|
|
+ |.if FPU
|
|
+ | vldr d0, FOR_IDX
|
|
+ | vldr d2, FOR_STEP
|
|
+ | vldr d1, FOR_STOP
|
|
+ | cmp CARG4, #0
|
|
+ | vadd.f64 d0, d0, d2
|
|
+ |.else
|
|
+ | cmp CARG4, #0
|
|
+ | blt >8
|
|
+ | bl extern __aeabi_dadd
|
|
+ | strd CARG1, CARG2, FOR_IDX
|
|
+ | ldrd CARG3, CARG4, FOR_STOP
|
|
+ | strd CARG1, CARG2, FOR_EXT
|
|
+ |.endif
|
|
+ }
|
|
+ |6:
|
|
+ |.if FPU
|
|
+ | ite ge
|
|
+ | vcmpge.f64 d0, d1
|
|
+ | vcmplt.f64 d1, d0
|
|
+ | vmrs
|
|
+ |.else
|
|
+ | bl extern __aeabi_cdcmple
|
|
+ |.endif
|
|
+ if (vk) {
|
|
+ |.if FPU
|
|
+ | vstr d0, FOR_IDX
|
|
+ | vstr d0, FOR_EXT
|
|
+ |.endif
|
|
+ }
|
|
+ if (op == BC_FORI) {
|
|
+ | it hi
|
|
+ | subhi PC, RC, #0x20000
|
|
+ } else if (op == BC_JFORI) {
|
|
+ | sub PC, RC, #0x20000
|
|
+ | it ls
|
|
+ | ldrhls RC, [PC, #-2]
|
|
+ | bls =>BC_JLOOP
|
|
+ } else if (op == BC_IFORL) {
|
|
+ | it ls
|
|
+ | subls PC, RC, #0x20000
|
|
+ } else {
|
|
+ | bls =>BC_JLOOP
|
|
+ }
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ | b <3
|
|
+ |
|
|
+ |.if not FPU
|
|
+ |8: // Invert check for negative step.
|
|
+ if (vk) {
|
|
+ | bl extern __aeabi_dadd
|
|
+ | strd CARG1, CARG2, FOR_IDX
|
|
+ | strd CARG1, CARG2, FOR_EXT
|
|
+ }
|
|
+ | mov CARG3, CARG1
|
|
+ | mov CARG4, CARG2
|
|
+ | ldrd CARG1, CARG2, FOR_STOP
|
|
+ | b <6
|
|
+ |.endif
|
|
+ break;
|
|
+
|
|
+ case BC_ITERL:
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
+ | // Fall through. Assumes BC_IITERL follows.
|
|
+ break;
|
|
+
|
|
+ case BC_JITERL:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_IITERL:
|
|
+ | // RA = base*8, RC = target
|
|
+ | ldrd_iw CARG1, CARG2, RA, BASE
|
|
+ if (op == BC_JITERL) {
|
|
+ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil.
|
|
+ | it ne
|
|
+ | strdne CARG1, CARG2, [RA, #-8]
|
|
+ | bne =>BC_JLOOP
|
|
+ } else {
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ | // STALL: load CARG1, CARG2.
|
|
+ | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil.
|
|
+ | itt ne
|
|
+ | subne PC, RC, #0x20000 // Otherwise save control var + branch.
|
|
+ | strdne CARG1, CARG2, [RA, #-8]
|
|
+ }
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_LOOP:
|
|
+ | // RA = base*8, RC = target (loop extent)
|
|
+ | // Note: RA/RC are only used by the trace recorder to determine scope/extent.
|
|
+ | // This opcode does NOT jump; its only purpose is to detect a hot loop.
|
|
+ |.if JIT
|
|
+ | hotloop
|
|
+ |.endif
|
|
+ | // Fall through. Assumes BC_ILOOP follows.
|
|
+ break;
|
|
+
|
|
+ case BC_ILOOP:
|
|
+ | // RA = base*8, RC = target (loop extent)
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_JLOOP:
|
|
+ |.if JIT
|
|
+ | // RA = base (ignored), RC = traceno
|
|
+ | sub RB, DISPATCH, #-DISPATCH_J(trace)
|
|
+ | ldr CARG1, [RB]
|
|
+ | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0.
|
|
+ | ldr TRACE:RC, [CARG1, RC, lsl #2]
|
|
+ | st_vmstate CARG2
|
|
+ | ldr RA, TRACE:RC->mcode
|
|
+ | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
|
|
+ | sub RB, DISPATCH, #-DISPATCH_GL(tmpbuf.L)
|
|
+ | str L, [RB]
|
|
+ | add RA, RA, #1
|
|
+ | bx RA
|
|
+ |.endif
|
|
+ break;
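+ /* Note the 'add RA, RA, #1' before 'bx RA': ARMv7-M executes Thumb
+ ** code only, so bit 0 of any bx/blx target register must be set or
+ ** the branch faults. As a C sketch:
+ **   void (*mc)(void) = (void (*)(void))((uintptr_t)T->mcode | 1);
+ **   mc();
+ */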
|
|
+
|
|
+ case BC_JMP:
|
|
+ | // RA = base*8 (only used by trace recorder), RC = target
|
|
+ | add RC, PC, RC, lsl #2
|
|
+ | sub PC, RC, #0x20000
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ /* -- Function headers -------------------------------------------------- */
|
|
+
|
|
+ case BC_FUNCF:
|
|
+ |.if JIT
|
|
+ | hotcall
|
|
+ |.endif
|
|
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
|
|
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
|
|
+ break;
|
|
+
|
|
+ case BC_JFUNCF:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ case BC_IFUNCF:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
|
|
+ | ldr CARG1, L->maxstack
|
|
+ | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)]
|
|
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
|
|
+ | cmp RA, CARG1
|
|
+ | bhi ->vm_growstack_l
|
|
+ if (op != BC_JFUNCF) {
|
|
+ | ins_next1
|
|
+ | ins_next2
|
|
+ }
|
|
+ |2:
|
|
+ | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters.
|
|
+ | mvn CARG4, #~LJ_TNIL
|
|
+ | blo >3
|
|
+ if (op == BC_JFUNCF) {
|
|
+ | decode_RD RC, INS
|
|
+ | b =>BC_JLOOP
|
|
+ } else {
|
|
+ | ins_next3
|
|
+ }
|
|
+ |
|
|
+ |3: // Clear missing parameters.
|
|
+ | strd_i CARG3, CARG4, BASE, NARGS8:RC
|
|
+ | add NARGS8:RC, NARGS8:RC, #8
|
|
+ | b <2
|
|
+ break;
|
|
+
|
|
+ case BC_JFUNCV:
|
|
+#if !LJ_HASJIT
|
|
+ break;
|
|
+#endif
|
|
+ | NYI // NYI: compiled vararg functions
|
|
+ break; /* NYI: compiled vararg functions. */
|
|
+
|
|
+ case BC_IFUNCV:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
|
|
+ | ldr CARG1, L->maxstack
|
|
+ | add CARG4, BASE, RC
|
|
+ | add RA, RA, RC
|
|
+ | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC.
|
|
+ | add CARG2, RC, #8+FRAME_VARG
|
|
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
|
|
+ | cmp RA, CARG1
|
|
+ | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG.
|
|
+ | bhs ->vm_growstack_l
|
|
+ | ldrb RB, [PC, #-4+PC2PROTO(numparams)]
|
|
+ | mov RA, BASE
|
|
+ | mov RC, CARG4
|
|
+ | cmp RB, #0
|
|
+ | add BASE, CARG4, #8
|
|
+ | beq >3
|
|
+ | mvn CARG3, #~LJ_TNIL
|
|
+ |1:
|
|
+ | cmp RA, RC // Less args than parameters?
|
|
+ | ite lo
|
|
+ | ldrdlo CARG1, CARG2, [RA], #8
|
|
+ | movhs CARG2, CARG3
|
|
+ | it lo
|
|
+ | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC).
|
|
+ |2:
|
|
+ | subs RB, RB, #1
|
|
+ | strd CARG1, CARG2, [CARG4, #8]!
|
|
+ | bne <1
|
|
+ |3:
|
|
+ | ins_next
|
|
+ break;
|
|
+
|
|
+ case BC_FUNCC:
|
|
+ case BC_FUNCCW:
|
|
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
|
|
+ if (op == BC_FUNCC) {
|
|
+ | ldr CARG4, CFUNC:CARG3->f
|
|
+ } else {
|
|
+ | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)]
|
|
+ }
|
|
+ | add CARG2, RA, NARGS8:RC
|
|
+ | ldr CARG1, L->maxstack
|
|
+ | add RC, BASE, NARGS8:RC
|
|
+ | str BASE, L->base
|
|
+ | cmp CARG2, CARG1
|
|
+ | str RC, L->top
|
|
+ if (op == BC_FUNCCW) {
|
|
+ | ldr CARG2, CFUNC:CARG3->f
|
|
+ }
|
|
+ | mv_vmstate CARG3, C
|
|
+ | mov CARG1, L
|
|
+ | bhi ->vm_growstack_c // Need to grow stack.
|
|
+ | st_vmstate CARG3
|
|
+ | blx CARG4 // (lua_State *L [, lua_CFunction f])
|
|
+ | // Returns nresults.
|
|
+ | ldr BASE, L->base
|
|
+ | mv_vmstate CARG3, INTERP
|
|
+ | ldr CRET2, L->top
|
|
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
|
|
+ | lsl RC, CRET1, #3
|
|
+ | st_vmstate CARG3
|
|
+ | ldr PC, [BASE, FRAME_PC]
|
|
+ | sub RA, CRET2, RC // RA = L->top - nresults*8
|
|
+ | b ->vm_returnc
|
|
+ break;
|
|
+
|
|
+ /* ---------------------------------------------------------------------- */
|
|
+
|
|
+ default:
|
|
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
|
|
+ exit(2);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int build_backend(BuildCtx *ctx)
|
|
+{
|
|
+ int op;
|
|
+
|
|
+ dasm_growpc(Dst, BC__MAX);
|
|
+
|
|
+ build_subroutines(ctx);
|
|
+
|
|
+ |.code_op
|
|
+ for (op = 0; op < BC__MAX; op++)
|
|
+ build_ins(ctx, (BCOp)op, op);
|
|
+
|
|
+ return BC__MAX;
|
|
+}
|
|
+
|
|
+/* Emit pseudo frame-info for all assembler functions. */
|
|
+static void emit_asm_debug(BuildCtx *ctx)
|
|
+{
|
|
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
|
|
+ int i;
|
|
+ switch (ctx->mode) {
|
|
+ case BUILD_elfasm:
|
|
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".Lframe0:\n"
|
|
+ "\t.long .LECIE0-.LSCIE0\n"
|
|
+ ".LSCIE0:\n"
|
|
+ "\t.long 0xffffffff\n"
|
|
+ "\t.byte 0x1\n"
|
|
+ "\t.string \"\"\n"
|
|
+ "\t.uleb128 0x1\n"
|
|
+ "\t.sleb128 -4\n"
|
|
+ "\t.byte 0xe\n" /* Return address is in lr. */
|
|
+ "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */
|
|
+ "\t.align 2\n"
|
|
+ ".LECIE0:\n\n");
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE0:\n"
|
|
+ "\t.long .LEFDE0-.LASFDE0\n"
|
|
+ ".LASFDE0:\n"
|
|
+ "\t.long .Lframe0\n"
|
|
+ "\t.long .Lbegin\n"
|
|
+ "\t.long %d\n"
|
|
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
|
|
+ "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */
|
|
+ fcofs, CFRAME_SIZE);
|
|
+ for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */
|
|
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i));
|
|
+#if LJ_ARCH_HASFPU
|
|
+ for (i = 15; i >= 8; i--) /* offset d8-d15 */
|
|
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n",
|
|
+ 64+2*i, 10+2*(15-i));
|
|
+ fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */
|
|
+#endif
|
|
+ fprintf(ctx->fp,
|
|
+ "\t.align 2\n"
|
|
+ ".LEFDE0:\n\n");
|
|
+#if LJ_HASFFI
|
|
+ fprintf(ctx->fp,
|
|
+ ".LSFDE1:\n"
|
|
+ "\t.long .LEFDE1-.LASFDE1\n"
|
|
+ ".LASFDE1:\n"
|
|
+ "\t.long .Lframe0\n"
|
|
+ "\t.long lj_vm_ffi_call\n"
|
|
+ "\t.long %d\n"
|
|
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
|
|
+ "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */
|
|
+ "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */
|
|
+ "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */
|
|
+ "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */
|
|
+ "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */
|
|
+ "\t.align 2\n"
|
|
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
|
|
+#endif
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+}
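+
+/* Key to the raw CFI opcodes emitted above (standard DWARF, nothing
+** port-specific): 0x0c = DW_CFA_def_cfa, 0x0d = DW_CFA_def_cfa_register,
+** 0x0e = DW_CFA_def_cfa_offset, 0x05 = DW_CFA_offset_extended and
+** 0x80+reg = DW_CFA_offset. Saved-register offsets are uleb128 values
+** scaled by the data alignment factor (sleb128 -4 in the CIE), so
+** '.uleb128 n' places a register at CFA-4*n.
+*/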
|
|
+
|