diff --git a/Makefile b/Makefile
index b0288b4d..f387077c 100644
--- a/Makefile
+++ b/Makefile
@@ -90,7 +90,7 @@ FILE_MAN= luajit.1
 FILE_PC= luajit.pc
 FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
 FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
-  dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+  dis_x86.lua dis_x64.lua dis_arm.lua dis_armv7m.lua dis_arm64.lua \
   dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
   dis_mips64.lua dis_mips64el.lua vmdef.lua
diff --git a/dynasm/dasm_armv7m.h b/dynasm/dasm_armv7m.h
new file mode 100644
index 00000000..8f94ba40
--- /dev/null
+++ b/dynasm/dasm_armv7m.h
@@ -0,0 +1,563 @@
+/*
+** DynASM ARMv7-M encoding engine.
+** Copyright (C) 2018 Jernej Turnsek. All rights reserved.
+** Copyright (C) 2005-2017 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#define DASM_ARCH "armv7m"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+  DASM_STOP,
+  DASM_SECTION,
+  DASM_ESC,
+  DASM_REL_EXT,
+  /* The following actions need a buffer position. */
+  DASM_ALIGN,
+  DASM_REL_LG,
+  DASM_LABEL_LG,
+  /* The following actions also have an argument. */
+  DASM_REL_PC,
+  DASM_LABEL_PC,
+  DASM_IMM,
+  DASM_IMM12,
+  DASM_IMM16,
+  DASM_IMML8,
+  DASM_IMML12,
+  DASM_IMMV8,
+  DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+  int *rbuf;     /* Biased buffer pointer (negative section bias). */
+  int *buf;      /* True buffer pointer. */
+  size_t bsize;  /* Buffer size in bytes. */
+  int pos;       /* Biased buffer position. */
+  int epos;      /* End of biased buffer position - max single put. */
+  int ofs;       /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+  size_t psize;             /* Allocated size of this structure. */
+  dasm_ActList actionlist;  /* Current actionlist pointer. */
+  int *lglabels;            /* Local/global chain/pos ptrs. */
+  size_t lgsize;
+  int *pclabels;            /* PC label chains/pos ptrs. */
+  size_t pcsize;
+  void **globals;           /* Array of globals (bias -10). */
+  dasm_Section *section;    /* Pointer to active section. */
+  size_t codesize;          /* Total size of all code sections. */
+  int maxsection;           /* 0 <= sectionidx < maxsection. */
+  int status;               /* Status code. */
+  dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
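One file-wide note before the state macros: ARMv7-M executes Thumb-2 code, where a 32-bit instruction is a pair of 16-bit half-words and the first half-word sits at the lower address. The templates in dasm_armv7m.lua keep that first half-word in the upper 16 bits of the template word, so dasm_encode() below swaps the halves before every little-endian word store (the "swap of half-words" comments). A standalone sketch, not part of the patch, showing the effect for the BL template 0xf000d000:

```c
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
  uint32_t tmpl = 0xf000d000u;  /* BL template: hw1 = 0xf000, hw2 = 0xd000 */
  /* Same expression dasm_encode() uses to reorder the half-words. */
  uint32_t ins = ((tmpl >> 16) & 0x0000ffffu) | ((tmpl << 16) & 0xffff0000u);
  uint8_t mem[4];
  memcpy(mem, &ins, 4);  /* emulate "*cp++ = ins" on a little-endian core */
  printf("stored word %08x -> bytes %02x %02x %02x %02x\n",
         ins, mem[0], mem[1], mem[2], mem[3]);
  /* Prints "bytes 00 f0 00 d0" on a little-endian host: hw1 (f000) now
  ** occupies the lower address, as the Thumb-2 fetcher expects. */
  return 0;
}
```

+/* The size of the core structure depends on the max. number of sections.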
*/ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + int i; + unsigned int m = n; + + if (m <= 255) { + /* i:imm3 = 0000 */ + return ((((m) & 0xff) << 16) | (((m) & 0x700) << 20) | (((m) & 0x800) >> 1)); + } + else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0001 */ + return ((((0x100 | (m & 0xff)) & 0xff) << 16) | (((0x100 | (m & 0xff)) & 0x700) << 20) | (((0x100 | (m & 0xff)) & 0x800) >> 1)); + } + else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { + /* i:imm3 = 0010 */ + return ((((0x200 | (m >> 8 & 0xff)) & 0xff) << 16) | (((0x200 | (m >> 8 & 0xff)) & 0x700) << 20) | (((0x200 | (m >> 8 & 0xff)) & 0x800) >> 1)); + } + else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0011 */ + return ((((0x300 | (m & 0xff)) & 0xff) << 16) | (((0x300 | (m & 0xff)) & 0x700) << 20) | (((0x300 | (m & 0xff)) & 0x800) >> 1)); + } + else { + for (i = 0; i < 4096; i += 128, m = ((m << 1) | (m >> (-(unsigned 
int)(1)&(8*sizeof(m) - 1))))) { + if (m <= 255) { + if ((m & 0x80) && (i >= 128 * 8)) + return ((((i | (m & 0x7f)) & 0xff) << 16) | (((i | (m & 0x7f)) & 0x700) << 20) | (((i | (m & 0x7f)) & 0x800) >> 1)); + else + continue; + } + } + } + if (n < 4096) { + return -2; /* Used for additional encoding of add/sub TODO: better solution! */ + } + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, + int, + sec->buf, + sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize / sizeof(int) - DASM_MAXSECPOS + DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } + else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n >= 10 || *pl < 0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); +putrel: + n = *pl; + if (n < 0) { + /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } + else { +linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); +putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + case DASM_IMM16: +#ifdef DASM_CHECKS + CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I); + if ((ins & 0x8000)) + CK(((n + (1 << (((ins >> 5) & 31) - 1))) >> ((ins >> 5) & 31)) == 0, RANGE_I); + else + CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMMV8: + CK((n & 3) == 0, RANGE_I); + n >>= 2; + /* fallthrough */ + case DASM_IMML8: + case DASM_IMML12: + CK(n >= 0 ? ((n >> ((ins >> 5) & 31)) == 0) : + (((-n) >> ((ins >> 5) & 31)) == 0), + RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC | pc; + } +#endif + + { + /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM12: case DASM_IMM16: + case DASM_IMML8: case DASM_IMML12: case DASM_IMMV8: pos++; break; + } + } +stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: + case DASM_SECTION: + goto stop; + case DASM_ESC: + //*cp++ = *p++; //jturnsek: do I need to swap this also? + *cp++ = ((*p >> 16) & 0x0000ffff) | ((*p << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! 
*/ + p++; + break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), !(ins & 2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x8000f3af; /* jturnsek: NOP.W */ + break; + case DASM_REL_LG: + CK(n >= 0, UNDEF_LG); + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base); +patchrel: + if ((ins & 0x800) == 0) { + /* jturnsek: B or BL */ + if (cp[-1] & 0x10000000) { + /* BL */ + CK((n & 1) == 0 && ((n + 0x01000000) >> 25) == 0, RANGE_REL); + cp[-1] |= ((((n & 0x1000000) >> 24) & 0x1) << 10) | + (((~((n & 0x800000) >> 23) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 29) | + (((~((n & 0x400000) >> 22) & 0x1) ^ (((n & 0x1000000) >> 24) & 0x1)) << 27) | + ((n >> 12) & 0x3ff) | + (((n >> 1) & 0x7ff) << 16); + } + else { + /* B (T3) */ + CK((n & 1) == 0 && ((n + 0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((((n & 0x100000) >> 20) & 0x1) << 10) | + ((((n & 0x80000) >> 19) & 0x1) << 27) | + ((((n & 0x40000) >> 18) & 0x1) << 29) | + ((n >> 12) & 0x3f) | + (((n >> 1) & 0x7ff) << 16); + } + } + else if ((ins & 0x1000)) { + CK((n & 3) == 0 && -256 <= n && n <= 256, RANGE_REL); + goto patchimml8; + } + else if ((ins & 0x2000) == 0) { + CK((n & 3) == 0 && -4096 <= n && n <= 4096, RANGE_REL); + goto patchimml; + } + else { + CK((n & 3) == 0 && -1020 <= n && n <= 1020, RANGE_REL); + n >>= 2; + goto patchimmv; + } + break; + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins - 10] = (void *)(base + n); + break; + case DASM_LABEL_PC: + break; + case DASM_IMM: + if (((ins >> 5) & 31) == 2) { + /* 2 bit shift for load/store lsl */ + cp[-1] |= ((n & 0x3) << 20); + } + else { + /* 5 bit shift */ + cp[-1] |= ((n & 0x3) << 22) | ((n & 0x1c) << 26); + } + //cp[-1] |= ((n >> ((ins >> 10) & 31)) & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31); + break; + case DASM_IMM12: + if (dasm_imm12((unsigned int)n) == -2) { + cp[-1] ^= 0x00000300; + cp[-1] &= ~0x00000010; + cp[-1] |= ((((n) & 0xff) << 16) | (((n) & 0x700) << 20) | (((n) & 0x800) >> 1)); + } + else { + cp[-1] |= dasm_imm12((unsigned int)n); + } + break; + case DASM_IMM16: + cp[-1] |= ((n & 0xf000) >> 12) | + ((n & 0x0800) >> 1) | + ((n & 0x0700) << 20) | + ((n & 0x00ff) << 16); + break; + case DASM_IMML8: +patchimml8: + cp[-1] |= n >= 0 ? (0x02000000 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16); + break; + case DASM_IMML12: +patchimml: + cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xfff) << 16)) : ((-n & 0xfff) << 16); + if (((cp[-1] & 0x0000000f) != 0x0000000f) && (n < 0)) { + CK(-255 <= n && n < 0, RANGE_I); + cp[-1] &= ~0x03000000; + cp[-1] |= 0x0c000000; + } + break; + case DASM_IMMV8: +patchimmv: + cp[-1] |= n >= 0 ? (0x00000080 | ((n & 0xff) << 16)) : ((-n & 0xff) << 16); + break; + default: + *cp++ = ((ins >> 16) & 0x0000ffff) | ((ins << 16) & 0xffff0000); /* jturnsek: swap of half-words!!! */ + break; + } + } +stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. 
*/ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG | i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC | (D->section - D->sections); + return D->status; +} +#endif + diff --git a/dynasm/dasm_armv7m.lua b/dynasm/dasm_armv7m.lua new file mode 100644 index 00000000..8e877d26 --- /dev/null +++ b/dynasm/dasm_armv7m.lua @@ -0,0 +1,1010 @@ +------------------------------------------------------------------------------ +-- DynASM ARMV7M module. +-- +-- Copyright (C) 2018 Jernej Turnsek. All rights reserved. +-- Copyright (C) 2005-2017 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "armv7m", + description = "DynASM ARMV7M module", + version = "1.4.0", + vernum = 10400, + release = "2018-12-07", + author = "Jernej Turnsek", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex = bit.ror, bit.tohex +local bxor = bit.bxor + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. 
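As context for the emitter that follows: each word written into that array is either a raw (still half-word-unswapped) instruction template or an action word, with the action number in the upper 16 bits and a small argument below it, exactly as waction() above packs `w * 0x10000 + (val or 0)` and the C engine unpacks `action = ins >> 16`. A toy decoder, not part of the patch and using a made-up action word, to make the layout concrete:

```c
#include <stdio.h>

/* Mirrors the Lua action_names table; order must match the C enum. */
static const char *const action_names[] = {
  "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "IMM", "IMM12", "IMM16", "IMML8", "IMML12", "IMMV8"
};

int main(void)
{
  /* Hypothetical IMM action word: action 9 (IMM), bits=5 -> val = 5*32,
  ** matching the bits*32 term that parse_imm() passes to waction(). */
  unsigned int ins = 9u * 0x10000u + 5u * 32u;
  unsigned int action = ins >> 16, val = ins & 0xffffu;
  if (action < sizeof(action_names)/sizeof(action_names[0]))
    printf("action %s, val 0x%04x\n", action_names[action], val);
  else
    printf("instruction template 0x%08x\n", ins);  /* e.g. 0xf000d000 */
  return 0;
}
```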
+local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. 
+ local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { sp = "r13", lr = "r14", pc = "r15", } + +-- Int. register name -> ext. name. +local map_reg_rev = { r13 = "sp", r14 = "lr", r15 = "pc", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, ror = 3, } + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +-- Template strings for ARM instructions. +-- jturnsek: dasm_encode will do the swap of half-words!!! +local map_op = { + and_3 = "ea000000DNPs", + eor_3 = "ea800000DNPs", + sub_3 = "eba00000DNPs", + rsb_3 = "ebc00000DNPs", + add_3 = "eb000000DNPs", + sbc_3 = "eb600000DNPs", + tst_2 = "ea100f00NP", + cmp_2 = "ebb00f00NP", + cmn_2 = "eb100f00NP", + orr_3 = "ea400000DNPs", + mov_2 = "ea4f0000DPs", + bic_3 = "ea200000DNPs", + mvn_2 = "ea6f0000DPs", + + and_4 = "ea000000DNMps", + eor_4 = "ea800000DNMps", + sub_4 = "eba00000DNMps", + rsb_4 = "ebc00000DNMps", + add_4 = "eb000000DNMps", + sbc_4 = "eb600000DNMps", + tst_3 = "ea100f00NMp", + cmp_3 = "ebb00f00NMp", + cmn_3 = "eb100f00NMp", + orr_4 = "ea400000DNMps", + mov_3 = "ea4f0000DMps", + bic_4 = "ea200000DNMps", + mvn_3 = "ea6f0000DMps", + + lsl_3 = "ea400000DNws", + lsr_3 = "ea400010DNws", + asr_3 = "ea400020DNws", + ror_3 = "ea400030DNws", + + smull_4 = "fb800000SDNM", + + clz_2 = "fab0f080Da", -- a is used for Consistent(M) + rbit_2 = "fa90f0a0Da", -- a is used for Consistent(M) + + str_2 = "f8400000SL", str_3 = "f8400000SL", str_4 = "f8400000SL", + strb_2 = "f8000000SL", strb_3 = "f8000000SL", strb_4 = "f8000000SL", + ldr_2 = "f8500000SL", ldr_3 = "f8500000SL", ldr_4 = "f8500000SL", + ldrb_2 = "f8100000SL", ldrb_3 = "f8100000SL", ldrb_4 = "f8100000SL", + strh_2 = "f8200000SL", strh_3 = "f8200000SL", + ldrh_2 = "f8300000SL", ldrh_3 = "f8300000SL", + ldrd_3 = "e8500000SDL", ldrd_4 = "e8500000SDL", + strd_3 = "e8400000SDL", strd_4 = "e8400000SDL", + + ldm_2 = "e8900000oR", + pop_1 = "e8bd0000R", + push_1 = "e92d0000R", + + b_1 = "f0009000B", + bl_1 = "f000d000B", + bx_1 = "bf004700C", + blx_1 = "bf004780C", + + nop_0 = "f3af8000", + bkpt_1 = "bf00be00K", + + ["vadd.f64_3"] = "ee300b00Gdnm", + ["vsub.f64_3"] = "ee300b40Gdnm", + ["vmul.f64_3"] = "ee200b00Gdnm", + ["vdiv.f64_3"] = "ee800b00Gdnm", + ["vcmp.f64_2"] = "eeb40b40Gdm", + ["vcvt.f64.s32_2"] = "eeb80bc0GdFm", + ["vsqrt.f64_2"] = "eeb10bc0Gdm", + + vldr_2 = "ed100a00dl|ed100b00Gdl", + vstr_2 = "ed000a00dl|ed000b00Gdl", + vldm_2 = "ec900a00or", + vpop_1 = "ecbd0a00r", + 
vstmdb_2 = "ed000a00or", + vpush_1 = "ed2d0a00r", + + ["vmov.f64_2"] = "eeb00b40Gdm|eeb00b00GdY", + vmov_2 = "ee100a10Sn|ee000a10nS", + vmov_3 = "ec500a10SNm|ec400a10mSN|ec500b10GSNm|ec400b10GmSN", + vmrs_0 = "eef1fa10", + + it_1 = "bf00bf08c", + ite_1 = "bf00bf04c", + itt_1 = "bf00bf04c", + ittt_1 = "bf00bf02c", + itttt_1 = "bf00bf01c", + iteee_1 = "bf00bf01c", +} + +-- Add mnemonics for "s" variants. +do + local t = {} + for k,v in pairs(map_op) do + if sub(v, -1) == "s" then + local v2 = sub(v, 1, 2)..char(byte(v, 3)+1)..sub(v, 4, -2) + t[sub(k, 1, -3).."s"..sub(k, -2)] = v2 + end + end + for k,v in pairs(t) do + map_op[k] = v + end +end + +------------------------------------------------------------------------------ + +local function parse_gpr(expr) + local tname, ovreg = match(expr, "^([%w_]+):(r1?[0-9])$") + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local r = match(expr, "^r(1?[0-9])$") + if r then + r = tonumber(r) + if r <= 15 then return r, tp end + end + werror("bad register name `"..expr.."'") +end + +local function parse_gpr_pm(expr) + local pm, expr2 = match(expr, "^([+-]?)(.*)$") + return parse_gpr(expr2), (pm == "-") +end + +local function parse_vr(expr, tp) + local t, r = match(expr, "^([sd])([0-9]+)$") + if t == tp then + r = tonumber(r) + if r <= 31 then + if t == "s" then return shr(r, 1), band(r, 1) end + return band(r, 15), shr(r, 4) + end + end + werror("bad register name `"..expr.."'") +end + +local function parse_reglist(reglist) + reglist = match(reglist, "^{%s*([^}]*)}$") + if not reglist then werror("register list expected") end + local rr = 0 + for p in gmatch(reglist..",", "%s*([^,]*),") do + local rbit = shl(1, parse_gpr(gsub(p, "%s+$", ""))) + if band(rr, rbit) ~= 0 then + werror("duplicate register `"..p.."'") + end + rr = rr + rbit + end + return rr +end + +local function parse_vrlist(reglist) + local ta, ra, tb, rb = match(reglist, + "^{%s*([sd])([0-9]+)%s*%-%s*([sd])([0-9]+)%s*}$") + ra, rb = tonumber(ra), tonumber(rb) + if ta and ta == tb and ra and rb and ra <= 31 and rb <= 31 and ra <= rb then + local nr = rb + 1 - ra + if ta == "s" then + return shl(shr(ra, 1), 12) + shl(band(ra, 1), 22) + nr + else + return shl(band(ra, 15), 12) + shl(shr(ra, 4), 22) + nr * 2 + 0x100 + end + end + werror("register list expected") +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + local n = tonumber(imm) + if n then + if n <= 255 then + return band(n, 0xff) + elseif band(n, 0xff00ff00) == 0 and band(shr(n, 16), 0xff) == band(n, 0xff) then + return band(n, 0xff) + shl(1, 12) + elseif band(n, 0x00ff00ff) == 0 and band(shr(n, 16), 0xff00) == band(n, 0xff00) then + return band(shr(n, 8), 0xff) + shl(2, 12) + elseif band(shr(n, 24), 0xff) == band(n, 0xff) and + band(shr(n, 16), 0xff) == band(n, 0xff) and + band(shr(n, 8), 0xff) == band(n, 
0xff) then + return band(n, 0xff) + shl(3, 12) + else + for i=31, 8, -1 do + n = ror(n, 1) + if n >= 128 and n <= 255 then + return shl(band(i, 0x10), 22) + shl(band(i, 0x0e), 11) + shl(band(i, 0x01), 7) + band(n, 0x7f) + end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm16(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = tonumber(imm) + if n then + if shr(n, 16) == 0 then + return band(n, 0x00ff) + shl(band(n, 0x0700), 4) + shl(band(n, 0x0800), 15) + shl(band(n, 0xf000), 4) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM16", 32*16, imm) + return 0 + end +end + +local function parse_imm_load(imm, ext, flags) + local n = tonumber(imm) + local p, w = match(flags, "P"), match(flags, "W") + if n then + if ext then + if n >= -1020 and n <= 1020 then + local up = 0x00800000 + if n < 0 then n = -n; up = 0 end + return n/4 + up + (p and 0x01000000 or 0) + (w and 0x00200000 or 0) + end + else + if w then + if n >= -255 and n <= 255 then + if n >= 0 then + return n + 0x00000a00 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0) + else + return -n + 0x00000800 + (p and 0x00000400 or 0) + (w and 0x00000100 or 0) + end + end + else + if n >= 0 and n <= 4095 then + return n + 0x00800000 + elseif n >= -255 and n < 0 then + return -n + 0x00000800 + (p and 0x00000400 or 0) + end + end + end + werror("out of range immediate `"..imm.."'") + else + waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), imm) + local pw = 0 + if p then pw = (ext and 0x01000000 or 0) end + if w then pw = (ext and 0x00200000 or 0) end + return pw + end +end + +local function parse_shift(shift) + if shift == "rrx" then + return 3 * 16 + else + local s, s2 = match(shift, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + if sub(s2, 1, 1) == "#" then + local imm = parse_imm(s2, 5, 0, 0, false) + return shl(band(imm, 0x1c), 10) + shl(band(imm, 0x03), 6) + shl(s, 4) + else + werror("expected immediate shift operand") + end + end +end + +local function parse_shift_load(shift) + if not match(shift, "lsl", 1) then + werror("expected lsl shift operand") + else + local s, s2 = match(shift, "^(%S+)%s*(.*)$") + if not s then werror("expected shift operand") end + if sub(s2, 1, 1) == "#" then + return parse_imm(s2, 2, 4, 0, false) + else + werror("expected immediate shift operand") + end + end +end + +local function parse_label(label, def) + local prefix = sub(label, 1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, sub(label, 3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[sub(label, 3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. 
+ return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + end + werror("bad label `"..label.."'") +end + +local function parse_load(params, nparams, n, op) + local ext = (band(op, 0x10000000) == 0) + local pn = params[n] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + local p2 = params[n+1] + if not p1 then + if not p2 then + if match(pn, "^[<>=%-]") or match(pn, "^extern%s+") then + local mode, n, s = parse_label(pn, false) + waction("REL_"..mode, n + (ext and 0x2800 or 0x0800), s, 1) + return op + 15 * 65536 + (ext and 0x01000000 or 0) --set P if ext==true + end + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction(ext and "IMMV8" or "IMML12", 32768 + (ext and 32*8 or 32*12), + format(tp.ctypefmt, tailr)) + return op + shl(d, 16) + (ext and 0x01000000 or 0) --set P if ext==true, using imm12 if ext==false + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + local p3 = params[n+2] + op = op + shl(parse_gpr(p1), 16) + local imm = match(p2, "^#(.*)$") + if imm then + if p3 then werror("too many parameters") end + op = op + parse_imm_load(imm, ext, "W") --always imm8, set W + else + if ext then werror("not in ARMV7M") end + op = op + parse_gpr(p2) + if p3 then op = op + parse_shift_load(p3) end + end + else + local p1a, p2 = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + shl(parse_gpr(p1a), 16) + if p2 ~= "" then + local imm = match(p2, "^,%s*#(.*)$") + if imm then + op = op + parse_imm_load(imm, ext, (wb == "!" and "PW" or "P")) --set P (and W) + else + local p2a, p3 = match(p2, "^,%s*([^,%s]*)%s*,?%s*(.*)$") + if ext then werror("not in ARMV7M") end + op = op + parse_gpr(p2a) + if p3 ~= "" then + op = op + parse_shift_load(p3) + end + end + else + if wb == "!" 
then werror("bad use of '!'") end + op = op + (ext and 0x01000000 or 0) + 0x00800000 --no imm, thus using imm12 if ext==false, set U + end + end + return op +end + +local function parse_vload(q) + local reg, imm = match(q, "^%[%s*([^,%s]*)%s*(.*)%]$") + if reg then + local d = shl(parse_gpr(reg), 16) + if imm == "" then return d end + imm = match(imm, "^,%s*#(.*)$") + if imm then + local n = tonumber(imm) + if n then + if n >= -1020 and n <= 1020 and n%4 == 0 then + return d + (n >= 0 and n/4+0x00800000 or -n/4) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMMV8", 32768 + 32*8, imm) + return d + end + end + else + if match(q, "^[<>=%-]") or match(q, "^extern%s+") then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n + 0x2800, s, 1) + return 15 * 65536 + end + local reg, tailr = match(q, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local d, tp = parse_gpr(reg) + if tp then + waction("IMMV8", 32768 + 32*8, format(tp.ctypefmt, tailr)) + return shl(d, 16) + end + end + end + werror("expected address operand") +end + +local function parse_it(name, cond) + local mask, it = 0, match(name, "it", 1) + if not it then + werror("not IT instruction") + end + local it2 = sub(name, 3, -1) + if not it2 then + return shl(map_cond[cond], 4) + end + local shift = 3 + for p in gmatch(it2, "[te]") do + if p == "t" then + mask = mask + shl(band(map_cond[cond], 1), shift) + elseif p == "e" then + mask = mask + shl(band(bxor(map_cond[cond], 15), 1), shift) + else + werror("wrong syntax") + end + if shift ~= 0 then shift = shift - 1 end + end + return shl(map_cond[cond], 4) + mask +end + + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(sub(template, 1, 8), 16) + local n = 1 + local vr = "s" + + -- Process each character. + for p in gmatch(sub(template, 9), ".") do + local q = params[n] + if p == "D" then + op = op + shl(parse_gpr(q), 8); n = n + 1 + elseif p == "N" then + op = op + shl(parse_gpr(q), 16); n = n + 1 + elseif p == "S" then + op = op + shl(parse_gpr(q), 12); n = n + 1 + elseif p == "M" then + op = op + parse_gpr(q); n = n + 1 + elseif p == "a" then + local m = parse_gpr(q) + op = op + m + shl(m, 16); n = n + 1 + elseif p == "d" then + local r,h = parse_vr(q, vr); op = op + shl(r, 12) + shl(h, 22); n = n + 1 + elseif p == "n" then + local r,h = parse_vr(q, vr); op = op + shl(r, 16) + shl(h, 7); n = n + 1 + elseif p == "m" then + local r,h = parse_vr(q, vr); op = op + r + shl(h, 5); n = n + 1 + elseif p == "P" then + local imm = match(q, "^#(.*)$") + if imm then + op = op + 0x6000000 + parse_imm12(imm) + else + op = op + parse_gpr(q) + end + n = n + 1 + elseif p == "p" then + op = op + parse_shift(q); n = n + 1 + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "l" then + op = op + parse_vload(q) + elseif p == "B" then + local mode, n, s = parse_label(q, false) + waction("REL_"..mode, n, s, 1) + elseif p == "C" then -- blx gpr only + if match(q, "^([%w_]+):(r1?[0-9])$") or match(q, "^r(1?[0-9])$") then + local r = parse_gpr(q) + op = op + shl(r, 3) + else + werror("not supported") + end + elseif p == "c" then + op = op + parse_it(params.op, q) + elseif p == "F" then + vr = "s" + elseif p == "G" then + vr = "d" + elseif p == "o" then + local r, wb = match(q, "^([^!]*)(!?)$") + op = op + shl(parse_gpr(r), 16) + (wb == "!" 
and 0x00200000 or 0) + n = n + 1 + elseif p == "R" then + if params[1] == "{r15}" and params.op == "pop" then + op = 0xf85dfb04; -- pop {pc} coded as T3 + elseif params[1] == "{r12}" and params.op == "pop" then + op = 0xf85dcb04; -- pop {r12} coded as T3 + elseif params[1] == "{r12}" and params.op == "push" then + op = 0xf84dcd04; -- push {r12} coded as T3 + else + op = op + parse_reglist(q) + end + n = n + 1 + elseif p == "r" then + op = op + parse_vrlist(q); n = n + 1 + elseif p == "w" then + local imm = match(q, "^#(.*)$") + if imm then + local imm5 = parse_imm(q, 5, 0, 0, false) + local m = band(op, 0x000f0000) + op = op - m + 0x000f0000 + shr(m, 16) + shl(band(imm5, 0x1c), 10) + shl(band(imm5, 0x03), 6); n = n + 1 + else + local type = band(op, 0x00000030) + op = op - 0xea400000 + 0xfa00f000 - type + shl(type, 17) + parse_gpr(q) + end + elseif p == "Y" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 8) ~= 0 then + werror("bad immediate operand") + end + op = op + shl(band(imm, 0xf0), 12) + band(imm, 0x0f) + elseif p == "K" then + local imm = tonumber(match(q, "^#(.*)$")); n = n + 1 + if not imm or shr(imm, 8) ~= 0 then + werror("bad immediate operand") + end + op = op + band(imm, 0x00ff) + elseif p == "s" then + -- Ignored. + else + assert(false) + end + end + wputpos(pos, op) +end + +map_op[".template__"] = function(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 3 positions. + if secpos+3 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + end + error(err, 0) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). 
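The pseudo-opcodes above emit all the C-side glue: .actionlist produces the encoded array, .globals the label enum, .globalnames/.externnames the name tables. For orientation, a minimal host skeleton tying them to the engine's three passes (put, link, encode). This is a sketch, not part of the patch: it assumes the default dasm_proto.h glue (`dasm_State **Dst`, realloc-based DASM_M_GROW), assumes the dynasm/ headers are on the include path, and substitutes a single STOP word for real dynasm output:

```c
#include <stdio.h>
#include <stdlib.h>
#include "dasm_proto.h"    /* default Dst/Dst_REF/DASM_M_GROW glue */
#include "dasm_armv7m.h"   /* the encoding engine from this patch */

/* Stand-in for the array a .dasc file would emit via .actionlist. */
static const unsigned int actions[1] = { 0 /* DASM_STOP */ };

int main(void)
{
  dasm_State *state;
  dasm_State **Dst = &state;
  void *globals[1];   /* would be sized GLOB__MAX by the .globals enum */
  size_t sz;
  dasm_init(Dst, 1);                    /* one code section */
  dasm_setupglobal(Dst, globals, 1);    /* must precede dasm_setup() */
  dasm_setup(Dst, actions);
  dasm_put(Dst, 0);                     /* pass 1: record the fragment */
  if (dasm_link(Dst, &sz) != DASM_S_OK) return 1;  /* pass 2 */
  printf("code size: %u bytes\n", (unsigned)sz);   /* 0 for a bare STOP */
  /* pass 3 would be: dasm_encode(Dst, mcode_buffer); */
  dasm_free(Dst);
  return 0;
}
```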
+map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +map_op[".long_*"] = function(params) + if not params then return "imm..." end + for _,p in ipairs(params) do + local n = tonumber(p) + if not n then werror("bad immediate `"..p.."'") end + if n < 0 then n = n + 2^32 end + wputw(n) + if secpos+2 > maxsecpos then wflush() end + end +end + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. 
+function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +local map_cond_b = { + eq = "f0008000B", ne = "f0408000B", cs = "f0808000B", cc = "f0c08000B", mi = "f1008000B", pl = "f1408000B", vs = "f1808000B", vc = "f1c08000B", + hi = "f2008000B", ls = "f2408000B", ge = "f2808000B", lt = "f2c08000B", gt = "f3008000B", le = "f3408000B", al = "f3808000B", + hs = "f0808000B", lo = "f0c08000B", +} + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = function(t, k) + local v = map_coreop[k] + if v then return v end + local k1, cc, k2 = match(k, "^(.-)(..)([._].*)$") + local cv = map_cond[cc] + if cv then + local v = rawget(t, k1..k2) + if type(v) == "string" and k1 == "b" then + local scv = map_cond_b[cc] + return scv + elseif type(v) == "string" then + return v + end + end + end }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/src/Makefile b/src/Makefile old mode 100644 new mode 100755 index 30d64be2..83d592f3 --- a/src/Makefile +++ b/src/Makefile @@ -36,7 +36,8 @@ CC= $(DEFAULT_CC) # to slow down the C part by not omitting it. Debugging, tracebacks and # unwinding are not affected -- the assembler part has frame unwind # information and GCC emits it where needed (x64) or with -g (see CCDEBUG). -CCOPT= -O2 -fomit-frame-pointer +#CCOPT= -O2 -fomit-frame-pointer +CCOPT= -O2 -fomit-frame-pointer -D__ARM_ARCH_7M__ -DLUAJIT_NO_UNWIND -DLUAJIT_DISABLE_PROFILE # Use this if you want to generate a smaller binary (but it's slower): #CCOPT= -Os -fomit-frame-pointer # Note: it's no longer recommended to use -O3 with GCC 4.x. @@ -49,7 +50,7 @@ CCOPT= -O2 -fomit-frame-pointer # CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse CCOPT_x64= -CCOPT_arm= +CCOPT_arm= -mthumb -march=armv7e-m -mfloat-abi=hard -mfpu=fpv5-d16 CCOPT_arm64= CCOPT_ppc= CCOPT_mips= @@ -71,7 +72,7 @@ CCWARN= -Wall # as dynamic mode. # # Mixed mode creates a static + dynamic library and a statically linked luajit. -BUILDMODE= mixed +BUILDMODE= static # # Static mode creates a static library and a statically linked luajit. 
#BUILDMODE= static @@ -242,6 +243,9 @@ ifneq (,$(findstring LJ_TARGET_X86 ,$(TARGET_TESTARCH))) TARGET_LJARCH= x86 else ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__ARM_ARCH_7M__=1 + endif TARGET_LJARCH= arm else ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) @@ -443,6 +447,9 @@ ifeq (x64,$(TARGET_LJARCH)) endif else ifeq (arm,$(TARGET_LJARCH)) + ifneq (,$(findstring __ARM_ARCH_7M__ ,$(TARGET_TESTARCH))) + DASM_ARCH= armv7m + endif ifeq (iOS,$(TARGET_SYS)) DASM_AFLAGS+= -D IOS endif diff --git a/src/host/buildvm.c b/src/host/buildvm.c index 9ee47ada..ca0ee47e 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -60,7 +60,11 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #if LJ_TARGET_X86ORX64 #include "../dynasm/dasm_x86.h" #elif LJ_TARGET_ARM +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +#include "../dynasm/dasm_armv7m.h" +#else #include "../dynasm/dasm_arm.h" +#endif #elif LJ_TARGET_ARM64 #include "../dynasm/dasm_arm64.h" #elif LJ_TARGET_PPC diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 7baa011f..1fc72a9d 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -114,6 +114,20 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, emit_asm_words(ctx, p, n-4); ins = *(uint32_t *)(p+n-4); #if LJ_TARGET_ARM +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + if ((ins & 0xd000f800) == 0xd000f000) { + fprintf(ctx->fp, "\tbl %s\n", sym); + } + else if ((ins & 0xd000f800) == 0x9000f000) { + fprintf(ctx->fp, "\tb %s\n", sym); + } + else { + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); + } +#else if ((ins & 0xff000000u) == 0xfa000000u) { fprintf(ctx->fp, "\tblx %s\n", sym); } else if ((ins & 0x0e000000u) == 0x0a000000u) { @@ -125,6 +139,7 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, ins, sym); exit(1); } +#endif #elif LJ_TARGET_ARM64 if ((ins >> 26) == 0x25u) { fprintf(ctx->fp, "\tbl %s\n", sym); @@ -193,6 +208,16 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc break; } #endif +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + fprintf(ctx->fp, + "\n\t.globl %s\n" + "\t.thumb_func\n" + "\t.hidden %s\n" + "\t.type %s, " ELFASM_PX "%s\n" + "\t.size %s, %d\n" + "%s:\n", + name,name,name,isfunc ? "function" : "object",name,size,name); +#else fprintf(ctx->fp, "\n\t.globl %s\n" "\t.hidden %s\n" @@ -200,6 +225,7 @@ static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc "\t.size %s, %d\n" "%s:\n", name, name, name, isfunc ? 
"function" : "object", name, size, name); +#endif break; case BUILD_coffasm: fprintf(ctx->fp, "\n\t.globl %s\n", name); @@ -242,8 +268,16 @@ void emit_asm(BuildCtx *ctx) int i, rel; fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + fprintf(ctx->fp, "\t.syntax unified\n"); + fprintf(ctx->fp, "\t.thumb\n"); +#endif fprintf(ctx->fp, "\t.text\n"); +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + emit_asm_align(ctx, 2); +#else emit_asm_align(ctx, 4); +#endif #if LJ_TARGET_PS3 emit_asm_label(ctx, ctx->beginsym, ctx->codesz, 0); diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 20e60493..4cadd673 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -38,10 +38,17 @@ #define MAX_SIZE_T (~(size_t)0) #define MALLOC_ALIGNMENT ((size_t)8U) +#if LJ_TARGET_NUTTX +#define DEFAULT_GRANULARITY ((size_t)32U * (size_t)1024U) +#define DEFAULT_TRIM_THRESHOLD ((size_t)512U * (size_t)1024U) +#define DEFAULT_MMAP_THRESHOLD ((size_t)32U * (size_t)1024U) +#define MAX_RELEASE_CHECK_RATE 255 +#else #define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U) #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) #define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U) #define MAX_RELEASE_CHECK_RATE 255 +#endif /* ------------------- size_t and alignment properties -------------------- */ @@ -90,9 +97,36 @@ #include /* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ +#if LJ_TARGET_NUTTX +#include +#include + +static struct mm_heap_s *g_alloc_heap; + +static void init_mmap(void) +{ + static uint8_t buffer[CONFIG_LUAJIT_ALLOC_SIZE] + locate_data(CONFIG_LUAJIT_ALLOC_SECTION_NAME); + g_alloc_heap = mm_initialize("alloc", + (void *)buffer, + CONFIG_LUAJIT_ALLOC_SIZE); +} +#define INIT_MMAP() init_mmap() + +#define CALL_MMAP(prng, size) mm_malloc(g_alloc_heap, (size_t)size) +#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) + +static int CALL_MUNMAP(void *ptr, size_t size) +{ + if (ptr == NULL) return -1; + mm_free(g_alloc_heap, ptr); + return 0; +} +#else #include #define LJ_ALLOC_MMAP 1 +#endif #if LJ_64 diff --git a/src/lj_arch.h b/src/lj_arch.h index 882c99cb..92fbae5e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -39,6 +39,7 @@ #define LUAJIT_OS_OSX 3 #define LUAJIT_OS_BSD 4 #define LUAJIT_OS_POSIX 5 +#define LUAJIT_OS_NUTTX 6 /* Number mode. */ #define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ @@ -110,6 +111,8 @@ #define LJ_OS_NAME "BSD" #elif LUAJIT_OS == LUAJIT_OS_POSIX #define LJ_OS_NAME "POSIX" +#elif LUAJIT_OS == LUAJIT_OS_NUTTX +#define LJ_OS_NAME "NUTTX" #else #define LJ_OS_NAME "Other" #endif @@ -119,6 +122,7 @@ #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) #define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) +#define LJ_TARGET_NUTTX (LUAJIT_OS == LUAJIT_OS_NUTTX) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX #if TARGET_OS_IPHONE @@ -214,8 +218,11 @@ #endif #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM - +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +#define LJ_ARCH_NAME "armv7m" +#else #define LJ_ARCH_NAME "arm" +#endif #define LJ_ARCH_BITS 32 #define LJ_ARCH_ENDIAN LUAJIT_LE #if !defined(LJ_ARCH_HASFPU) && __SOFTFP__ @@ -227,8 +234,12 @@ #define LJ_ABI_EABI 1 #define LJ_TARGET_ARM 1 #define LJ_TARGET_EHRETREG 0 -#define LJ_TARGET_EHRAREG 14 +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +/* No need to test jump address range, because we use veeners. 
*/ +//#define LJ_TARGET_JUMPRANGE 24 /* +-2^24 = +-16MB */ +#else #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ +#endif #define LJ_TARGET_MASKSHIFT 0 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ @@ -236,7 +247,7 @@ #if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 @@ -479,7 +490,7 @@ #if defined(__ARMEB__) #error "No support for big-endian ARM" #endif -#if __ARM_ARCH_6M__ || __ARM_ARCH_7M__ || __ARM_ARCH_7EM__ +#if __ARM_ARCH_6M__ /*|| __ARM_ARCH_7M__ || __ARM_ARCH_7EM__*/ #error "No support for Cortex-M CPUs" #endif #if !(__ARM_EABI__ || LJ_TARGET_IOS) diff --git a/src/lj_asm.c b/src/lj_asm.c index 6f5e0c45..429aa161 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -178,7 +178,11 @@ IRFLDEF(FLOFS) #if LJ_TARGET_X86ORX64 #include "lj_emit_x86.h" #elif LJ_TARGET_ARM +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +#include "lj_emit_armv7m.h" +#else #include "lj_emit_arm.h" +#endif #elif LJ_TARGET_ARM64 #include "lj_emit_arm64.h" #elif LJ_TARGET_PPC @@ -1655,7 +1659,11 @@ static void asm_loop(ASMState *as) #if LJ_TARGET_X86ORX64 #include "lj_asm_x86.h" #elif LJ_TARGET_ARM +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +#include "lj_asm_armv7m.h" +#else #include "lj_asm_arm.h" +#endif #elif LJ_TARGET_ARM64 #include "lj_asm_arm64.h" #elif LJ_TARGET_PPC diff --git a/src/lj_asm_armv7m.h b/src/lj_asm_armv7m.h new file mode 100644 index 00000000..1bdd4a8a --- /dev/null +++ b/src/lj_asm_armv7m.h @@ -0,0 +1,2520 @@ +/* +** ARM IR assembler (SSA IR -> machine code). +** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h +** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate a scratch register pair. */ +static Reg ra_scratchpair(ASMState *as, RegSet allow) +{ + RegSet pick1 = as->freeset & allow; + RegSet pick2 = pick1 & (pick1 >> 1) & RSET_GPREVEN; + Reg r; + if (pick2) { + r = rset_picktop(pick2); + } + else { + RegSet pick = pick1 & (allow >> 1) & RSET_GPREVEN; + if (pick) { + r = rset_picktop(pick); + ra_restore(as, regcost_ref(as->cost[r + 1])); + } + else { + pick = pick1 & (allow << 1) & RSET_GPRODD; + if (pick) { + r = ra_restore(as, regcost_ref(as->cost[rset_picktop(pick) - 1])); + } + else { + r = ra_evict(as, allow & (allow >> 1) & RSET_GPREVEN); + ra_restore(as, regcost_ref(as->cost[r + 1])); + } + } + } + lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); + ra_modified(as, r); + ra_modified(as, r + 1); + RA_DBGX((as, "scratchpair $r $r", r, r + 1)); + return r; +} + +#if !LJ_SOFTFP +/* Allocate two source registers for three-operand instructions. 
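(An aside on the pair allocator just above: ra_scratchpair() hands out an adjacent even/odd register pair, used for 64-bit values, e.g. with LDRD/STRD, and it finds candidates with a free-set bit trick: `pick1 & (pick1 >> 1)` marks registers whose successor is also free, and masking with RSET_GPREVEN keeps only even bases. A standalone illustration, not from the patch, with a made-up free set:)

```c
#include <stdio.h>

int main(void)
{
  unsigned int freeset = 0x000005ccu;  /* free: r2,r3,r6,r7,r8,r10 */
  unsigned int even = 0x55555555u;     /* stand-in for RSET_GPREVEN */
  unsigned int pairs = freeset & (freeset >> 1) & even;
  printf("usable pair bases: 0x%08x\n", pairs);  /* 0x44 -> r2/r3 and r6/r7 */
  return 0;
}
```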
*/ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } + else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); + } + else if (ra_hashint(right)) { + right = ra_allocref(as, ir->op2, allow); + left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); + } + else { + left = ra_allocref(as, ir->op1, allow); + right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} +#endif + +/* -- Guard handling ------------------------------------------------------ */ + +/* Generate an exit stub group at the bottom of the reserved MCode memory. */ +static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) +{ + MCode *mxp = as->mcbot; + int i; + if (mxp + 8 * 4 + 4*EXITSTUBS_PER_GROUP >= as->mctop) + asm_mclimit(as); + /* lj_vm_exit_interp_veneer */ + *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */ + *mxp++ = (MCode)lj_vm_exit_interp; + /* lj_vm_exit_handler_veneer */ + *mxp++ = ARMI_LDR | ARMF_N(RID_PC) | ARMF_T(RID_PC); /* LDR.W PC, [PC, #-0] */ + *mxp++ = (MCode)lj_vm_exit_handler; + /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ + *mxp++ = ARMI_STR | ARMI_LSX_U | ARMF_T(RID_LR) | ARMF_N(RID_SP); + *mxp = ARMI_BL | ARMC_BL((-4) << 1); /* lj_vm_exit_handler_veneer */ + mxp++; + *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ + *mxp++ = group*EXITSTUBS_PER_GROUP; + for (i = 0; i < EXITSTUBS_PER_GROUP; i++) + *mxp++ = ARMI_B_T4 | ARMC_BL((-5 - i) << 1); + lj_mcode_sync(as->mcbot, mxp); + lj_mcode_commitbot(as->J, mxp); + as->mcbot = mxp; + as->mclim = as->mcbot + MCLIM_REDZONE; + return mxp - EXITSTUBS_PER_GROUP; +} + +/* Setup all needed exit stubs. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) + lj_trace_err(as->J, LJ_TRERR_SNAPOV); + for (i = 0; i < (nexits + EXITSTUBS_PER_GROUP - 1) / EXITSTUBS_PER_GROUP; i++) + if (as->J->exitstubgroup[i] == NULL) + as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); +} + +/* Emit conditional branch to exit for guard. */ +static void asm_guardcc(ASMState *as, ARMCC cc) +{ + MCode *target = exitstub_addr(as->J, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = ARMI_BL | ARMC_BL((target - p - 1) << 1); + emit_branch(as, ARMF_CC(ARMI_B, cc ^ 1), p); + return; + } + emit_branchlink(as, ARMI_BL, target); + ARMI_IT(cc); +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. 
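+** A colocated array is allocated directly behind the GCtab header, so its base is simply tab + sizeof(GCtab); returning that constant offset (or 0 when fusion is impossible) lets the enclosing AREF collapse into a plain register+offset operand.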
*/ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, int lim) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (ofs > -lim && ofs < lim) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } + } + } + else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (ofs < lim) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } + else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); + *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ + return ra_allock(as, (ofs & ~255), allow); + } + } else if (ir->o == IR_TMPREF) { + *ofsp = 0; + return RID_SP; + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse m operand into arithmetic/logic instructions. */ +static uint32_t asm_fuseopm(ASMState *as, ARMIns ai, IRRef ref, RegSet allow, uint32_t *rs) +{ + IRIns *ir = IR(ref); + if (ra_hasreg(ir->r)) { + ra_noweak(as, ir->r); + return ARMF_M(ir->r); + } + else if (irref_isk(ref)) { + uint32_t k = emit_isk12(ai, ir->i); + if (k) + return k; + } + else if (mayfuse(as, ref)) { + if (ir->o >= IR_BSHL && ir->o <= IR_BROR) { + Reg m = ra_alloc1(as, ir->op1, allow); + ARMShift sh = ir->o == IR_BSHL ? ARMSH_LSL : + ir->o == IR_BSHR ? ARMSH_LSR : + ir->o == IR_BSAR ? ARMSH_ASR : ARMSH_ROR; + if (irref_isk(ir->op2)) { + return ARMF_M(m) | ARMF_SH(sh, (IR(ir->op2)->i & 31)); + } + else { + Reg s = ra_alloc1(as, ir->op2, rset_exclude(allow, m)); + *rs = ARMF_RSH(sh, s); + return ARMF_M(m); + } + } + else if (ir->o == IR_ADD && ir->op1 == ir->op2) { + Reg m = ra_alloc1(as, ir->op1, allow); + return ARMF_M(m) | ARMF_SH(ARMSH_LSL, 1); + } + } + return ARMF_M(ra_allocref(as, ref, allow)); +} + +/* Fuse shifts into loads/stores. Only bother with BSHL 2 => lsl #2. */ +static IRRef asm_fuselsl2(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r) && mayfuse(as, ref) && ir->o == IR_BSHL && + irref_isk(ir->op2) && IR(ir->op2)->i == 2) + return ir->op1; + return 0; /* No fusion. */ +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, + ARMIns ai, + Reg rd, + IRRef ref, + RegSet allow, + int32_t ofs) +{ + IRIns *ir = IR(ref); + Reg base; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + int32_t lim = (!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) ? 1024 : + (ai & 0x00000080) ? 
4096 : 256; + if (ir->o == IR_ADD) { + int32_t ofs2; + if (irref_isk(ir->op2) && + (ofs2 = ofs + IR(ir->op2)->i) > -lim && ofs2 < lim && + (!(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00)) || !(ofs2 & 3))) { + ofs = ofs2; + ref = ir->op1; + } + else if (ofs == 0 && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) { + IRRef lref = ir->op1, rref = ir->op2; + Reg rn, rm; + IRRef sref = asm_fuselsl2(as, rref); + if (sref) { + rref = sref; + ai |= ARMF_LSL(2); + } + else if ((sref = asm_fuselsl2(as, lref)) != 0) { + lref = rref; + rref = sref; + ai |= ARMF_LSL(2); + } + rn = ra_alloc1(as, lref, allow); + rm = ra_alloc1(as, rref, rset_exclude(allow, rn)); + emit_tnm(as, ai, rd, rn, rm); + return; + } + } + else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && ((ai & 0x0000ff00) == 0x0000ed00))) { + lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs += IR(ir->op2)->i; + ref = ir->op1; + } + else if (irref_isk(ir->op1)) { + ofs += IR(ir->op1)->i; + ref = ir->op2; + } + else { + /* NYI: Fuse ADD with constant. */ + Reg rn = ra_alloc1(as, ir->op1, allow); + uint32_t rs = 0; + uint32_t m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn), &rs); + if ((ai & 0x0000fe00) == 0x0000f800) + emit_lso(as, ai, rd, rd, ofs); + else + emit_lsox(as, ai, rd, rd, ofs); + emit_dn(as, ARMI_ADD ^ m, rd, rn); + if (rs) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); + return; + } + if (ofs <= -lim || ofs >= lim) { + Reg rn = ra_alloc1(as, ref, allow); + Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); + emit_tnm(as, ai, rd, rn, rm); + return; + } + } + } + base = ra_alloc1(as, ref, allow); +#if !LJ_SOFTFP + if ((ai & 0x0000ff00) == 0x0000ed00) + emit_vlso(as, ai, rd, base, ofs); + else +#endif + if ((ai & 0x0000fe00) == 0x0000f800) + emit_lso(as, ai, rd, base, ofs); + else + emit_lsox(as, ai, rd, base, ofs); +} + +#if !LJ_SOFTFP +/* Fuse to multiply-add/sub instruction. */ +static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ai = air, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg right, left = ra_alloc2(as, + irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + right = (left >> 8); left &= 255; + emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15)); + if (dest != add) emit_tm(as, ARMI_VMOV_D, (dest & 15), (add & 15)); + return 1; + } + return 0; +} +#endif + +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; +#if LJ_SOFTFP + Reg gpr = REGARG_FIRSTGPR; +#else + Reg gpr, fpr = REGARG_FIRSTFPR, fprodd = 0; +#endif + if ((void *)ci->func) + emit_call(as, (void *)ci->func); +#if !LJ_SOFTFP + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; +#endif + for (n = 0; n < nargs; n++) { + /* Setup args. 
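+** Argument passing follows the ARM EABI: the first four words go in r0-r3, 64-bit numbers start at an even GPR, and the remainder spills to the stack with doubles kept 8-byte aligned; hard-float non-vararg calls use the VFP registers instead, back-filling odd single-precision slots. Keep in mind that machine code is emitted backwards, so the emit_call() above executes after this argument setup.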
*/ + IRRef ref = args[n]; + IRIns *ir = IR(ref); +#if !LJ_SOFTFP + if (ref && irt_isfp(ir->t)) { + RegSet of = as->freeset; + Reg src; + if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { + if (irt_isnum(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + ra_leftov(as, fpr, ref); + fpr++; + continue; + } + } + else if (fprodd) { + /* Ick. */ + src = ra_alloc1(as, ref, RSET_FPR); + emit_tm(as, ARMI_VMOV_S, (fprodd & 15), (src & 15) | 0x00000040); + fprodd = 0; + continue; + } + else if (fpr <= REGARG_LASTFPR) { + ra_leftov(as, fpr, ref); + fprodd = fpr++; + continue; + } + /* Workaround to protect argument GPRs from being used for remat. */ + as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1); + src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ + as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1)); + fprodd = 0; + goto stackfp; + } + /* Workaround to protect argument GPRs from being used for remat. */ + as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1); + src = ra_alloc1(as, ref, RSET_FPR); /* May alloc GPR to remat FPR. */ + as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR + 1)); + if (irt_isnum(ir->t)) gpr = (gpr + 1) & ~1u; + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + if (irt_isnum(ir->t)) { + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Ditto. */ + emit_tnm(as, ARMI_VMOV_RR_D, gpr, gpr + 1, (src & 15)); + gpr += 2; + } + else { + emit_tn(as, ARMI_VMOV_R_S, gpr, (src & 15)); + gpr++; + } + } + else { +stackfp: + if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; + emit_spstore(as, ir, src, ofs); + ofs += irt_isnum(ir->t) ? 8 : 4; + } + } + else +#endif + { + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + if (ref) ra_leftov(as, gpr, ref); + gpr++; + } + else { + if (ref) { + Reg r = ra_alloc1(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs); + } + ofs += 4; + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + int hiop = ((ir + 1)->o == IR_HIOP && !irt_isnil((ir + 1)->t)); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir + 1)->r)) + rset_clear(drop, (ir + 1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { + if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64 | CCI_VARARG))) { + Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); + if (irt_isnum(ir->t)) + emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, dest); + else + emit_tn(as, ARMI_VMOV_S_R, RID_RET, dest); + } + else { + ra_destreg(as, ir, RID_FPRET); + } + } + else if (hiop) { + ra_destpair(as, ir); + } + else { + ra_destreg(as, ir, RID_RET); + } + } + UNUSED(ci); +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX * 2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { + /* Call to constant address. 
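+** emit_call() then encodes this as a direct BL when the target is within branch range and presumably falls back to loading the address into a register otherwise.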
*/ + ci.func = (ASMFunction)(void *)(irf->i); + } + else { + /* Need a non-argument register for indirect calls. */ + Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_R4, RID_R12 + 1)); + emit_m(as, ARMI_BLXr, freg); + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1 + LJ_FR2 + bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + /* Need to force a spill on REF_BASE now to update the stack slot. */ + emit_lso(as, ARMI_STR, base, RID_SP, ra_spill(as, IR(REF_BASE))); + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guardcc(as, CC_NE); + emit_nm(as, + ARMI_CMP, + RID_TMP, + ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, ARMI_LDR, RID_TMP, base, -4); +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + //if ((as->flags & JIT_F_ARMV6T2)) { //jturnsek!!! + // emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); + //} else { + emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); + emit_dn(as, ARMC_K12(ARMI_AND, 255), tmp, tmp); + //} + emit_lso(as, ARMI_LDR, RID_TMP, + ra_allock(as, (addr & ~4095), + rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), + (addr & 4095)); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + +/* -- Type conversions ---------------------------------------------------- */ + +#if !LJ_SOFTFP +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guardcc(as, CC_NE); + emit_t(as, ARMI_VMRS, 0); + emit_tm(as, ARMI_VCMP_D, (tmp & 15), (left & 15)); + emit_tm(as, ARMI_VCVT_F64_S32, (tmp & 15), (tmp & 15)); + emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); + emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (left & 15)); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); + emit_tnm(as, ARMI_VADD_D, (tmp & 15), (left & 15), (right & 15)); +} +#endif + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP + int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif + IRRef lref = ir->op1; + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); +#if LJ_SOFTFP + /* FP conversions are handled by SPLIT. 
*/ + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { + /* FP to FP conversion. */ + emit_tm(as, + st == IRT_NUM ? ARMI_VCVT_F32_F64 : ARMI_VCVT_F64_F32, + (dest & 15), + (ra_alloc1(as, lref, RSET_FPR) & 15)); + } + else { + /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + ARMIns ai = irt_isfloat(ir->t) ? + (st == IRT_INT ? ARMI_VCVT_F32_S32 : ARMI_VCVT_F32_U32) : + (st == IRT_INT ? ARMI_VCVT_F64_S32 : ARMI_VCVT_F64_U32); + emit_tm(as, ai, (dest & 15), (dest & 15)); + emit_tn(as, ARMI_VMOV_S_R, left, (dest & 15)); + } + } + else if (stfp) { + /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } + else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + ARMIns ai; + emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); + ai = irt_isint(ir->t) ? + (st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) : + (st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32); + emit_tm(as, ai, (tmp & 15), (left & 15)); + } + } + else +#endif + { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (st >= IRT_I8 && st <= IRT_U16) { + /* Extend to 32 bit integer. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + if ((as->flags & JIT_F_ARMV6)) { + ARMIns ai = st == IRT_I8 ? ARMI_SXTB : + st == IRT_U8 ? ARMI_UXTB : + st == IRT_I16 ? ARMI_SXTH : ARMI_UXTH; + emit_dm(as, ai, dest, left); + } + else if (st == IRT_U8) { + emit_dn(as, ARMC_K12(ARMI_AND, 255), dest, left); + } + else { + uint32_t shift = st == IRT_I8 ? 24 : 16; + ARMShift sh = st == IRT_U16 ? ARMSH_LSR : ARMSH_ASR; + emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, RID_TMP); + emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_LSL, shift), RID_TMP, left); + } + } + else { + /* Handle 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. 
*/ + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + Reg rlo = 0, rhi = 0, tmp; + int destused = ra_used(ir); + int32_t ofs = 0; + ra_evictset(as, RSET_SCRATCH); +#if LJ_SOFTFP + if (destused) { + if (ra_hasspill(ir->s) && ra_hasspill((ir + 1)->s) && + (ir->s & 1) == 0 && ir->s + 1 == (ir + 1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir + i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir + i, r, sps_scale((ir + i)->s)); + } + } + ofs = sps_scale(ir->s); + destused = 0; + } + else { + rhi = ra_dest(as, ir + 1, RSET_GPR); + rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + } + } + asm_guardcc(as, CC_EQ); + if (destused) { + emit_lso(as, ARMI_LDR, rhi, RID_SP, 4); + emit_lso(as, ARMI_LDR, rlo, RID_SP, 0); + } +#else + UNUSED(rhi); + if (destused) { + if (ra_hasspill(ir->s)) { + ofs = sps_scale(ir->s); + destused = 0; + if (ra_hasreg(ir->r)) { + ra_free(as, ir->r); + ra_modified(as, ir->r); + emit_spload(as, ir, ir->r, ofs); + } + } + else { + rlo = ra_dest(as, ir, RSET_FPR); + } + } + asm_guardcc(as, CC_EQ); + if (destused) + emit_vlso(as, ARMI_VLDR_D, rlo, RID_SP, 0); +#endif + emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + if (ofs == 0) + emit_dm(as, ARMI_MOV, tmp, RID_SP); + else + emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) +{ + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { +#if LJ_SOFTFP + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + RID_SP, 0); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + RID_SP, 4); +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); +#endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); +#endif + } + } else { + /* Otherwise use [sp] and [sp+4] to hold the TValue. + ** This assumes the following call has max. 4 args. + */ + Reg type; + emit_dm(as, ARMI_MOV, dest, RID_SP); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_lso(as, ARMI_STR, src, RID_SP, 0); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); + emit_lso(as, ARMI_STR, type, RID_SP, 4); + } + } + else { + emit_dm(as, ARMI_MOV, dest, RID_SP); + } +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? 
tab : ir->op1; + uint32_t k = emit_isk12(ARMI_ADD, ofs + 8*IR(ir->op2)->i); + if (k) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dn(as, ARMI_ADD ^ k, dest, base); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, base, idx); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = 0, keyhi = 0, keynumhi = RID_NONE, tmp = RID_TMP; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + IRType1 kt = irkey->t; + int32_t k = 0, khi = emit_isk12(ARMI_CMP, irt_toitype(kt)); + uint32_t khash; + MCLabel l_end, l_loop; + rset_clear(allow, tab); + if (!irref_isk(refkey) || irt_isstr(kt)) { +#if LJ_SOFTFP + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + if (irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey + 1)->r)) { + keynumhi = (irkey + 1)->r; + keyhi = RID_TMP; + ra_noweak(as, keynumhi); + } + else { + keyhi = keynumhi = ra_allocref(as, refkey + 1, allow); + } + rset_clear(allow, keynumhi); + khi = 0; + } +#else + if (irt_isnum(kt)) { + key = ra_scratch(as, allow); + rset_clear(allow, key); + keyhi = keynumhi = ra_scratch(as, allow); + rset_clear(allow, keyhi); + khi = 0; + } + else { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } +#endif + } + else if (irt_isnum(kt)) { + int32_t val = (int32_t)ir_knum(irkey)->u32.lo; + k = emit_isk12(ARMI_CMP, val); + if (!k) { + key = ra_allock(as, val, allow); + rset_clear(allow, key); + } + val = (int32_t)ir_knum(irkey)->u32.hi; + khi = emit_isk12(ARMI_CMP, val); + if (!khi) { + keyhi = ra_allock(as, val, allow); + rset_clear(allow, keyhi); + } + } + else if (!irt_ispri(kt)) { + k = emit_isk12(ARMI_CMP, irkey->i); + if (!k) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + if (!irt_ispri(kt)) + tmp = ra_scratchpair(as, allow); + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; + if (merge == IR_NE) + asm_guardcc(as, CC_AL); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_n(as, ARMC_K12(ARMI_CMP, 0), dest); + emit_lso(as, ARMI_LDR, dest, dest, (int32_t)offsetof(Node, next)); + + /* Type and value comparison. */ + if (merge == IR_EQ) + asm_guardcc(as, CC_EQ); + else { + emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); + } + if (!irt_ispri(kt)) { + emit_nm(as, ARMI_CMP ^ k, tmp, key); + ARMI_IT(CC_EQ); + emit_nm(as, ARMI_CMP ^ khi, tmp + 1, keyhi); + emit_lsox(as, ARMI_LDRD, tmp, dest, (int32_t)offsetof(Node, key)); + } + else { + emit_n(as, ARMI_CMP ^ khi, tmp); + emit_lso(as, ARMI_LDR, tmp, dest, (int32_t)offsetof(Node, key.it)); + } + *l_loop = ARMF_CC(ARMI_B, CC_NE) | ARMC_B((as->mcp - l_loop - 1) << 1); + + /* Load main position relative to tab->node into dest. */ + khash = irref_isk(refkey) ? 
ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); + } + else { + emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); + emit_dnm(as, ARMI_ADD | ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); + if (irt_isstr(kt)) { + /* Fetch of str->sid is cheaper than ra_allock. */ + emit_dnm(as, ARMI_AND, tmp, tmp + 1, RID_TMP); + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_lso(as, ARMI_LDR, tmp + 1, key, (int32_t)offsetof(GCstr, sid)); + emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); + } + else if (irref_isk(refkey)) { + emit_opk(as, + ARMI_AND, + tmp, + RID_TMP, + (int32_t)khash, + rset_exclude(rset_exclude(RSET_GPR, tab), dest)); + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); + } + else { + /* Must match with hash*() in lj_tab.c. */ + if (ra_hasreg(keynumhi)) { + /* Canonicalize +-0.0 to 0.0. */ + if (keyhi == RID_TMP) { + emit_dm(as, ARMI_MOV, keyhi, keynumhi); + ARMI_IT(CC_NE); + } + emit_d(as, ARMC_K12(ARMI_MOV, 0), keyhi); + ARMI_IT(CC_EQ); + } + emit_dnm(as, ARMI_AND, tmp, tmp, RID_TMP); + emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT3), tmp, tmp, tmp + 1); + emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_dnm(as, + ARMI_EOR|ARMF_SH(ARMSH_ROR, 32 - ((HASH_ROT2 + HASH_ROT1) & 31)), + tmp, + tmp+1, + tmp); + emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); + emit_dnm(as, ARMI_SUB | ARMF_SH(ARMSH_ROR, 32 - HASH_ROT1), tmp + 1, tmp + 1, tmp); + if (ra_hasreg(keynumhi)) { + emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key); + emit_dnm(as, ARMI_ORR | ARMI_S, RID_TMP, tmp, key); /* Test for +-0.0. */ + emit_dnm(as, ARMI_ADD, tmp, keynumhi, keynumhi); +#if !LJ_SOFTFP + emit_tnm(as, + ARMI_VMOV_RR_D, + key, + keynumhi, + (ra_alloc1(as, refkey, RSET_FPR) & 15)); +#endif + } + else { + emit_dnm(as, ARMI_EOR, tmp + 1, tmp, key); + emit_opk(as, + ARMI_ADD, + tmp, + key, + (int32_t)HASH_BIAS, + rset_exclude(rset_exclude(RSET_GPR, tab), key)); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + Reg dest = (ra_used(ir) || ofs > 4095) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key = RID_NONE, type = RID_TMP, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (ofs > 4095) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } + else if (ra_hasreg(dest)) { + emit_opk(as, ARMI_ADD, dest, node, ofs, allow); + } + asm_guardcc(as, CC_NE); + if (!irt_ispri(irkey->t)) { + RegSet even = (as->freeset & allow); + even = even & (even >> 1) & RSET_GPREVEN; + if (even) { + key = ra_scratch(as, even); + if (rset_test(as->freeset, key + 1)) { + type = key + 1; + ra_modified(as, type); + } + } + else { + key = ra_scratch(as, allow); + } + rset_clear(allow, key); + } + rset_clear(allow, type); + if (irt_isnum(irkey->t)) { + emit_opk(as, + ARMI_CMP, + 0, + type, + (int32_t)ir_knum(irkey)->u32.hi, + allow); + ARMI_IT(CC_EQ); + emit_opk(as, + ARMI_CMP, + 0, + key, + (int32_t)ir_knum(irkey)->u32.lo, + allow); + } + else { + if (ra_hasreg(key)) { + emit_opk(as, ARMI_CMP, 0, key, irkey->i, allow); + ARMI_IT(CC_EQ); + } + emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype(irkey->t)), type); + } + emit_lso(as, ARMI_LDR, type, idx, kofs + 4); + if (ra_hasreg(key)) emit_lso(as, ARMI_LDR, key, idx, kofs); + if (ofs > 4095) + emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, ARMI_LDR, dest, v); + } + else { + Reg uv = ra_scratch(as, RSET_GPR); + Reg func = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->o == IR_UREFC) { + asm_guardcc(as, CC_NE); + emit_n(as, ARMC_K12(ARMI_CMP, 1), RID_TMP); + emit_opk(as, + ARMI_ADD, + dest, + uv, + (int32_t)offsetof(GCupval, tv), + RSET_GPR); + emit_lso(as, ARMI_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + } + else { + emit_lso(as, ARMI_LDR, dest, uv, (int32_t)offsetof(GCupval, v)); + } + emit_lso(as, + ARMI_LDR, + uv, + func, + (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + IRRef ref = ir->op2, refk = ir->op1; + Reg r; + if (irref_isk(ref)) { + IRRef tmp = refk; refk = ref; ref = tmp; + } + else if (!irref_isk(refk)) { + uint32_t k, m = ARMC_K12(0, sizeof(GCstr)); + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + IRIns *irr = IR(ir->op2); + if (ra_hasreg(irr->r)) { + ra_noweak(as, irr->r); + right = irr->r; + } + else if (mayfuse(as, irr->op2) && + irr->o == IR_ADD && irref_isk(irr->op2) && + (k = emit_isk12(ARMI_ADD, + (int32_t)sizeof(GCstr) + IR(irr->op2)->i))) { + m = k; + right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); + } + else { + right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left)); + } + emit_dn(as, ARMI_ADD ^ m, dest, dest); + emit_dnm(as, ARMI_ADD, dest, left, right); + return; + } + r = ra_alloc1(as, ref, RSET_GPR); + emit_opk(as, + ARMI_ADD, + dest, + r, + sizeof(GCstr) + IR(refk)->i, + rset_exclude(RSET_GPR, r)); +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) +{ + UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: return ARMI_LDRSB; + case IRT_U8: 
return ARMI_LDRB; + case IRT_I16: return ARMI_LDRSH; + case IRT_U16: return ARMI_LDRH; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; + case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ + default: return ARMI_LDR; + } +} + +static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) +{ + UNUSED(as); + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return ARMI_STRB; + case IRT_I16: case IRT_U16: return ARMI_STRH; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; + case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ + default: return ARMI_STR; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + ARMIns ai = asm_fxloadins(as, ir); + Reg idx; + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); + ofs = 0; + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { + /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, ARMC_K12(ARMI_ADD, ofs), dest, idx); + return; + } + } + ofs = field_ofs[ir->op2]; + } + emit_lso(as, ai, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + ARMIns ai = asm_fxstoreins(as, ir); + emit_lso(as, ai, src, idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, + ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); +} + +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, + ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, + asm_fxstoreins(as, ir), + src, + ir->op1, + rset_exclude(RSET_GPR, src), + ofs); + } +} + +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); + IRType t = hiop ? IRT_NUM : irt_type(ir->t); + Reg dest = RID_NONE, type = RID_NONE, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; + if (hiop && ra_used(ir + 1)) { + type = ra_dest(as, ir + 1, allow); + rset_clear(allow, type); + } + if (ra_used(ir)) { + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + rset_clear(allow, dest); + } + idx = asm_fuseahuref(as, + ir->op1, + &ofs, + allow, + (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + if (!hiop || type == RID_NONE) { + rset_clear(allow, idx); + if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && + rset_test((as->freeset & allow), dest + 1)) { + type = dest + 1; + ra_modified(as, type); + } + else { + type = RID_TMP; + } + } + asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); + emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type); + if (ra_hasreg(dest)) { +#if !LJ_SOFTFP + if (t == IRT_NUM) + emit_vlso(as, ARMI_VLDR_D, dest, idx, ofs); + else +#endif + emit_lso(as, ARMI_LDR, dest, idx, ofs); + } + emit_lso(as, ARMI_LDR, type, idx, ofs + 4); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, 1024); + emit_vlso(as, ARMI_VSTR_D, src, idx, ofs); + } + else +#endif + { + int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); + if (!irt_ispri(ir->t)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + } + if (hiop) + type = ra_alloc1(as, (ir + 1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), 4096); + if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); + emit_lso(as, ARMI_STR, type, idx, ofs + 4); + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + int32_t ofs = 8*((int32_t)ir->op1 - 1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int hiop = (LJ_SOFTFP && (ir + 1)->o == IR_HIOP); + IRType t = hiop ? IRT_NUM : irt_type(ir->t); + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); +#if LJ_SOFTFP + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir + 1)) { + type = ra_dest(as, ir + 1, allow); + rset_clear(allow, type); + } +#else + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(ir->t) && t == IRT_INT) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t = IRT_NUM; /* Continue with a regular number type check. */ + } + else +#endif + if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + if ((ir->op2 & IRSLOAD_CONVERT)) { + if (t == IRT_INT) { + emit_tn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); + emit_tm(as, ARMI_VCVT_S32_F64, (tmp & 15), (tmp & 15)); + t = IRT_NUM; /* Check for original type. */ + } + else { + emit_tm(as, ARMI_VCVT_F64_S32, (dest & 15), (dest & 15)); + emit_tn(as, ARMI_VMOV_S_R, tmp, (dest & 15)); + t = IRT_INT; /* Check for original type. */ + } + dest = tmp; + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); +dotypecheck: + rset_clear(allow, base); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + if (ra_noreg(type)) { + if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && + rset_test((as->freeset & allow), dest + 1)) { + type = dest + 1; + ra_modified(as, type); + } + else { + type = RID_TMP; + } + } + asm_guardcc(as, t == IRT_NUM ? 
CC_HS : CC_NE); + if ((ir->op2 & IRSLOAD_KEYINDEX)) { + emit_n(as, ARMC_K12(ARMI_CMN, 1), type); + emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type); + } else { + emit_n(as, ARMC_K12(ARMI_CMN, -irt_toitype_(t)), type); + } + } + if (ra_hasreg(dest)) { +#if !LJ_SOFTFP + if (t == IRT_NUM) { + if (ofs < 1024) { + emit_vlso(as, ARMI_VLDR_D, dest, base, ofs); + } + else { + if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4); + emit_vlso(as, ARMI_VLDR_D, dest, RID_TMP, 0); + emit_opk(as, ARMI_ADD, RID_TMP, base, ofs, allow); + return; + } + } + else +#endif + emit_lso(as, ARMI_LDR, dest, base, ofs); + } + if (ra_hasreg(type)) emit_lso(as, ARMI_LDR, type, base, ofs + 4); +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + RegSet drop = RSET_SCRATCH; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + if (ra_used(ir)) + ra_destreg(as, ir, RID_RET); /* GCcdata * */ + + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + if (sz == 8) { + ofs += 4; ir++; + lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); + } + for (;;) { + Reg r = ra_alloc1(as, ir->op2, allow); + emit_lso(as, ARMI_STR, r, RID_RET, ofs); + rset_clear(allow, r); + if (ofs == sizeof(GCcdata)) break; + ofs -= 4; ir--; + } + } + else if (ir->op2 != REF_NIL) { + /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + { + uint32_t k = emit_isk12(ARMI_MOV, id); + Reg r = k ? 
RID_R1 : ra_allock(as, id, allow); + emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); + emit_lso(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); + emit_d(as, ARMC_K12(ARMI_MOV, ~LJ_TCDATA), RID_TMP); + if (k) emit_d(as, ARMI_MOV ^ k, RID_R1); + } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, + (int32_t)(sz + sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg gr = ra_allock(as, + i32ptr(J2G(as->J)), + rset_exclude(rset_exclude(RSET_GPR, tab), link)); + Reg mark = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, ARMI_STR, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, ARMI_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_lso(as, + ARMI_STR, + tab, + gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_dn(as, ARMC_K12(ARMI_BIC, LJ_GC_BLACK), mark, mark); + emit_lso(as, + ARMI_LDR, + link, + gr, + (int32_t)offsetof(global_State, gc.grayagain)); + emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); + emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), mark); + emit_lso(as, ARMI_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + //if ((l_end[-1] >> 28) == CC_AL) jturnsek!!! + // l_end[-1] = ARMF_CC(l_end[-1], CC_NE); + //else { + emit_branch(as, ARMF_CC(ARMI_B, CC_EQ), l_end); + //} + ra_allockreg(as, i32ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1)); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_BLACK), tmp); + ARMI_IT(CC_NE); + emit_n(as, ARMC_K12(ARMI_TST, LJ_GC_WHITES), RID_TMP); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, + ARMI_LDRB, + tmp, + obj, + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + emit_lso(as, ARMI_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +#if !LJ_SOFTFP +static void asm_fparith(ASMState *as, IRIns *ir, ARMIns ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_tnm(as, ai, (dest & 15), (left & 15), (right & 15)); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_tm(as, ai, (dest & 15), (left & 15)); +} + +static void asm_callround(ASMState *as, IRIns *ir, int id) +{ + /* The modified regs must match with the *.dasc implementation. 
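+** lj_vm_floor_sf/lj_vm_ceil_sf/lj_vm_trunc_sf take the double in r0/r1 and return it there, clobbering only r0-r3 and r12, which is exactly the set evicted above.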
*/ + RegSet drop = RID2RSET(RID_R0) | RID2RSET(RID_R1) | RID2RSET(RID_R2) | + RID2RSET(RID_R3) | RID2RSET(RID_R12); + RegSet of; + Reg dest, src; + ra_evictset(as, drop); + dest = ra_dest(as, ir, RSET_FPR); + emit_tnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); + emit_call(as, + id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : + id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : + (void *)lj_vm_trunc_sf); + /* Workaround to protect argument GPRs from being used for remat. */ + of = as->freeset; + as->freeset &= ~RSET_RANGE(RID_R0, RID_R1 + 1); + as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); + src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ + as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1 + 1)); + emit_tnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, ARMI_VSQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); +} +#endif + +static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) +{ + IRIns *ir; + if (irref_isk(rref)) + return 0; /* Don't swap constants to the left. */ + if (irref_isk(lref)) + return 1; /* But swap constants to the right. */ + ir = IR(rref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) + return 0; /* Don't swap fusable operands to the left. */ + ir = IR(lref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BROR) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) + return 1; /* But swap fusable operands to the right. */ + return 0; /* Otherwise don't swap. */ +} + +static void asm_intop(ASMState *as, IRIns *ir, ARMIns ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m, rs = 0; + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if ((ai & ~ARMI_S) == ARMI_SUB || (ai & ~ARMI_S) == ARMI_SBC) + ai ^= (ARMI_SUB ^ ARMI_RSB); + } + left = ra_hintalloc(as, lref, dest, RSET_GPR); + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left), &rs); + if (irt_isguard(ir->t)) { + /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_VS); + ai |= ARMI_S; + } + emit_dn(as, ai ^ m, dest, left); + if (rs) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); +} + +/* Try to drop cmp r, #0. */ +static ARMIns asm_drop_cmp0(ASMState *as, ARMIns ai) //jturnsek!!! +{ + if (as->flagmcp == as->mcp) { + uint32_t cc = ((as->mcp[1] >> 20) & 0xf); + as->flagmcp = NULL; + if (cc <= CC_NE) { + as->mcp++; + ai |= ARMI_S; + } + else if (cc == CC_GE) { + * ++as->mcp ^= ((CC_GE ^ CC_PL) << 20); + ai |= ARMI_S; + } + else if (cc == CC_LT) { + * ++as->mcp ^= ((CC_LT ^ CC_MI) << 20); + ai |= ARMI_S; + } /* else: other conds don't work in general. */ + } + return ai; +} + +static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) +{ + asm_intop(as, ir, asm_drop_cmp0(as, ai)); +} + +static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dn(as, ARMC_K12(ai, 0), dest, left); +} + +/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. 
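+** E.g. x*5 could become ADD dest, x, x, LSL #2 and x*7 could become RSB dest, x, x, LSL #3, avoiding the multiply entirely; FOLD currently strength-reduces only exact powers of two.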
*/ +static void asm_intmul(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + Reg tmp = RID_NONE; + /* ARMv5 restriction: dest != left and dest_hi != left. */ + if (dest == left && left != right) { left = right; right = dest; } + if (irt_isguard(ir->t)) { + /* IR_MULOV */ + if (!(as->flags & JIT_F_ARMV6) && dest == left) + tmp = left = ra_scratch(as, rset_exclude(RSET_GPR, left)); + asm_guardcc(as, CC_NE); + emit_nm(as, ARMI_TEQ | ARMF_SH(ARMSH_ASR, 31), RID_TMP, dest); + emit_dnm(as, ARMI_SMULL | ARMF_T(dest), RID_TMP, left, right); + } + else { + if (!(as->flags & JIT_F_ARMV6) && dest == left) tmp = left = RID_TMP; + emit_dnm(as, ARMI_MUL, dest, left, right); + } + /* Only need this for the dest == left == right case. */ + if (ra_hasreg(tmp)) emit_dm(as, ARMI_MOV, tmp, right); +} + +static void asm_add(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, ARMI_VMLA_D, ARMI_VMLA_D)) + asm_fparith(as, ir, ARMI_VADD_D); + return; + } +#endif + asm_intop_s(as, ir, ARMI_ADD); +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, ARMI_VNMLS_D, ARMI_VMLS_D)) + asm_fparith(as, ir, ARMI_VSUB_D); + return; + } +#endif + asm_intop_s(as, ir, ARMI_SUB); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, ARMI_VMUL_D); + return; + } +#endif + asm_intmul(as, ir); +} + +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#if !LJ_SOFTFP +#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) +#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) +#endif + +static void asm_neg(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, ARMI_VNEG_D); + return; + } +#endif + asm_intneg(as, ir, ARMI_RSB); +} + +static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) +{ + ai = asm_drop_cmp0(as, ai); + if (ir->op2 == 0) { + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t rs = 0; + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR, &rs); + emit_d(as, ai ^ m, dest); + if (rs) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); + } + else { + /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ + asm_intop(as, ir, ai); + } +} + +#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + if ((as->flags & JIT_F_ARMV6)) { + emit_dnm(as, ARMI_REV, dest, left, left); + } + else { + Reg tmp2 = dest; + if (tmp2 == left) + tmp2 = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, dest), left)); + emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_LSR, 8), dest, tmp2, RID_TMP); + emit_dm(as, ARMI_MOV | ARMF_SH(ARMSH_ROR, 8), tmp2, left); + emit_dn(as, ARMC_K12(ARMI_BIC, 256 * 8 | 127), RID_TMP, RID_TMP); + emit_dnm(as, ARMI_EOR | ARMF_SH(ARMSH_ROR, 16), RID_TMP, left, left); + } +} + +#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) +#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) +#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) + +static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) +{ + if (irref_isk(ir->op2)) { + /* Constant shifts. */ + /* NYI: Turn SHL+SHR or BAND+SHR into uxtb, uxth or ubfx. 
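+** E.g. (x << 24) >> 24 is UXTB dest, x and (x >> 3) & 31 is UBFX dest, x, #3, #5 on Thumb-2, folding two instructions into one.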
*/ + /* NYI: Turn SHL+ASR into sxtb, sxth or sbfx. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + int32_t shift = (IR(ir->op2)->i & 31); + emit_dm(as, ARMI_MOV | ARMF_SH(sh, shift), dest, left); + } + else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | ARMF_RSH(sh, right), dest, left); + } +} + +#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + +static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) +{ + uint32_t kcmp = 0, kmov = 0; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = 0; + if (irref_isk(ir->op2)) { + kcmp = emit_isk12(ARMI_CMP, IR(ir->op2)->i); + if (kcmp) kmov = emit_isk12(ARMI_MOV, IR(ir->op2)->i); + } + if (!kmov) { + kcmp = 0; + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + } + if (kmov || dest != right) { + emit_dm(as, ARMI_MOV ^ kmov, dest, right); + ARMI_IT(cc); + cc ^= 1; /* Must use opposite conditions for paired moves. */ + } + else { + cc ^= (CC_LT ^ CC_GT); /* Otherwise may swap CC_LT <-> CC_GT. */ + } + if (dest != left) { + emit_dm(as, ARMI_MOV, dest, left); + ARMI_IT(cc); + } + emit_nm(as, ARMI_CMP ^ kcmp, left, right); +} + +#if LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir, int cc) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + args[0] = ir->op1; args[1] = (ir + 1)->op1; + args[2] = ir->op2; args[3] = (ir + 1)->op2; + /* __aeabi_cdcmple preserves r0-r3. */ + if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); + if (ra_hasreg((ir + 1)->r)) rset_clear(drop, (ir + 1)->r); + if (!rset_test(as->freeset, RID_R2) && + regcost_ref(as->cost[RID_R2]) == args[2]) rset_clear(drop, RID_R2); + if (!rset_test(as->freeset, RID_R3) && + regcost_ref(as->cost[RID_R3]) == args[3]) rset_clear(drop, RID_R3); + ra_evictset(as, drop); + ra_destpair(as, ir); + emit_dm(as, ARMI_MOV, RID_RETHI, RID_R3); + ARMI_IT(cc); + emit_dm(as, ARMI_MOV, RID_RETLO, RID_R2); + ARMI_IT(cc); + emit_call(as, (void *)ci->func); + for (r = RID_R0; r <= RID_R3; r++) + ra_leftov(as, r, args[r - RID_R0]); +} +#else +static void asm_fpmin_max(ASMState *as, IRIns *ir, int cc) +{ + Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = ((left >> 8) & 15); left &= 15; + if (dest != left) { + emit_tm(as, ARMI_VMOV_D, dest, left); + ARMI_IT(cc ^ 1); + } + if (dest != right) { + emit_tm(as, ARMI_VMOV_D, dest, right); + ARMI_IT(cc); + } + emit_t(as, ARMI_VMRS, 0); + emit_tm(as, ARMI_VCMP_D, left, right); +} +#endif + +static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) +{ +#if LJ_SOFTFP + UNUSED(fcc); +#else + if (irt_isnum(ir->t)) + asm_fpmin_max(as, ir, fcc); + else +#endif + asm_intmin_max(as, ir, cc); +} + +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) + +/* -- Comparisons --------------------------------------------------------- */ + +/* Map of comparisons to flags. ORDER IR. 
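+** The low nibble holds the integer condition, the high nibble the FP condition. Both are stored negated, since the guard branches to the exit when the comparison fails (e.g. IR_LT stores CC_GE); the FP conditions are chosen so that an unordered (NaN) result also takes the exit.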
*/ +static const uint8_t asm_compmap[IR_ABC + 1] = { + /* op FP swp int cc FP cc */ + /* LT */ CC_GE + (CC_HS << 4), + /* GE x */ CC_LT + (CC_HI << 4), + /* LE */ CC_GT + (CC_HI << 4), + /* GT x */ CC_LE + (CC_HS << 4), + /* ULT x */ CC_HS + (CC_LS << 4), + /* UGE */ CC_LO + (CC_LO << 4), + /* ULE x */ CC_HI + (CC_LO << 4), + /* UGT */ CC_LS + (CC_LS << 4), + /* EQ */ CC_NE + (CC_NE << 4), + /* NE */ CC_EQ + (CC_EQ << 4), + /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ +}; + +#if LJ_SOFTFP +/* FP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + int swp = (((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1) << 1); + args[swp ^ 0] = ir->op1; args[swp ^ 1] = (ir + 1)->op1; + args[swp ^ 2] = ir->op2; args[swp ^ 3] = (ir + 1)->op2; + /* __aeabi_cdcmple preserves r0-r3. This helps to reduce spills. */ + for (r = RID_R0; r <= RID_R3; r++) + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r - RID_R0]) rset_clear(drop, r); + ra_evictset(as, drop); + asm_guardcc(as, (asm_compmap[ir->o] >> 4)); + emit_call(as, (void *)ci->func); + for (r = RID_R0; r <= RID_R3; r++) + ra_leftov(as, r, args[r - RID_R0]); +} +#else +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + Reg left, right; + ARMIns ai; + int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); + if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { + left = (ra_alloc1(as, ir->op1, RSET_FPR) & 15); + right = 0; + ai = ARMI_VCMPZ_D; + } + else { + left = ra_alloc2(as, ir, RSET_FPR); + if (swp) { + right = (left & 15); left = ((left >> 8) & 15); + } + else { + right = ((left >> 8) & 15); left &= 15; + } + ai = ARMI_VCMP_D; + } + asm_guardcc(as, (asm_compmap[ir->o] >> 4)); + emit_t(as, ARMI_VMRS, 0); + emit_tm(as, ai, left, right); +} +#endif + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + ARMCC cc = (asm_compmap[ir->o] & 15); + IRRef lref = ir->op1, rref = ir->op2; + Reg left; + uint32_t m, rs = 0; + int cmpprev0 = 0; + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "bad comparison data type %d", irt_type(ir->t)); + if (asm_swapops(as, lref, rref)) { + Reg tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ + else if(cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ + } + if (irref_isk(rref) && IR(rref)->i == 0) { + IRIns *irl = IR(lref); + cmpprev0 = (irl + 1 == ir); + /* Combine comp(BAND(left, right), 0) into tst left, right. */ + if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { + IRRef blref = irl->op1, brref = irl->op2; + uint32_t m2 = 0; + Reg bleft; + if (asm_swapops(as, blref, brref)) { + Reg tmp = blref; blref = brref; brref = tmp; + } + if (irref_isk(brref)) { + m2 = emit_isk12(ARMI_AND, IR(brref)->i); + if ((m2 & (ARMI_AND ^ ARMI_BIC))) + goto notst; /* Not beneficial if we miss a constant operand. */ + } + if (cc == CC_GE) cc = CC_PL; + else if (cc == CC_LT) cc = CC_MI; + else if (cc > CC_NE) goto notst; /* Other conds don't work with tst. 
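+** TST only yields meaningful N and Z flags (C comes from the shifter, V is left unchanged), so the rewrite is restricted to EQ/NE plus the sign tests GE->PL and LT->MI handled above.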
*/ + bleft = ra_alloc1(as, blref, RSET_GPR); + if (!m2) m2 = asm_fuseopm(as, 0, brref, rset_exclude(RSET_GPR, bleft), &rs); + asm_guardcc(as, cc); + emit_n(as, ARMI_TST ^ m2, bleft); + if (rs) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m2 >> 16), (m2 >> 16)); + return; + } + } +notst: + left = ra_alloc1(as, lref, RSET_GPR); + m = asm_fuseopm(as, ARMI_CMP, rref, rset_exclude(RSET_GPR, left), &rs); + asm_guardcc(as, cc); + emit_n(as, ARMI_CMP ^ m, left); + if (rs) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rs, (m >> 16), (m >> 16)); + /* Signed comparison with zero and referencing previous ins? */ + if (cmpprev0 && (cc <= CC_NE || cc >= CC_GE)) + as->flagmcp = as->mcp; /* Allow elimination of the compare. */ +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else +#endif + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + +#if LJ_HASFFI +/* 64 bit integer comparisons. */ +static void asm_int64comp(ASMState *as, IRIns *ir) +{ + int signedcomp = (ir->o <= IR_GT); + ARMCC cclo, cchi; + Reg leftlo, lefthi; + uint32_t mlo, mhi, rslo = 0, rshi = 0; + RegSet allow = RSET_GPR, oldfree; + + /* Always use unsigned comparison for loword. */ + cclo = asm_compmap[ir->o + (signedcomp ? 4 : 0)] & 15; + leftlo = ra_alloc1(as, ir->op1, allow); + oldfree = as->freeset; + mlo = asm_fuseopm(as, ARMI_CMP, ir->op2, rset_clear(allow, leftlo), &rslo); + allow &= ~(oldfree & ~as->freeset); /* Update for allocs of asm_fuseopm. */ + + /* Use signed or unsigned comparison for hiword. */ + cchi = asm_compmap[ir->o] & 15; + lefthi = ra_alloc1(as, (ir + 1)->op1, allow); + mhi = asm_fuseopm(as, ARMI_CMP, (ir + 1)->op2, rset_clear(allow, lefthi), &rshi); + + /* All register allocations must be performed _before_ this point. */ + if (signedcomp) { + MCLabel l_around = emit_label(as); + asm_guardcc(as, cclo); + emit_n(as, ARMI_CMP ^ mlo, leftlo); + if (rslo) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16)); + emit_branch(as, ARMF_CC(ARMI_B, CC_NE), l_around); + if (cchi == CC_GE || cchi == CC_LE) cchi ^= 6; /* GE -> GT, LE -> LT */ + asm_guardcc(as, cchi); + } + else { + asm_guardcc(as, cclo); + emit_n(as, ARMI_CMP ^ mlo, leftlo); + if (rslo) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rslo, (mlo >> 16), (mlo >> 16)); + ARMI_IT(CC_EQ); + } + emit_n(as, ARMI_CMP ^ mhi, lefthi); + if (rshi) + emit_dn(as, (ARMI_MOV ^ 0x0000104f) | rshi, (mhi >> 16), (mhi >> 16)); +} +#endif + +/* -- Split register ops -------------------------------------------------- */ + +/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir - 1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_HASFFI || LJ_SOFTFP + if ((ir - 1)->o <= IR_NE) { + /* 64 bit integer or FP comparisons. ORDER IR. */ + as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir - 1); + return; + } +#endif +#if LJ_HASFFI + asm_int64comp(as, ir - 1); +#endif + return; +#if LJ_SOFTFP + } + else if ((ir - 1)->o == IR_MIN || (ir - 1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir - 1, (ir - 1)->o == IR_MIN ? 
CC_PL : CC_LE); + return; +#elif LJ_HASFFI + } + else if ((ir - 1)->o == IR_CONV) { + as->curins--; /* Always skip the CONV. */ + if (usehi || uselo) + asm_conv64(as, ir); + return; +#endif + } + else if ((ir - 1)->o == IR_XSTORE) { + if ((ir - 1)->r != RID_SINK) + asm_xstore_(as, ir, 4); + return; + } +#endif + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir - 1)->o) { +#if LJ_HASFFI + case IR_ADD: + as->curins--; + asm_intop(as, ir, ARMI_ADC); + asm_intop(as, ir - 1, ARMI_ADD | ARMI_S); + break; + case IR_SUB: + as->curins--; + asm_intop(as, ir, ARMI_SBC); + asm_intop(as, ir - 1, ARMI_SUB | ARMI_S); + break; + case IR_NEG: + as->curins--; + { + /* asm_intnegr */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dn(as, ARMC_K12(ARMI_SBC, 0), left, dest); + } + asm_intneg(as, ir - 1, ARMI_RSB | ARMI_S); + break; + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; +#endif +#if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: + if (!uselo) + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ + break; + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: + /* Nothing to do here. Handled by lo op itself. */ + break; +#endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, ARMC_K12(ARMI_TST, HOOK_PROFILE), RID_TMP); + emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, + BCReg topslot, + IRIns *irp, + RegSet allow, + ExitNo exitno) +{ + Reg pbase; + uint32_t k; + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; + lj_assertA(ra_hasreg(pbase), "base reg lost"); + } + else if (allow) { + pbase = rset_pickbot(allow); + } + else { + pbase = RID_RET; + emit_lso(as, ARMI_LDR, RID_RET, RID_SP, 0); /* Restore temp. register. */ + } + } + else { + pbase = RID_BASE; + } + emit_branchlink(as, ARMI_BL, exitstub_addr(as->J, exitno)); + ARMI_IT(CC_LS); + k = emit_isk12(0, (int32_t)(8*topslot)); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); + emit_n(as, ARMI_CMP ^ k, RID_TMP); + emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); + emit_lso(as, + ARMI_LDR, + RID_TMP, + RID_TMP, + (int32_t)offsetof(lua_State, maxstack)); + if (irp) { + /* Must not spill arbitrary registers in head of side trace. */ + int32_t i = i32ptr(&J2G(as->J)->cur_L); + if (ra_hasspill(irp->s)) + emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); + emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); + if (ra_hasspill(irp->s) && !allow) + emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ + emit_loadi(as, RID_TMP, (i & ~4095)); + } + else { + emit_getgl(as, RID_TMP, cur_L); + } +} + +/* Restore Lua stack from on-trace state. 
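+** Each snapshot entry maps a modified Lua stack slot to its on-trace
+** value. Slots are 8 bytes wide: the value is stored at ofs, the type
+** tag (or number hiword) at ofs+4.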
*/ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap) - 1]; + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s - 1); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { +#if LJ_SOFTFP + RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); + Reg tmp; + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, + (int32_t)ir_knum(ir)->u32.lo, + rset_exclude(RSET_GPREVEN, RID_BASE)); + emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); + if (rset_test(as->freeset, tmp + 1)) odd = RID2RSET(tmp + 1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, odd); + emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs + 4); +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_vlso(as, ARMI_VSTR_D, src, RID_BASE, ofs); +#endif + } + else { + RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); + Reg type; + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); + emit_lso(as, ARMI_STR, src, RID_BASE, ofs); + if (rset_test(as->freeset, src + 1)) odd = RID2RSET(src + 1); + } + if ((sn & (SNAP_CONT | SNAP_FRAME))) { + if (s == 0) continue; /* Do not overwrite link to previous frame. */ + type = ra_allock(as, (int32_t)(*flinks--), odd); +#if LJ_SOFTFP + } + else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref + 1, rset_exclude(RSET_GPRODD, RID_BASE)); +#endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); + } + else { + type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); + } + emit_lso(as, ARMI_STR, type, RID_BASE, ofs + 4); + } + checkmclim(as); + } + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Marker to prevent patching the GC check exit. */ +#define ARM_NOPATCH_GC_CHECK (ARMC_K12(ARMI_BIC, 0)) + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp1, tmp2; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcc(as, CC_NE); /* Assumes asm_snap_prep() already done. */ + * --as->mcp = ARM_NOPATCH_GC_CHECK; + emit_n(as, ARMC_K12(ARMI_CMP, 0), RID_RET); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + tmp1 = ra_releasetmp(as, ASMREF_TMP1); + tmp2 = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp2, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. 
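+ ** Note the emitter works backwards: at runtime the two loads and the
+ ** compare below execute first, then this branch skips the step call.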
*/ + emit_branch(as, ARMF_CC(ARMI_B, CC_LS), l_end); + emit_nm(as, ARMI_CMP, RID_TMP, tmp2); + emit_lso(as, + ARMI_LDR, + tmp2, + tmp1, + (int32_t)offsetof(global_State, gc.threshold)); + emit_lso(as, + ARMI_LDR, + RID_TMP, + tmp1, + (int32_t)offsetof(global_State, gc.total)); + ra_allockreg(as, i32ptr(J2G(as->J)), tmp1); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->loopinv) { + /* Inverted loop branch? */ + /* asm_guardcc already inverted the bcc and patched the final bl. */ + p[-2] |= ARMC_B((uint32_t)((target - p + 1) << 1)); + } + else { + p[-1] = ARMI_B_T4 | ARMC_BL((uint32_t)((target - p) << 1)); + } +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Reload L register from g->cur_L. */ +static void asm_head_lreg(ASMState *as) +{ + IRIns *ir = IR(ASMREF_L); + if (ra_used(ir)) { + Reg r = ra_dest(as, ir, RSET_GPR); + emit_getgl(as, r, cur_L); + ra_evictk(as); + } +} + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + ra_destreg(as, ir, RID_BASE); +} + +/* Coalesce BASE register for a side trace. */ +static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + if (ra_hasspill(irp->s)) { + rset_clear(allow, ra_dest(as, ir, allow)); + } + else { + Reg r = irp->r; + lj_assertA(ra_hasreg(r), "base reg lost"); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); + ra_destreg(as, ir, r); + } + return allow; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target; + int32_t spadj = as->T->spadjust; + if (spadj == 0) { + as->mctop = --p; + } + else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(ARMI_ADD, spadj); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); + p[-2] = (ARMI_ADD ^ k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)as->J->exitstubgroup[0] - 8;//lj_vm_exit_interp; + p[-1] = ARMI_B_T4 | ARMC_BL((target - p) << 1); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 1; /* Leave room for exit branch. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } + else { + as->mcp = p - 1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + *p = 0; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. 
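+** Follows the EABI: with a hard-float (non-vararg) call, doubles take
+** d0-d7 and singles back-fill the spare half of a split double
+** register (fprodd); otherwise doubles need an aligned GPR pair or an
+** aligned pair of stack slots. Whatever overflows the argument
+** registers is counted in nslots.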
*/ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX * 2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { + if (!LJ_ABI_SOFTFP && !(ci->flags & CCI_VARARG)) { + if (irt_isnum(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; + else fprodd = 0, nslots = (nslots + 3) & ~1; + } + else { + if (fprodd) fprodd--; + else if (nfpr > 0) fprodd = 1, nfpr--; + else nslots++; + } + } + else if (irt_isnum(IR(args[i])->t)) { + ngpr &= ~1; + if (ngpr > 0) ngpr -= 2; + else nslots += 2; + } + else { + if (ngpr > 0) ngpr--; + else nslots++; + } + } + else { + if (ngpr > 0) ngpr--; + else nslots++; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + /* May need extra exit for asm_stack_check on side traces. */ + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *cstart = NULL, *cend = p; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MCode *px = exitstub_addr(J, exitno) - 1; + for (; p < pe; p++) { + /* Look for bl_cc exitstub, replace with b_cc target. */ + uint32_t ins = *p; + if ((ins & 0xd000f800u) == 0xd000f000u && + (((ARMC_BL_READ(ins) >> 1) ^ (px - p)) & 0x007fffffu) == 0 && + p[-1] != ARM_NOPATCH_GC_CHECK) { + *p = ARMI_B_T4 | ARMC_BL((uint32_t)(((target - p) - 1) << 1)); + cend = p + 1; + if (!cstart) cstart = p; + } + } + lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); + lj_mcode_sync(cstart, cend); + lj_mcode_patch(J, mcarea, 1); +} + diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 43e44305..2e6b6fd4 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -151,7 +151,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; MSize slot; - /* This must match with the saveregs macro in buildvm_arm.dasc. */ + /* This must match with the saveregs macro in buildvm_arm.dasc. */ //jturnsek!!! 
*p++ = ARMI_SUB|ARMF_D(RID_R12)|ARMF_N(RID_R12)|ARMF_M(RID_PC); *p++ = ARMI_PUSH|ARMF_N(RID_SP)|RSET_RANGE(RID_R4,RID_R11+1)|RID2RSET(RID_LR); *p++ = ARMI_SUB|ARMI_K12|ARMF_D(RID_R12)|ARMF_N(RID_R12)|CALLBACK_MCODE_HEAD; @@ -295,7 +295,7 @@ static void callback_mcode_new(CTState *cts) DWORD oprot; LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); } -#elif LJ_TARGET_POSIX +#elif LJ_TARGET_POSIX && !LJ_TARGET_NUTTX mprotect(p, sz, (PROT_READ|PROT_EXEC)); #endif } diff --git a/src/lj_clib.c b/src/lj_clib.c index f0ef6edd..0fc6419b 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -50,6 +50,8 @@ LJ_NORET LJ_NOINLINE static void clib_error_(lua_State *L) #define CLIB_SOEXT "%s.dylib" #elif LJ_TARGET_CYGWIN #define CLIB_SOEXT "%s.dll" +#elif LJ_TARGET_NUTTX +#define CLIB_SOEXT "%s" #else #define CLIB_SOEXT "%s.so" #endif @@ -428,7 +430,11 @@ void lj_clib_unload(CLibrary *cl) void lj_clib_default(lua_State *L, GCtab *mt) { CLibrary *cl = clib_new(L, mt); +#if LJ_TARGET_NUTTX + cl->handle = clib_loadlib(L, "c", 0); +#else cl->handle = CLIB_DEFHANDLE; +#endif } #endif diff --git a/src/lj_def.h b/src/lj_def.h index b61297aa..03f60c3f 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -87,6 +87,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_EXITSTUBGR 16 /* Max. # of exit stub groups. */ /* Various macros. */ +#undef UNUSED /* NuttX UNUSED macro is giving us problems. Use our own. */ #ifndef UNUSED #define UNUSED(x) ((void)(x)) /* to avoid warnings */ #endif diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 52762eea..909d4d5a 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -87,25 +87,23 @@ typedef uint16_t HotCount; /* Global state, main thread and extra fields are allocated together. */ typedef struct GG_State { - lua_State L; /* Main thread. */ - global_State g; /* Global state. */ -#if LJ_TARGET_ARM && !LJ_TARGET_NX - /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ - uint8_t align1[(16-sizeof(global_State))&15]; -#endif + lua_State L; /* Main thread. */ #if LJ_TARGET_MIPS - ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ #endif #if LJ_HASJIT - jit_State J; /* JIT state. */ - HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ + jit_State J; /* JIT state. */ + HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ #if LJ_TARGET_ARM && !LJ_TARGET_NX /* Ditto for J. */ - uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; + uint8_t align1[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; #endif #endif - ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ - BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ + global_State g; /* Global state. */ /* jturnsek: moved here in order to avoid excessive negative offsets when LJ_HASJIT */ + /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ + uint8_t align2[(16-sizeof(global_State))&15]; + ASMFunction dispatch[GG_LEN_DISP]; /* Instruction dispatch tables. */ + BCIns bcff[GG_NUM_ASMFF]; /* Bytecode for ASM fast functions. */ } GG_State; #define GG_OFS(field) ((int)offsetof(GG_State, field)) diff --git a/src/lj_emit_armv7m.h b/src/lj_emit_armv7m.h new file mode 100644 index 00000000..5381df8b --- /dev/null +++ b/src/lj_emit_armv7m.h @@ -0,0 +1,474 @@ +/* +** ARMv7-M instruction emitter. +** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. 
See Copyright Notice in luajit.h +*/ + +/* -- Constant encoding --------------------------------------------------- */ + +#define INVAI_MASK 0xfbe0 + +static uint32_t emit_invai[16] = { + /* AND, TST */ ((ARMI_AND ^ 0x1a00) ^ (ARMI_BIC ^ 0x1a00)) & INVAI_MASK, + /* BIC */ ((ARMI_BIC ^ 0x1a00) ^ (ARMI_AND ^ 0x1a00)) & INVAI_MASK, + /* MOV, ORR */ ((ARMI_MOV ^ 0x1a00) ^ (ARMI_MVN ^ 0x1a00)) & INVAI_MASK, + /* MVN, ORN */ ((ARMI_MVN ^ 0x1a00) ^ (ARMI_MOV ^ 0x1a00)) & INVAI_MASK, + /* EOR, TEQ */ 0, + 0, + 0, + 0, + /* ADD, CMN */ ((ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00)) & INVAI_MASK, + 0, + /* ADC */ ((ARMI_ADC ^ 0x1a00) ^ (ARMI_SBC ^ 0x1a00)) & INVAI_MASK, + /* SBC */ ((ARMI_SBC ^ 0x1a00) ^ (ARMI_ADC ^ 0x1a00)) & INVAI_MASK, + 0, + /* SUB, CMP */ ((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK, + /* RSB */ 0, + 0 +}; + +/* Encode constant in K12 format for data processing instructions. */ +static unsigned int emit_isk12(ARMIns ai, signed int n) +{ + unsigned int invai, i, m = (unsigned int)n; + /* K12: 1bcdefgh value, rotated in steps of one bit. */ + if (m <= 255) { + /* i:imm3 = 0000 */ + return ARMC_K12(0, m); + } + else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0001 */ + return ARMC_K12(0, 0x100 | (m & 0xff)); + } + else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { + /* i:imm3 = 0010 */ + return ARMC_K12(0, 0x200 | (m >> 8 & 0xff)); + } + else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0011 */ + return ARMC_K12(0, 0x300 | (m & 0xff)); + } + else { + for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) { + if (m <= 255) { + if ((m & 0x80) && (i >= 128 * 8)) + return ARMC_K12(0, i | (m & 0x7f)); + else + continue; + } + } + } + + /* Otherwise try negation/complement with the inverse instruction. */ + invai = emit_invai[(ai >> 5) & 0xf]; + if (!invai) return 0; /* Failed. No inverse instruction. 
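+ ** Otherwise retry below with the inverse instruction: the operand is
+ ** complemented (~n), or negated (~n+1 = -n) for the ADD/SUB and
+ ** CMN/CMP pairs; e.g. an unencodable ADD rd, rn, #-1 is emitted as
+ ** SUB rd, rn, #1.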
*/ + m = ~(unsigned int)n; + if (invai == (((ARMI_SUB ^ 0x1a00) ^ (ARMI_ADD ^ 0x1a00)) & INVAI_MASK) || + invai == (((ARMI_CMP ^ 0x1a00) ^ (ARMI_CMN ^ 0x1a00)) & INVAI_MASK)) m++; + if (m <= 255) { + /* i:imm3 = 0000 */ + return ARMC_K12(invai, m); + } + else if (!(m & 0xff00ff00) && !(((m >> 16 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0001 */ + return ARMC_K12(invai, 0x100 | (m & 0xff)); + } + else if (!(m & 0x00ff00ff) && !(((m >> 16 & 0xff00) ^ m) & 0xff00)) { + /* i:imm3 = 0010 */ + return ARMC_K12(invai, 0x200 | (m >> 8 & 0xff)); + } + else if (!(((m >> 16 & 0xffff) ^ m) & 0xffff) && !(((m >> 8 & 0xff) ^ m) & 0xff)) { + /* i:imm3 = 0011 */ + return ARMC_K12(invai, 0x300 | (m & 0xff)); + } + else { + for (i = 0; i < 4096; i += 128, m = lj_rol(m, 1)) { + if (m <= 255) { + if ((m & 0x80) && (i >= 128 * 8)) + return ARMC_K12(invai, i | (m & 0x7f)); + else + continue; + } + } + } + + return 0; +} + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) +{ + * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); +} + +static void emit_tnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) +{ + * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn) | ARMF_M(rm); +} + +static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) +{ + * --as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); +} + +static void emit_tm(ASMState *as, ARMIns ai, Reg rd, Reg rm) +{ + * --as->mcp = ai | ARMF_T(rd) | ARMF_M(rm); +} + +static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) +{ + * --as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); +} + +static void emit_tn(ASMState *as, ARMIns ai, Reg rd, Reg rn) +{ + * --as->mcp = ai | ARMF_T(rd) | ARMF_N(rn); +} + +static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) +{ + * --as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); +} + +static void emit_d(ASMState *as, ARMIns ai, Reg rd) +{ + * --as->mcp = ai | ARMF_D(rd); +} + +static void emit_t(ASMState *as, ARMIns ai, Reg rd) +{ + * --as->mcp = ai | ARMF_T(rd); +} + +static void emit_n(ASMState *as, ARMIns ai, Reg rn) +{ + * --as->mcp = ai | ARMF_N(rn); +} + +static void emit_m(ASMState *as, ARMIns ai, Reg rm) +{ + * --as->mcp = ai | ARMF_M(rm); +} + +static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) +{ + lj_assertA(ofs >= -1020 && ofs <= 1020, + "load/store offset %d out of range", ofs); + if (ofs < 0) ofs = -ofs; + else ai |= ARMI_LSX_U; + * --as->mcp = ai | ARMI_LSX_P | ARMF_T(rd) | ARMF_D(rd + 1) | ARMF_N(rn) | + (((ofs >> 2) & 0xff) << 16); /* imm multiples of 4 */ +} + +static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) +{ + lj_assertA(ofs >= -255 && ofs <= 4095, + "load/store offset %d out of range", ofs); + /* Combine LDR/STR pairs to LDRD/STRD. */ + if (*as->mcp == (ai | ARMI_LS_1 | ARMI_LS_P | ARMI_LS_U | ARMF_T(rd ^ 1) | ARMF_N(rn) | (ofs ^ 4)) && + (ai & ~(ARMI_LDR ^ ARMI_STR)) == ARMI_STR && rd != rn && + (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >> 2)) & 1) && + as->mcp != as->mcloop) { + as->mcp++; + emit_lsox(as, ai == ARMI_LDR ? 
ARMI_LDRD : ARMI_STRD, rd & ~1, rn, ofs & ~4); + return; + } + if (ofs > 255) { + * --as->mcp = ai | ARMI_LS_I | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xfff) << 16); + return; + } + if (ofs < 0) ofs = -ofs; + else ai |= ARMI_LS_U; + * --as->mcp = ai | ARMI_LS_1 | ARMI_LS_P | ARMF_T(rd) | ARMF_N(rn) | ((ofs & 0xff) << 16); +} + +#if !LJ_SOFTFP +static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) +{ + lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs & 3) == 0, + "load/store offset %d out of range", ofs); + if (ofs < 0) ofs = -ofs; + else ai |= ARMI_LSX_U; + * --as->mcp = ai | ARMF_T(rd & 15) | ARMF_N(rn) | ((ofs >> 2) << 16); +} +#endif + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer spills of BASE/L. */ +#define emit_canremat(ref) ((ref) < ASMREF_L) + +/* Try to find a one step delta relative to another constant. */ +static int emit_kdelta1(ASMState *as, Reg d, int32_t i) +{ + RegSet work = ~as->freeset & RSET_GPR; + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); + lj_assertA(r != d, "dest reg not free"); + if (emit_canremat(ref)) { + int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + uint32_t k = emit_isk12(ARMI_ADD, delta); + if (k) { + if (k == ARMI_K12) + emit_dm(as, ARMI_MOV, d, r); + else + emit_dn(as, ARMI_ADD ^ k, d, r); + return 1; + } + } + rset_clear(work, r); + } + return 0; /* Failed. */ +} + +/* Try to find a two step delta relative to another constant. */ +static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) +{ + RegSet work = ~as->freeset & RSET_GPR; + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); + lj_assertA(r != rd, "dest reg %d not free", rd); + if (emit_canremat(ref)) { + int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; + if (other) { + int32_t delta = i - other; + uint32_t sh, inv = 0, k2, k; + if (delta < 0) { delta = -delta; inv = (ARMI_ADD ^ 0x1a00) ^ (ARMI_SUB ^ 0x1a00); } + sh = lj_ffs(delta) & ~1; + k2 = emit_isk12(0, delta & (255 << sh)); + k = emit_isk12(0, delta & ~(255 << sh)); + if (k) { + emit_dn(as, ARMI_ADD ^ k2 ^ inv, rd, rd); + emit_dn(as, ARMI_ADD ^ k ^ inv, rd, r); + return 1; + } + } + } + rset_clear(work, r); + } + return 0; /* Failed. */ +} + +/* Load a 32 bit constant into a GPR. */ +static void emit_loadi(ASMState *as, Reg rd, int32_t i) +{ + uint32_t k = emit_isk12(ARMI_MOV, i); + lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, + "dest reg %d not free", rd); + if (k) { + /* Standard K12 constant. */ + emit_d(as, ARMI_MOV ^ k, rd); + } + else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { + /* 16 bit loword constant for ARMv6T2. */ + emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd); + } + else if (emit_kdelta1(as, rd, i)) { + /* One step delta relative to another constant. */ + } + else if ((as->flags & JIT_F_ARMV6T2)) { + /* 32 bit hiword/loword constant for ARMv6T2. */ + emit_d(as, ARMI_MOVT | (((i >> 16) & 0xff) << 16) | (((i >> 16) & 0x700) << 20) | (((i >> 16) & 0x800) >> 1) | (((i >> 16) & 0xf000) >> 12), rd); + emit_d(as, ARMI_MOVW | ((i & 0xff) << 16) | ((i & 0x700) << 20) | ((i & 0x800) >> 1) | ((i & 0xf000) >> 12), rd); + } + else if (emit_kdelta2(as, rd, i)) { + /* Two step delta relative to another constant. */ + } + else { + /* Otherwise construct the constant with up to 4 instructions. */ + /* NYI: use mvn+bic, use pc-relative loads. 
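+ ** The loop below peels off one 8-bit chunk at an even bit position
+ ** per step: one MOV plus up to three ORRs. Since code is emitted
+ ** backwards, the MOV executes first.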
*/ + for (;;) { + uint32_t sh = lj_ffs(i) & ~1; + int32_t m = i & (255 << sh); + i &= ~(255 << sh); + if (i == 0) { + emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); + break; + } + emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); + } + } +} + +#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) +{ + int32_t i = i32ptr(p); + emit_lso(as, + ai, + r, + ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), + (i & 4095)); +} + +#if !LJ_SOFTFP +/* Load a number constant into an FPR. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + cTValue *tv = ir_knum(ir); + int32_t i; + if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { + uint32_t hi = tv->u32.hi; + uint32_t b = ((hi >> 22) & 0x1ff); + if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { + * --as->mcp = ARMI_VMOVI_D | ARMF_T(r & 15) | + ((((tv->u32.hi >> 12) & 0x00080000) | + ((tv->u32.hi >> 4) & 0x00070000)) >> 16) | + (((tv->u32.hi >> 16) & 0x0000000f) << 16); + return; + } + } + i = i32ptr(tv); + emit_vlso(as, + ARMI_VLDR_D, + r, + ra_allock(as, (i & ~1020), RSET_GPR), + (i & 1020)); +} +#endif + +/* Get/set global_State fields. */ +#define emit_getgl(as, r, field) \ + emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field) +#define emit_setgl(as, r, field) \ + emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field) + +/* Trace number is determined from pc of exit instruction. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_branch(ASMState *as, ARMIns ai, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (target - p) << 1; + lj_assertA(((delta + 0x0080000) >> 20) == 0, "branch target out of range"); + * --p = ai | ARMC_B((uint32_t)delta & 0x00fffffu); + as->mcp = p; +} + +static void emit_branchlink(ASMState *as, ARMIns ai, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (target - p) << 1; + * --p = ai | ARMC_BL((uint32_t)delta & 0x0ffffffu); + as->mcp = p; +} + +static void emit_jmp(ASMState *as, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (target - p) << 1; + lj_assertA(((delta + 0x0800000) >> 24) == 0, "jump target out of range"); + * --p = ARMI_B_T4 | ARMC_BL((uint32_t)delta & 0x00ffffffu); + as->mcp = p; +} + +static void emit_call(ASMState *as, void *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = ((char *)target - (char *)p) - 4; + if ((((delta >> 1) + 0x00100000) >> 21) == 0) { + /* Only Thumb code is allowed */ + *p = ARMI_BL | ARMC_BL((uint32_t)(delta >> 1)); + } + else { + /* Target out of range: need indirect call. But don't use R0-R3. */ + Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12 + 1)); + *p = ARMI_BLXr | ARMF_M2(r); + } +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ +#if LJ_SOFTFP + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); +#else + if (dst >= RID_MAX_GPR) { + emit_tm(as, + irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, + (dst & 15), + (src & 15)); + return; + } +#endif + if (as->mcp != as->mcloop) { + /* Swap early registers for loads/stores. 
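+ ** The load/store emitted just before (which executes after this mov)
+ ** is patched to reference src instead of dst, removing its dependency
+ ** on the mov.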
*/ + MCode ins = *as->mcp, swp = (src ^ dst); + if ((ins & 0x0fc0ff80) == 0x0000f800) { + if (!((ins ^ dst) & 0x0000000f)) + *as->mcp = ins ^ swp; /* Swap N in load/store. */ + if (!(ins & 0x00000010) && !((ins ^ (dst << 28)) & 0xf0000000)) + *as->mcp = ins ^ (swp << 28); /* Swap D in store. */ + } + } + emit_dm(as, ARMI_MOV, dst, src); +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ +#if LJ_SOFTFP + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); +#else + if (r >= RID_MAX_GPR) + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); + else +#endif + emit_lso(as, ARMI_LDR, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ +#if LJ_SOFTFP + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); +#else + if (r >= RID_MAX_GPR) + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); + else +#endif + emit_lso(as, ARMI_STR, r, base, ofs); +} + +/* Emit an arithmetic/logic operation with a constant operand. */ +static void emit_opk(ASMState *as, + ARMIns ai, + Reg dest, + Reg src, + int32_t i, + RegSet allow) +{ + uint32_t k = emit_isk12(ai, i); + if (k) + emit_dn(as, ai ^ k, dest, src); + else + emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); +} + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + diff --git a/src/lj_jit.h b/src/lj_jit.h index 32b3861a..10644724 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -107,7 +107,7 @@ #define JIT_P_sizemcode_DEFAULT 64 #else /* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 +#define JIT_P_sizemcode_DEFAULT 8 #endif /* Optimization parameters and their defaults. Length is a char in octal! 
*/ diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 163aada4..7ea1fe2f 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -45,6 +45,8 @@ void lj_mcode_sync(void *start, void *end) sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); +#elif LJ_TARGET_NUTTX + up_invalidate_icache_all(); #elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else @@ -86,6 +88,50 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); } +#elif LJ_TARGET_NUTTX + +#include +#include + +static bool initialized = false; +static struct mm_heap_s *g_mcode_heap; + +#define MCPROT_RW 0 +#define MCPROT_RX 0 +#define MCPROT_RWX 0 + +static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +{ + UNUSED(J); UNUSED(prot); + + if (!initialized) { + static uint8_t buffer[CONFIG_LUAJIT_MCODE_SIZE] + locate_data(CONFIG_LUAJIT_MCODE_SECTION_NAME); + g_mcode_heap = mm_initialize("mcode", + (void *)buffer, + CONFIG_LUAJIT_MCODE_SIZE); + initialized = true; + } + + void *p = mm_malloc(g_mcode_heap, sz); + if (p == NULL) { + if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); + } + return p; +} + +static void mcode_free(jit_State *J, void *p, size_t sz) +{ + UNUSED(J); UNUSED(sz); + mm_free(g_mcode_heap, p); +} + +static int mcode_setprot(void *p, size_t sz, int prot) +{ + UNUSED(p); UNUSED(sz); UNUSED(prot); + return 0; +} + #elif LJ_TARGET_POSIX #include diff --git a/src/lj_target.h b/src/lj_target.h index 19716928..8cee29ea 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -137,7 +137,11 @@ typedef uint32_t RegCost; #if LJ_TARGET_X86ORX64 #include "lj_target_x86.h" #elif LJ_TARGET_ARM +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) +#include "lj_target_armv7m.h" +#else #include "lj_target_arm.h" +#endif #elif LJ_TARGET_ARM64 #include "lj_target_arm64.h" #elif LJ_TARGET_PPC diff --git a/src/lj_target_armv7m.h b/src/lj_target_armv7m.h new file mode 100755 index 00000000..5dc6d488 --- /dev/null +++ b/src/lj_target_armv7m.h @@ -0,0 +1,315 @@ +/* +** Definitions for ARMv7-M CPUs. +** Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h +** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_ARMV7M_H +#define _LJ_TARGET_ARMV7M_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ + _(R8) _(R9) _(R10) _(R11) _(R12) _(SP) _(LR) _(PC) +#if LJ_SOFTFP +#define FPRDEF(_) +#else +#define FPRDEF(_) \ + _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ + _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) +#endif +#define VRIDDEF(_) + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_TMP = RID_LR, + + /* Calling conventions. */ + RID_RET = RID_R0, + RID_RETLO = RID_R0, + RID_RETHI = RID_R1, +#if LJ_SOFTFP + RID_FPRET = RID_R0, +#else + RID_FPRET = RID_D0, +#endif + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_R9, /* Interpreter BASE. */ + RID_LPC = RID_R6, /* Interpreter PC. */ + RID_DISPATCH = RID_R7, /* Interpreter DISPATCH table. */ + RID_LREG = RID_R8, /* Interpreter L. */ + + /* Register ranges [min, max) and number of registers. 
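+ ** The GPR range deliberately includes sp, lr and pc so RID_NUM_GPR
+ ** matches the gpr[] array in ExitState below; the allocatable set is
+ ** narrowed separately via RSET_GPR.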
*/ + RID_MIN_GPR = RID_R0, + RID_MAX_GPR = RID_PC + 1, + RID_MIN_FPR = RID_MAX_GPR, +#if LJ_SOFTFP + RID_MAX_FPR = RID_MIN_FPR, +#else + RID_MAX_FPR = RID_D15 + 1, +#endif + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR +}; + +#define RID_NUM_KREF RID_NUM_GPR +#define RID_MIN_KREF RID_R0 + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except sp, lr and pc. */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_R12+1)) +#define RSET_GPREVEN \ + (RID2RSET(RID_R0)|RID2RSET(RID_R2)|RID2RSET(RID_R4)|RID2RSET(RID_R6)| \ + RID2RSET(RID_R8)|RID2RSET(RID_R10)) +#define RSET_GPRODD \ + (RID2RSET(RID_R1)|RID2RSET(RID_R3)|RID2RSET(RID_R5)|RID2RSET(RID_R7)| \ + RID2RSET(RID_R9)|RID2RSET(RID_R11)) +#if LJ_SOFTFP +#define RSET_FPR 0 +#else +#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) +#endif +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL + +/* ABI-specific register sets. lr is an implicit scratch register. */ +#define RSET_SCRATCH_GPR_ (RSET_RANGE(RID_R0, RID_R3+1)|RID2RSET(RID_R12)) +#ifdef __APPLE__ +#define RSET_SCRATCH_GPR (RSET_SCRATCH_GPR_|RID2RSET(RID_R9)) +#else +#define RSET_SCRATCH_GPR RSET_SCRATCH_GPR_ +#endif +#if LJ_SOFTFP +#define RSET_SCRATCH_FPR 0 +#else +#define RSET_SCRATCH_FPR (RSET_RANGE(RID_D0, RID_D7+1)) +#endif +#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) +#define REGARG_FIRSTGPR RID_R0 +#define REGARG_LASTGPR RID_R3 +#define REGARG_NUMGPR 4 +#if LJ_ABI_SOFTFP +#define REGARG_FIRSTFPR 0 +#define REGARG_LASTFPR 0 +#define REGARG_NUMFPR 0 +#else +#define REGARG_FIRSTFPR RID_D0 +#define REGARG_LASTFPR RID_D7 +#define REGARG_NUMFPR 8 +#endif + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the *.dasc file(s). +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 2 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { +#if !LJ_SOFTFP + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ +#endif + int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* PC after instruction that caused an exit. Used to find the trace number. */ +#define EXITSTATE_PCREG RID_PC +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +#define EXITSTUB_SPACING 4 +#define EXITSTUBS_PER_GROUP 32 + +/* -- Instructions -------------------------------------------------------- */ + +/* Instruction fields. 
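+** Thumb-2 wide instructions are stored halfword-swapped: the first
+** 16-bit halfword occupies bits 0-15 of the 32-bit value, the second
+** bits 16-31. Hence Rn (ARMF_N) lands in the low half, while Rd/Rt/Rm
+** (ARMF_D/ARMF_T/ARMF_M) shift into the upper half.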
*/ +#define ARMF_CC(ai, cc) ((ai) | ((cc) << 6)) +#define ARMF_N(r) ((r)<<0) +#define ARMF_T(r) ((r)<<28) +#define ARMF_D(r) ((r)<<24) +#define ARMF_M(r) ((r)<<16) +#define ARMF_M2(r) ((r)<<19) // BLXr +#define ARMF_SH(sh, n) (((sh)<<20)|(((n)&0x3)<<22)|((((n)>>2)&0x7)<<28)) +#define ARMF_LSL(n) ((n&0x3)<<20) +#define ARMF_RSH(sh, r) (0xf0000000|((sh)<<5)|ARMF_M(r)) + +/* Instruction compositing */ +#define ARMC_K12(arg1, arg2) (((arg1)^ARMI_K12)| \ + (((arg2)&0xff)<<16)| \ + (((arg2)&0x700)<<20)| \ + (((arg2)&0x800)>>1)) +#define ARMC_B(arg) ((((arg)&0x7ff)<<16)| \ + (((arg)&0x1f800)>>11)| \ + (((arg)&0x20000)<<12)| \ + (((arg)&0x40000)<<9)| \ + (((arg)&0x80000)>>9)) +#define ARMC_BL(arg) ((((arg)&0x7ff)<<16)| \ + (((arg)&0x1ff800)>>11)| \ + (((~(((arg)&0x200000)>>21)&0x1)^((((arg)&0x800000)>>23)&0x1))<<27)| \ + (((~(((arg)&0x400000)>>22)&0x1)^((((arg)&0x800000)>>23)&0x1))<<29)| \ + (((((arg)&0x800000)>>23)&0x1)<<10)) +#define ARMC_BL_READ(ins) (((((ins)&0x07ff0000u)>>16))| \ + (((ins)&0x000003ffu)<<11)| \ + (((~((((ins)&0x08000000u)>>27)^(((ins)&0x00000400u)>>10)))&0x1)<<21)| \ + (((~((((ins)&0x20000000u)>>29)^(((ins)&0x00000400u)>>10)))&0x1)<<22)| \ + ((((ins)&0x00000400u)>>10)<<23)) +#define ARMI_IT(cc) *--as->mcp = (0xbf08bf00u|(((cc)&0xf)<<20)) + + +typedef enum ARMIns { + ARMI_CCAL = 0x000003c0, + ARMI_S = 0x00000010, + ARMI_K12 = 0x00001a00, + + ARMI_LS_W = 0x01000000, + ARMI_LS_U = 0x02000000, + ARMI_LS_P = 0x04000000, + ARMI_LS_1 = 0x08000000, + ARMI_LS_I = 0x00000080, + ARMI_LSX_W = 0x00000020, + ARMI_LSX_U = 0x00000080, + ARMI_LSX_P = 0x00000100, + + ARMI_AND = 0x0000ea00, + ARMI_EOR = 0x0000ea80, + ARMI_SUB = 0x0000eba0, + ARMI_RSB = 0x0000ebc0, + ARMI_ADD = 0x0000eb00, + ARMI_ADC = 0x0000eb40, + ARMI_SBC = 0x0000eb60, + // ARMI_RSC = 0xe0e00000, + ARMI_TST = 0x0f00ea10, + ARMI_TEQ = 0x0f00ea90, + ARMI_CMP = 0x0f00ebb0, + ARMI_CMN = 0x0f00eb10, + ARMI_ORR = 0x0000ea40, + ARMI_MOV = 0x0000ea4f, + ARMI_BIC = 0x0000ea20, + ARMI_MVN = 0x0000ea6f, + ARMI_NOP = 0xbf00bf00, + + ARMI_MUL = 0xf000fb00, + ARMI_SMULL = 0x0000fb80, + + ARMI_LDR = 0x0000f850, + ARMI_LDRB = 0x0000f810, + ARMI_LDRH = 0x0000f830, + ARMI_LDRSB = 0x0000f910, + ARMI_LDRSH = 0x0000f930, + ARMI_LDRD = 0x0000e850, + ARMI_STR = 0x0000f840, + ARMI_STRB = 0x0000f800, + ARMI_STRH = 0x0000f820, + ARMI_STRD = 0x0000e840, + ARMI_PUSH = 0x0000e92d, + + ARMI_B = 0x8000f000, + ARMI_B_T4 = 0x9000f000, + ARMI_BL = 0xd000f000, + ARMI_BLXr = 0x4780bf00, + + /* ARMv6 */ + ARMI_REV = 0xf080fa90, + ARMI_SXTB = 0xf080fa4f, + ARMI_SXTH = 0xf080fa0f, + ARMI_UXTB = 0xf080fa5f, + ARMI_UXTH = 0xf080fa1f, + + /* ARMv6T2 */ + ARMI_MOVW = 0x0000f240, + ARMI_MOVT = 0x0000f2c0, + + /* VFP */ + ARMI_VMOV_D = 0x0b40eeb0, + ARMI_VMOV_S = 0x0a40eeb0, + ARMI_VMOVI_D = 0x0b00eeb0, + + ARMI_VMOV_R_S = 0x0a10ee10, + ARMI_VMOV_S_R = 0x0a10ee00, + ARMI_VMOV_RR_D = 0x0b10ec50, + ARMI_VMOV_D_RR = 0x0b10ec40, + + ARMI_VADD_D = 0x0b00ee30, + ARMI_VSUB_D = 0x0b40ee30, + ARMI_VMUL_D = 0x0b00ee20, + ARMI_VMLA_D = 0x0b00ee00, + ARMI_VMLS_D = 0x0b40ee00, + ARMI_VNMLS_D = 0x0b00ee10, + ARMI_VDIV_D = 0x0b00ee80, + + ARMI_VABS_D = 0x0bc0eeb0, + ARMI_VNEG_D = 0x0b40eeb1, + ARMI_VSQRT_D = 0x0bc0eeb1, + + ARMI_VCMP_D = 0x0b40eeb4, + ARMI_VCMPZ_D = 0x0b40eeb5, + + ARMI_VMRS = 0xfa10eef1, + + ARMI_VCVT_S32_F32 = 0x0ac0eebd, + ARMI_VCVT_S32_F64 = 0x0bc0eebd, + ARMI_VCVT_U32_F32 = 0x0ac0eebc, + ARMI_VCVT_U32_F64 = 0x0bc0eebc, + ARMI_VCVT_F32_S32 = 0x0ac0eeb8, + ARMI_VCVT_F64_S32 = 0x0bc0eeb8, + ARMI_VCVT_F32_U32 = 0x0a40eeb8, + ARMI_VCVT_F64_U32 = 0x0b40eeb8, + 
ARMI_VCVT_F32_F64 = 0x0bc0eeb7, + ARMI_VCVT_F64_F32 = 0x0ac0eeb7, + + ARMI_VLDR_S = 0x0a00ed10, + ARMI_VLDR_D = 0x0b00ed10, + ARMI_VSTR_S = 0x0a00ed00, + ARMI_VSTR_D = 0x0b00ed00, +} ARMIns; + +typedef enum ARMShift { + ARMSH_LSL, + ARMSH_LSR, + ARMSH_ASR, + ARMSH_ROR +} ARMShift; + +/* ARM condition codes. */ +typedef enum ARMCC { + CC_EQ, + CC_NE, + CC_CS, + CC_CC, + CC_MI, + CC_PL, + CC_VS, + CC_VC, + CC_HI, + CC_LS, + CC_GE, + CC_LT, + CC_GT, + CC_LE, + CC_AL, + CC_HS = CC_CS, + CC_LO = CC_CC +} ARMCC; + +#endif diff --git a/src/vm_armv7m.dasc b/src/vm_armv7m.dasc new file mode 100755 index 00000000..13266007 --- /dev/null +++ b/src/vm_armv7m.dasc @@ -0,0 +1,4901 @@ +|// Low-level VM code for ARMV7M CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2018 Jernej Turnsek. See Copyright Notice in luajit.h +|// Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h +| +|.arch armv7m +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|.macro ldrd_i, rt, rt2, rn, rm +| add rt, rn, rm +| ldm rt, {rt, rt2} +|.endmacro +|.macro ldrd_iw, rt, rt2, rn, rm +| add rn, rn, rm +| ldrd rt, rt2, [rn] +|.endmacro +| +|.macro ldrdlo_i, rt, rt2, rn, rm +| itt lo +| addlo rt, rn, rm +| ldmlo rt, {rt, rt2} +|.endmacro +|.macro ldrdlo_iw, rt, rt2, rn, rm +| itt lo +| addlo rn, rn, rm +| ldrdlo rt, rt2, [rn] +|.endmacro +| +|.macro strd_i, rt, rt2, rn, rm +| add rn, rn, rm +| strd rt, rt2, [rn] +| sub rn, rn, rm +|.endmacro +| +|.macro strdne_i, rt, rt2, rn, rm +| ittt ne +| addne rn, rn, rm +| strdne rt, rt2, [rn] +| subne rn, rn, rm +|.endmacro +|.macro strdls_i, rt, rt2, rn, rm +| ittt ls +| addls rn, rn, rm +| strdls rt, rt2, [rn] +| subls rn, rn, rm +|.endmacro +|.macro strdhi_i, rt, rt2, rn, rm +| ittt hi +| addhi rn, rn, rm +| strdhi rt, rt2, [rn] +| subhi rn, rn, rm +|.endmacro +| +|// Fixed register assignments for the interpreter. +| +|// The following must be C callee-save. +|.define MASKR8, r4 // 255*8 constant for fast bytecode decoding. +|.define KBASE, r5 // Constants of current Lua function. +|.define PC, r6 // Next PC. +|.define DISPATCH,r7 // Opcode dispatch table. +|.define LREG, r8 // Register holding lua_State (also in SAVE_L). +| +|// C callee-save in EABI, but often refetched. Temporary in iOS 3.0+. +|.define BASE, r9 // Base of current Lua stack frame. +| +|// The following temporaries are not saved across C calls, except for RA/RC. +|.define RA, r10 // Callee-save. +|.define RC, r11 // Callee-save. +|.define RB, r12 +|.define OP, r12 // Overlaps RB, must not be lr. +|.define INS, lr +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, r0 +|.define CARG2, r1 +|.define CARG3, r2 +|.define CARG4, r3 +| +|.define CRET1, r0 +|.define CRET2, r1 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. 
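+|// The C frame spans CFRAME_SPACE (28) bytes below sp; SAVE_R4 sits
+|// just above it and is only used by the FPU variant of saveregs,
+|// which allocates 4 extra bytes for it.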
+|.define SAVE_R4, [sp, #28] +|.define CFRAME_SPACE, #28 +|.define SAVE_ERRF, [sp, #24] +|.define SAVE_NRES, [sp, #20] +|.define SAVE_CFRAME, [sp, #16] +|.define SAVE_L, [sp, #12] +|.define SAVE_PC, [sp, #8] +|.define SAVE_MULTRES, [sp, #4] +|.define ARG5, [sp] +| +|.define TMPDhi, [sp, #4] +|.define TMPDlo, [sp] +|.define TMPD, [sp] +|.define TMPDp, sp +| +|.if FPU +|.macro saveregs +| push {r5, r6, r7, r8, r9, r10, r11, lr} +| vpush {d8-d15} +| sub sp, sp, CFRAME_SPACE+4 +| str r4, SAVE_R4 +|.endmacro +|.macro restoreregs_ret +| ldr r4, SAVE_R4 +| add sp, sp, CFRAME_SPACE+4 +| vpop {d8-d15} +| pop {r5, r6, r7, r8, r9, r10, r11, pc} +|.endmacro +|.else +|.macro saveregs +| push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +| sub sp, sp, CFRAME_SPACE +|.endmacro +|.macro restoreregs_ret +| add sp, sp, CFRAME_SPACE +| pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +|.endmacro +|.endif +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; bkpt #0; .endmacro +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|.define FRAME_FUNC, #-8 +|.define FRAME_PC, #-4 +| +|.macro decode_RA8, dst, ins; and dst, MASKR8, ins, lsr #5; .endmacro +|.macro decode_RB8, dst, ins; and dst, MASKR8, ins, lsr #21; .endmacro +|.macro decode_RC8, dst, ins; and dst, MASKR8, ins, lsr #13; .endmacro +|.macro decode_RD, dst, ins; lsr dst, ins, #16; .endmacro +|.macro decode_OP, dst, ins; and dst, ins, #255; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| ldrb OP, [PC] +|.endmacro +|.macro ins_NEXT2 +| ldr INS, [PC], #4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT3 +| ldr OP, [DISPATCH, OP, lsl #2] +| decode_RA8 RA, INS +| decode_RD RC, INS +| bx OP +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +| ins_NEXT3 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +| .define ins_next3, ins_NEXT3 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| .endmacro +| .macro ins_next3 +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Avoid register name substitution for field name. +#define field_pc pc +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ldr PC, LFUNC:CARG3->field_pc +| ldrb OP, [PC] // STALL: load PC. early PC. +| ldr INS, [PC], #4 +| ldr OP, [DISPATCH, OP, lsl #2] // STALL: load OP. early OP. +| decode_RA8 RA, INS +| add RA, RA, BASE +| bx OP +|.endmacro +| +|.macro ins_call +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| str PC, [BASE, FRAME_PC] +| ins_callt // STALL: locked PC. 
+|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Macros to test operand types. +|.macro checktp, reg, tp; cmn reg, #-tp; .endmacro +|.macro checktpeq, reg, tp; it eq; cmneq reg, #-tp; .endmacro +|.macro checktpne, reg, tp; it ne; cmnne reg, #-tp; .endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR; bne target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB; bne target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC; bne target; .endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro hotcheck, delta +| lsr CARG1, PC, #1 +| and CARG1, CARG1, #126 +| sub CARG1, CARG1, #-GG_DISP2HOT +| ldrh CARG2, [DISPATCH, CARG1] +| subs CARG2, CARG2, #delta +| strh CARG2, [DISPATCH, CARG1] +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP +| blo ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL +| blo ->vm_hotcall +|.endmacro +| +|// Set current VM state. +|.macro mv_vmstate, reg, st; mvn reg, #LJ_VMST_..st; .endmacro +|.macro st_vmstate, reg; push {r12}; sub r12, DISPATCH, #-DISPATCH_GL(vmstate); str reg, [r12]; pop {r12}; .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp +| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain) +| ldr tmp, [tmp] +| str tmp, tab->gclist +| sub tmp, DISPATCH, #-DISPATCH_GL(gc.grayagain) +| bic mark, mark, #LJ_GC_BLACK // black2gray(tab) +| str tab, [tmp] +| strb mark, tab->marked +|.endmacro +| +|.macro .IOS, a, b +|.if IOS +| a, b +|.endif +|.endmacro +| +|//----------------------------------------------------------------------- + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for ARM target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: RB = previous base. + | tst PC, #FRAME_P + | beq ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. + | mvn CARG2, #~LJ_TTRUE + | mov BASE, RB + | // Prepending may overwrite the pcall frame, so do it at the end. + | str CARG2, [RA, FRAME_PC] // Prepend true to results. + | sub RA, RA, #8 + | + |->vm_returnc: + | adds RC, RC, #8 // RC = (nresults+1)*8. + | mov CRET1, #LUA_YIELD + | beq ->vm_unwind_c_eh + | str RC, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | beq ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return + | // CARG1 = PC & FRAME_TYPE + | bic RB, PC, #FRAME_TYPEP + | cmp CARG1, #FRAME_C + | sub RB, BASE, RB // RB = previous base. 
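+ | // Only plain C frames return directly; all other frame types are
+ | // unwound via ->vm_returnp above.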
+ | bne ->vm_returnp + | + | str RB, L->base + | ldr KBASE, SAVE_NRES + | mv_vmstate CARG4, C + | sub BASE, BASE, #8 + | subs CARG3, RC, #8 + | lsl KBASE, KBASE, #3 // KBASE = (nresults_wanted+1)*8 + | st_vmstate CARG4 + | beq >2 + |1: + | subs CARG3, CARG3, #8 + | ldrd CARG1, CARG2, [RA], #8 + | strd CARG1, CARG2, [BASE], #8 + | bne <1 + |2: + | cmp KBASE, RC // More/less results wanted? + | bne >6 + |3: + | str BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ldr RC, SAVE_CFRAME // Restore previous C frame. + | mov CRET1, #0 // Ok return status for vm_pcall. + | str RC, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | blt >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | ldr CARG3, L->maxstack + | mvn CARG2, #~LJ_TNIL + | cmp BASE, CARG3 + | bhs >8 + | str CARG2, [BASE, #4] + | add RC, RC, #8 + | add BASE, BASE, #8 + | b <2 + | + |7: // Less results wanted. + | sub CARG1, RC, KBASE + | cmp KBASE, #0 // LUA_MULTRET+1 case? + | it ne + | subne BASE, BASE, CARG1 // Either keep top or shrink it. + | b <3 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | str BASE, L->top // Save current top held in BASE (yes). + | lsr CARG2, KBASE, #3 + | mov CARG1, L + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->top // Need the (realloced) L->top in BASE. + | b <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov sp, CARG1 + | mov CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | mv_vmstate CARG4, C + | ldr GL:CARG3, L->glref + | str CARG4, GL:CARG3->vmstate + | b ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | bic CARG1, CARG1, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. + | mov sp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | mov MASKR8, #255 + | mov RC, #16 // 2 results: false + error message. + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | ldr BASE, L->base + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | mvn CARG1, #~LJ_TFALSE + | sub RA, BASE, #8 // Results start at BASE-8. + | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. + | add DISPATCH, DISPATCH, #GG_G2DISP + | mv_vmstate CARG2, INTERP + | str CARG1, [BASE, #-4] // Prepend false to error message. + | st_vmstate CARG2 + | b ->vm_returnc + | + |->vm_unwind_ext: // Complete external unwind. +#if !LJ_NO_UNWIND + | push {r0, r1, r2, lr} + | bl extern _Unwind_Complete + | ldr r0, [sp] + | bl extern _Unwind_DeleteException + | pop {r0, r1, r2, lr} + | mov r0, r1 + | bx r2 +#endif + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | // CARG1 = L + | mov CARG2, #LUA_MINSTACK + | b >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | mov CARG1, L + | str BASE, L->base + | add PC, PC, #4 // Must point after first instruction. 
+ | str RC, L->top + | lsr CARG2, RA, #3 + |2: + | // L->base = new base, L->top = top + | str PC, SAVE_PC + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | ldr RC, L->top + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, RC, BASE + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov L, CARG1 + | ldr DISPATCH, L:CARG1->glref // Setup pointer to dispatch table. + | mov BASE, CARG2 + | add DISPATCH, DISPATCH, #GG_G2DISP + | str L, SAVE_L + | mov PC, #FRAME_CP + | str CARG3, SAVE_NRES + | add CARG2, sp, #CFRAME_RESUME + | ldrb CARG1, L->status + | str CARG3, SAVE_ERRF + | str L, SAVE_PC // Any value outside of bytecode is ok. + | str CARG3, SAVE_CFRAME + | cmp CARG1, #0 + | str CARG2, L->cframe + | beq >3 + | + | // Resume after yield (like a return). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | mov RA, BASE + | ldr BASE, L->base + | ldr CARG1, L->top + | mov MASKR8, #255 + | strb CARG3, L->status + | sub RC, CARG1, BASE + | ldr PC, [BASE, FRAME_PC] + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | mv_vmstate CARG2, INTERP + | add RC, RC, #8 + | ands CARG1, PC, #FRAME_TYPE + | st_vmstate CARG2 + | str RC, SAVE_MULTRES + | beq ->BC_RET_Z + | b ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | mov PC, #FRAME_CP + | str CARG4, SAVE_ERRF + | b >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | mov PC, #FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ldr RC, L:CARG1->cframe + | str CARG3, SAVE_NRES + | mov L, CARG1 + | str CARG1, SAVE_L + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | mov BASE, CARG2 + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | str RC, SAVE_CFRAME + | add DISPATCH, DISPATCH, #GG_G2DISP + | str sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | ldr RB, L->base // RB = old base (for vmeta_call). + | ldr CARG1, L->top + | mov MASKR8, #255 + | add PC, PC, BASE + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | sub PC, PC, RB // PC = frame delta + frame type + | mv_vmstate CARG2, INTERP + | sub NARGS8:RC, CARG1, BASE + | st_vmstate CARG2 + | + |->vm_call_dispatch: + | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ldrd CARG3, CARG4, [BASE, FRAME_FUNC] + | checkfunc CARG4, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mov L, CARG1 + | ldr RA, L:CARG1->stack + | str CARG1, SAVE_L + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | ldr RB, L->top + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | ldr RC, L->cframe + | add DISPATCH, DISPATCH, #GG_G2DISP + | sub RA, RA, RB // Compute -savestack(L, L->top). 
+ | mov RB, #0 + | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. + | str RB, SAVE_ERRF // No error function. + | str RC, SAVE_CFRAME + | str sp, L->cframe // Add our C frame to cframe chain. + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | movs BASE, CRET1 + | mov PC, #FRAME_CP + | bne <3 // Else continue with the call. + | b ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 + | ldr LFUNC:CARG3, [RB, FRAME_FUNC] + | ldr CARG1, [BASE, #-16] // Get continuation. + | mov CARG4, BASE + | mov BASE, RB // Restore caller BASE. + |.if FFI + | cmp CARG1, #1 + |.endif + | ldr PC, [CARG4, #-12] // Restore PC from [cont|PC]. + | mvn INS, #~LJ_TNIL + | add CARG2, RA, RC + | str INS, [CARG2, #-4] // Ensure one valid arg. + |.if FFI + | bls >1 + |.endif + | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | // BASE = base, RA = resultptr, CARG4 = meta base + | bx CARG1 + | + |.if FFI + |1: + | beq ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub CARG4, CARG4, #16 + | sub RC, CARG4, BASE + | b ->vm_call_tail + |.endif + | + |->cont_cat: // RA = resultptr, CARG4 = meta base + | ldr INS, [PC, #-4] + | sub CARG2, CARG4, #16 + | ldrd CARG3, CARG4, [RA] + | str BASE, L->base + | decode_RB8 RC, INS + | decode_RA8 RA, INS + | add CARG1, BASE, RC + | subs CARG1, CARG2, CARG1 + | itt ne + | strdne CARG3, CARG4, [CARG2] + | movne CARG3, CARG1 + | bne ->BC_CAT_Z + | strd_i CARG3, CARG4, BASE, RA + | b ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | add CARG2, BASE, RB + | b >2 + | + |->vmeta_tgets: + | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) + | mvn CARG4, #~LJ_TTAB + | str TAB:RB, [CARG2] + | str CARG4, [CARG2, #4] + |2: + | mvn CARG4, #~LJ_TSTR + | str STR:RC, TMPDlo + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tgetb: // RC = index + | decode_RB8 RB, INS + | str RC, TMPDlo + | mvn CARG4, #~LJ_TISNUM + | add CARG2, BASE, RB + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tgetv: + | add CARG2, BASE, RB + | add CARG3, BASE, RC + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | beq >3 + | ldrd CARG3, CARG4, [CRET1] + | ins_next1 + | ins_next2 + | strd_i CARG3, CARG4, BASE, RA + | ins_next3 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | rsb CARG1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #16 // 2 args for func(t, k). + | str PC, [BASE, #-12] // [cont|PC] + | add PC, CARG1, BASE + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | b ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | .IOS mov RC, BASE + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. 
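+ |// .IOS-prefixed instructions are only assembled for iOS targets, where
+ |// r9 (BASE) is not preserved across calls; BASE is therefore parked in a
+ |// callee-save register around externs. Kept here as in the ARM port.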
+ | .IOS mov BASE, RC + | cmp CRET1, #0 + | ite ne + | ldrdne CARG1, CARG2, [CRET1] + | mvneq CARG2, #~LJ_TNIL + | b ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | add CARG2, BASE, RB + | b >2 + | + |->vmeta_tsets: + | sub CARG2, DISPATCH, #-DISPATCH_GL(tmptv) + | mvn CARG4, #~LJ_TTAB + | str TAB:RB, [CARG2] + | str CARG4, [CARG2, #4] + |2: + | mvn CARG4, #~LJ_TSTR + | str STR:RC, TMPDlo + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tsetb: // RC = index + | decode_RB8 RB, INS + | str RC, TMPDlo + | mvn CARG4, #~LJ_TISNUM + | add CARG2, BASE, RB + | str CARG4, TMPDhi + | mov CARG3, TMPDp + | b >1 + | + |->vmeta_tsetv: + | add CARG2, BASE, RB + | add CARG3, BASE, RC + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | ldrd_i CARG3, CARG4, BASE, RA + | beq >3 + | ins_next1 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | strd CARG3, CARG4, [CRET1] + | ins_next2 + | ins_next3 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | rsb CARG1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #24 // 3 args for func(t, k, v). + | strd CARG3, CARG4, [BASE, #16] // Copy value to third argument. + | str PC, [BASE, #-12] // [cont|PC] + | add PC, CARG1, BASE + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | b ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | str BASE, L->base + | .IOS mov RC, BASE + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + | .IOS mov BASE, RC + | b ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | mov CARG1, L + | sub PC, PC, #4 + | mov CARG2, RA + | str BASE, L->base + | mov CARG3, RC + | str PC, SAVE_PC + | decode_OP CARG4, INS + | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // Returns 0/1 or TValue * (metamethod). + |3: + | .IOS ldr BASE, L->base + | cmp CRET1, #1 + | bhi ->vmeta_binop + |4: + | ldrh RB, [PC, #2] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | it hs + | subhs PC, RB, #0x20000 + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | ldr INS, [PC, #-4] + | ldrd CARG1, CARG2, [RA] + | decode_RA8 CARG3, INS + | strd_i CARG1, CARG2, BASE, CARG3 + | b ->cont_nop + | + |->cont_condt: // RA = resultptr + | ldr CARG2, [RA, #4] + | mvn CARG1, #~LJ_TTRUE + | cmp CARG1, CARG2 // Branch if result is true. + | b <4 + | + |->cont_condf: // RA = resultptr + | ldr CARG2, [RA, #4] + | checktp CARG2, LJ_TFALSE // Branch if result is false. + | b <4 + | + |->vmeta_equal: + | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // Returns 0/1 or TValue * (metamethod). + | b <3 + | + |->vmeta_equal_cd: + |.if FFI + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | mov CARG2, INS + | str PC, SAVE_PC + | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). 
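+ |// DynASM local labels: '<n' / '>n' branch backwards / forwards to the
+ |// nearest numeric label |n:, so 'b <3' below reuses the 0/1-vs-metamethod
+ |// return dispatch at label 3 of ->vmeta_comp.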
+ | b <3 + |.endif + | + |->vmeta_istype: + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | lsr CARG2, RA, #3 + | mov CARG3, RC + | str PC, SAVE_PC + | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | .IOS ldr BASE, L->base + | b ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vn: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG3, BASE, RB + | add CARG4, KBASE, RC + | b >1 + | + |->vmeta_arith_nv: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG4, BASE, RB + | add CARG3, KBASE, RC + | b >1 + | + |->vmeta_unm: + | ldr INS, [PC, #-8] + | sub PC, PC, #4 + | add CARG3, BASE, RC + | add CARG4, BASE, RC + | b >1 + | + |->vmeta_arith_vv: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | add CARG3, BASE, RB + | add CARG4, BASE, RC + |1: + | decode_OP OP, INS + | add CARG2, BASE, RA + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | str OP, ARG5 + | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // Returns NULL (finished) or TValue * (metamethod). + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | beq ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub CARG2, CRET1, BASE + | str PC, [CRET1, #-12] // [cont|PC] + | add PC, CARG2, #FRAME_CONT + | mov BASE, CRET1 + | mov NARGS8:RC, #16 // 2 args for func(o1, o2). + | b ->vm_call_dispatch + | + |->vmeta_len: + | add CARG2, BASE, RC + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). + | .IOS ldr BASE, L->base +#if LJ_52 + | cmp CRET1, #0 + | bne ->vmeta_binop // Binop call for compatibility. + | ldr TAB:CARG1, [BASE, RC] + | b ->BC_LEN_Z +#else + | b ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // RB = old base, BASE = new base, RC = nargs*8 + | mov CARG1, L + | str RB, L->base // This is the callers base! + | sub CARG2, BASE, #8 + | str PC, SAVE_PC + | add CARG3, BASE, NARGS8:RC + | .IOS mov RA, BASE + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | .IOS mov BASE, RA + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mov CARG1, L + | str BASE, L->base + | sub CARG2, RA, #8 + | str PC, SAVE_PC + | add CARG3, RA, NARGS8:RC + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | .IOS ldr BASE, L->base + | ldr LFUNC:CARG3, [RA, FRAME_FUNC] // Guaranteed to be a function here. + | ldr PC, [BASE, FRAME_PC] + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. 
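+ |// lj_meta_call moved func and args up one slot and stored the __call
+ |// handler in the vacated slot, roughly: for (p = top; p > func; p--)
+ |// copyTV(L, p, p-1); copyTV(L, func, mo); -- hence nargs*8 + 8 above.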
+ | b ->BC_CALLT2_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, RA + | str PC, SAVE_PC + | bl extern lj_meta_for // (lua_State *L, TValue *base) + | .IOS ldr BASE, L->base + |.if JIT + | ldrb OP, [PC, #-4] + |.endif + | ldr INS, [PC, #-4] + |.if JIT + | cmp OP, #BC_JFORI + |.endif + | decode_RA8 RA, INS + | decode_RD RC, INS + |.if JIT + | beq =>BC_JFORI + |.endif + | b =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ldrd CARG1, CARG2, [BASE] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | ldrd CARG1, CARG2, [BASE] + | ldrd CARG3, CARG4, [BASE, #8] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_1 name + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | checktp CARG2, LJ_TISNUM + | it lo + | cmnlo CARG4, #-LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_d, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 + | vldr d0, [BASE] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |.macro .ffunc_dd, name + | .ffunc name + | ldr CARG2, [BASE, #4] + | ldr CARG4, [BASE, #12] + | cmp NARGS8:RC, #16 + | vldr d0, [BASE] + | vldr d1, [BASE, #8] + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | it lo + | cmnlo CARG4, #-LJ_TISNUM + | bhs ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. + |.macro ffgccheck + | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total) + | ldr CARG1, [CARG1] + | sub CARG2, DISPATCH, #-DISPATCH_GL(gc.threshold) + | ldr CARG2, [CARG2] + | cmp CARG1, CARG2 + | it ge + | blge ->fff_gcstep + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | checktp CARG2, LJ_TTRUE + | bhi ->fff_fallback + | ldr PC, [BASE, FRAME_PC] + | strd CARG1, CARG2, [BASE, #-8] + | mov RB, BASE + | subs RA, NARGS8:RC, #8 + | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. + | beq ->fff_res // Done if exactly 1 argument. + |1: + | ldrd CARG1, CARG2, [RB, #8] + | subs RA, RA, #8 + | strd CARG1, CARG2, [RB], #8 + | bne <1 + | b ->fff_res + | + |.ffunc type + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | it lo + | mvnlo CARG2, #~LJ_TISNUM + | rsb CARG4, CARG2, #(int)(offsetof(GCfuncC, upvalue)>>3)-1 + | lsl CARG4, CARG4, #3 + | ldrd_i CARG1, CARG2, CFUNC:CARG3, CARG4 + | b ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | checktp CARG2, LJ_TTAB + | it ne + | cmnne CARG2, #-LJ_TUDATA + | bne >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! 
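+ |// The inline metatable lookup below walks the hash part only; in C it is
+ |// roughly: Node *n = tab->node + (str->sid & tab->hmask). The idx*3 then
+ |// <<3 sequence scales the index by sizeof(Node) == 24 on 32 bit.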
+ | ldr TAB:RB, TAB:CARG1->metatable + |2: + | mvn CARG2, #~LJ_TNIL + | ldr STR:RC, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])] + | cmp TAB:RB, #0 + | beq ->fff_restv + | ldr CARG3, TAB:RB->hmask + | ldr CARG4, STR:RC->sid + | ldr NODE:INS, TAB:RB->node + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask + | add CARG3, CARG3, CARG3, lsl #1 + | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 + |3: // Rearranged logic, because we expect _not_ to find the key. + | ldrd CARG3, CARG4, NODE:INS->key // STALL: early NODE:INS. + | ldrd CARG1, CARG2, NODE:INS->val + | ldr NODE:INS, NODE:INS->next + | checktp CARG4, LJ_TSTR + | it eq + | cmpeq CARG3, STR:RC + | beq >5 + | cmp NODE:INS, #0 + | bne <3 + |4: + | mov CARG1, RB // Use metatable as default result. + | mvn CARG2, #~LJ_TTAB + | b ->fff_restv + |5: + | checktp CARG2, LJ_TNIL + | bne ->fff_restv + | b <4 + | + |6: + | checktp CARG2, LJ_TISNUM + | ite hs + | mvnhs CARG2, CARG2 + | movlo CARG2, #~LJ_TISNUM + | add CARG4, DISPATCH, CARG2, lsl #2 + | ldr TAB:RB, [CARG4, #DISPATCH_GL(gcroot[GCROOT_BASEMT])] + | b <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp CARG2, LJ_TTAB + | it eq + | ldreq TAB:RB, TAB:CARG1->metatable + | checktpeq CARG4, LJ_TTAB + | it eq + | ldrbeq CARG4, TAB:CARG1->marked + | it eq + | cmpeq TAB:RB, #0 + | bne ->fff_fallback + | tst CARG4, #LJ_GC_BLACK // isblack(table) + | str TAB:CARG3, TAB:CARG1->metatable + | beq ->fff_restv + | barrierback TAB:CARG1, CARG4, CARG3 + | b ->fff_restv + | + |.ffunc rawget + | ldrd CARG3, CARG4, [BASE] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | mov CARG2, CARG3 + | checktab CARG4, ->fff_fallback + | mov CARG1, L + | add CARG3, BASE, #8 + | .IOS mov RA, BASE + | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | .IOS mov BASE, RA + | ldrd CARG1, CARG2, [CRET1] + | b ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ldrd CARG1, CARG2, [BASE] + | cmp NARGS8:RC, #8 + | bne ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bls ->fff_restv + | b ->fff_fallback + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | checktp CARG2, LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beq ->fff_restv + | // Handle numbers inline, unless a number base metatable is present. + | ldr CARG4, [DISPATCH, #DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])] + | str BASE, L->base + | checktp CARG2, LJ_TISNUM + | it ls + | cmpls CARG4, #0 + | str PC, SAVE_PC // Redundant (but a defined value). + | bhi ->fff_fallback + | ffgccheck + | mov CARG1, L + | mov CARG2, BASE + | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | ldr BASE, L->base + | mvn CARG2, #~LJ_TSTR + | b ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | mvn CARG4, #~LJ_TNIL + | checktab CARG2, ->fff_fallback + | strd_i CARG3, CARG4, BASE, NARGS8:RC // Set missing 2nd arg to nil. + | ldr PC, [BASE, FRAME_PC] + | add CARG2, BASE, #8 + | sub CARG3, BASE, #8 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | .IOS ldr BASE, L->base + | cmp CRET1, #0 + | mov RC, #(2+1)*8 + | bgt ->fff_res // Found key/value. + | bmi ->fff_fallback // Invalid key. 
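+ |// RC = (2+1)*8 above is the usual (nresults+1)*8 encoding for fff_res:
+ |// a successful lj_tab_next yields two results, the key and the value.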
+ | // End of traversal: return nil. + | mvn CRET2, #~LJ_TNIL + | b ->fff_restv + | + |.ffunc_1 pairs + | checktab CARG2, ->fff_fallback +#if LJ_52 + | ldr TAB:RB, TAB:CARG1->metatable +#endif + | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cmp TAB:RB, #0 + | bne ->fff_fallback +#endif + | mvn CARG2, #~LJ_TNIL + | mov RC, #(3+1)*8 + | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8] + | str CARG2, [BASE, #12] + | b ->fff_res + | + |.ffunc_2 ipairs_aux + | checktp CARG2, LJ_TTAB + | checktpeq CARG4, LJ_TISNUM + | bne ->fff_fallback + | ldr RB, TAB:CARG1->asize + | ldr RC, TAB:CARG1->array + | add CARG3, CARG3, #1 + | ldr PC, [BASE, FRAME_PC] + | cmp CARG3, RB + | add RC, RC, CARG3, lsl #3 + | strd CARG3, CARG4, [BASE, #-8] + | it lo + | ldrdlo CARG1, CARG2, [RC] + | mov RC, #(0+1)*8 + | bhs >2 // Not in array part? + |1: + | checktp CARG2, LJ_TNIL + | itt ne + | movne RC, #(2+1)*8 + | strdne CARG1, CARG2, [BASE] + | b ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | ldr RB, TAB:CARG1->hmask + | mov CARG2, CARG3 + | cmp RB, #0 + | beq ->fff_res + | .IOS mov RA, BASE + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | .IOS mov BASE, RA + | cmp CRET1, #0 + | beq ->fff_res + | ldrd CARG1, CARG2, [CRET1] + | b <1 + | + |.ffunc_1 ipairs + | checktab CARG2, ->fff_fallback +#if LJ_52 + | ldr TAB:RB, TAB:CARG1->metatable +#endif + | ldrd CFUNC:CARG3, CFUNC:CARG4, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cmp TAB:RB, #0 + | bne ->fff_fallback +#endif + | mov CARG1, #0 + | mvn CARG2, #~LJ_TISNUM + | mov RC, #(3+1)*8 + | strd CFUNC:CARG3, CFUNC:CARG4, [BASE, #-8] + | strd CARG1, CARG2, [BASE, #8] + | b ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | sub RA, DISPATCH, #-DISPATCH_GL(hookmask) + | ldrb RA, [RA] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. + | mov RB, BASE + | add BASE, BASE, #8 + | ite eq + | moveq PC, #8+FRAME_PCALL + | movne PC, #8+FRAME_PCALLH + | sub NARGS8:RC, NARGS8:RC, #8 + | b ->vm_call_dispatch + | + |.ffunc_2 xpcall + | sub RA, DISPATCH, #-DISPATCH_GL(hookmask) + | ldrb RA, [RA] + | checkfunc CARG4, ->fff_fallback // Traceback must be a function. + | mov RB, BASE + | strd CARG1, CARG2, [BASE, #8] // Swap function and traceback. + | strd CARG3, CARG4, [BASE] + | tst RA, #HOOK_ACTIVE // Remember active hook before pcall. + | add BASE, BASE, #16 + | ite eq + | moveq PC, #16+FRAME_PCALL + | movne PC, #16+FRAME_PCALLH + | sub NARGS8:RC, NARGS8:RC, #16 + | b ->vm_call_dispatch + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG2, LJ_TTHREAD + | bne ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr + |.endif + | ldr PC, [BASE, FRAME_PC] + | str BASE, L->base + | ldr CARG2, L:CARG1->top + | ldrb RA, L:CARG1->status + | ldr RB, L:CARG1->base + | add CARG3, CARG2, NARGS8:RC + | add CARG4, CARG2, RA + | str PC, SAVE_PC + | cmp CARG4, RB + | beq ->fff_fallback + | ldr CARG4, L:CARG1->maxstack + | ldr RB, L:CARG1->cframe + | cmp RA, #LUA_YIELD + | it ls + | cmpls CARG3, CARG4 + | it ls + | cmpls RB, #0 + | bhi ->fff_fallback + |1: + |.if resume + | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. 
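+ |// For resume the first stack slot is the coroutine object itself: step
+ |// past it for the argument copy, but keep it on the stack as a GC anchor.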
+ | add BASE, BASE, #8 + | sub NARGS8:RC, NARGS8:RC, #8 + |.endif + | str CARG3, L:CARG1->top + | str BASE, L->top + |2: // Move args to coroutine. + | ldrd_i CARG3, CARG4, BASE, RB + | cmp RB, NARGS8:RC + | strdne_i CARG3, CARG4, CARG2, RB + | add RB, RB, #8 + | bne <2 + | + | mov CARG3, #0 + | mov L:RA, L:CARG1 + | mov CARG4, #0 + | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ldr CARG3, L:RA->base + | mv_vmstate CARG2, INTERP + | ldr CARG4, L:RA->top + | cmp CRET1, #LUA_YIELD + | ldr BASE, L->base + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | st_vmstate CARG2 + | bhi >8 + | subs RC, CARG4, CARG3 + | ldr CARG1, L->maxstack + | add CARG2, BASE, RC + | beq >6 // No results? + | cmp CARG2, CARG1 + | mov RB, #0 + | bhi >9 // Need to grow stack? + | + | sub CARG4, RC, #8 + | str CARG3, L:RA->top // Clear coroutine stack. + |5: // Move results from coroutine. + | ldrd_i CARG1, CARG2, CARG3, RB + | cmp RB, CARG4 + | strd_i CARG1, CARG2, BASE, RB + | add RB, RB, #8 + | bne <5 + |6: + |.if resume + | mvn CARG3, #~LJ_TTRUE + | add RC, RC, #16 + |7: + | str CARG3, [BASE, #-4] // Prepend true/false to results. + | sub RA, BASE, #8 + |.else + | mov RA, BASE + | add RC, RC, #8 + |.endif + | ands CARG1, PC, #FRAME_TYPE + | str PC, SAVE_PC + | str RC, SAVE_MULTRES + | beq ->BC_RET_Z + | b ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | ldrd CARG1, CARG2, [CARG4, #-8]! + | mvn CARG3, #~LJ_TFALSE + | mov RC, #(2+1)*8 + | str CARG4, L:RA->top // Remove error from coroutine stack. + | strd CARG1, CARG2, [BASE] // Copy error message. + | b <7 + |.else + | mov CARG1, L + | mov CARG2, L:RA + | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Never returns. + |.endif + | + |9: // Handle stack expansion on return from yield. + | mov CARG1, L + | lsr CARG2, RC, #3 + | bl extern lj_state_growstack // (lua_State *L, int n) + | mov CRET1, #0 + | b <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ldr CARG1, L->cframe + | add CARG2, BASE, NARGS8:RC + | str BASE, L->base + | tst CARG1, #CFRAME_RESUME + | str CARG2, L->top + | mov CRET1, #LUA_YIELD + | mov CARG3, #0 + | beq ->fff_fallback + | str CARG3, L->cframe + | strb CRET1, L->status + | b ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func + | .ffunc_1 math_ .. func + | checktp CARG2, LJ_TISNUM + | beq ->fff_restv + | bhi ->fff_fallback + | // Round FP value and normalize result. + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | bpl >2 // |x| < 1? + | mvn CARG4, #0x3e0 + | subs RB, CARG4, RB, asr #21 + | lsl CARG4, CARG2, #11 + | lsl CARG3, CARG1, #11 + | orr CARG4, CARG4, #0x80000000 + | rsb INS, RB, #32 + | orr CARG4, CARG4, CARG1, lsr #21 + | bls >3 // |x| >= 2^31? + | lsl CARG1, CARG4, INS + | orr CARG3, CARG3, CARG1 + | lsr CARG1, CARG4, RB + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 + | it ne + | addne CARG1, CARG1, #1 + |.else + | bics CARG3, CARG3, CARG2, asr #31 + | it ne + | addsne CARG1, CARG1, #1 + | it vs + | ldrdvs CARG1, CARG2, >9 + | bvs ->fff_restv + |.endif + | cmp CARG2, #0 + | it lt + | rsblt CARG1, CARG1, #0 + |1: + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |2: // |x| < 1 + | bcs ->fff_restv // |x| is not finite. 
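+ |// Carry is still live from the exponent test: (hi<<1) + 0x00200000 only
+ |// carries out for biased exponent 0x7ff, i.e. +-Inf or NaN, which the
+ |// bcs above returns unchanged.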
+ | orr CARG3, CARG3, CARG1 // ztest = abs(hi) | lo + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // return (ztest & sign) == 0 ? 0 : -1 + | ite eq + | moveq CARG1, #0 + | mvnne CARG1, #0 + |.else + | bics CARG3, CARG3, CARG2, asr #31 // return (ztest & ~sign) == 0 ? 0 : 1 + | ite eq + | moveq CARG1, #0 + | movne CARG1, #1 + |.endif + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |3: // |x| >= 2^31. Check for x == -(2^31). + | it eq + | cmpeq CARG4, #0x80000000 + |.if "func" == "floor" + | it eq + | cmpeq CARG3, #0 + |.endif + | bne >4 + | cmp CARG2, #0 + | it mi + | movmi CARG1, #0x80000000 + | bmi <1 + |4: + | bl ->vm_..func.._sf + | b ->fff_restv + |.endmacro + | + | math_round floor + | math_round ceil + | + |.align 8 + |9: + | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swaped halfwords!!! + | + |.ffunc_1 math_abs + | checktp CARG2, LJ_TISNUM + | bhi ->fff_fallback + | it ne + | bicne CARG2, CARG2, #0x80000000 + | bne ->fff_restv + | cmp CARG1, #0 + | it lt + | rsbslt CARG1, CARG1, #0 + | it vs + | ldrdvs CARG1, CARG2, <9 + | // Fallthrough. + | + |->fff_restv: + | // CARG1, CARG2 = TValue result. + | ldr PC, [BASE, FRAME_PC] + | strd CARG1, CARG2, [BASE, #-8] + |->fff_res1: + | // PC = return. + | mov RC, #(1+1)*8 + |->fff_res: + | // RC = (nresults+1)*8, PC = return. + | ands CARG1, PC, #FRAME_TYPE + | it eq + | ldreq INS, [PC, #-4] + | str RC, SAVE_MULTRES + | sub RA, BASE, #8 + | bne ->vm_return + | decode_RB8 RB, INS + |5: + | cmp RB, RC // More results expected? + | bhi >6 + | decode_RA8 CARG1, INS + | ins_next1 + | ins_next2 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, CARG1 + | ins_next3 + | + |6: // Fill up results with nil. + | add CARG2, RA, RC + | mvn CARG1, #~LJ_TNIL + | add RC, RC, #8 + | str CARG1, [CARG2, #-4] + | b <5 + | + |.macro math_extern, func + |.if HFABI + | .ffunc_d math_ .. func + |.else + | .ffunc_n math_ .. func + |.endif + | .IOS mov RA, BASE + | bl extern func + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + |.endmacro + | + |.macro math_extern2, func + |.if HFABI + | .ffunc_dd math_ .. func + |.else + | .ffunc_nn math_ .. func + |.endif + | .IOS mov RA, BASE + | bl extern func + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + |.endmacro + | + |.if FPU + | .ffunc_d math_sqrt + | vsqrt.f64 d0, d0 + |->fff_resd: + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] + | b ->fff_res1 + |.else + | math_extern sqrt + |.endif + | + |.ffunc math_log + |.if HFABI + | ldr CARG2, [BASE, #4] + | cmp NARGS8:RC, #8 // Need exactly 1 argument. + | vldr d0, [BASE] + | bne ->fff_fallback + |.else + | ldrd CARG1, CARG2, [BASE] + | cmp NARGS8:RC, #8 // Need exactly 1 argument. 
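+ |// Soft-float variant: the double stays in the CARG1/CARG2 register pair,
+ |// which is where the AEABI softfp 'log' expects its argument anyway.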
+ | bne ->fff_fallback + |.endif + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + | .IOS mov RA, BASE + | bl extern log + | .IOS mov BASE, RA + |.if HFABI + | b ->fff_resd + |.else + | b ->fff_restv + |.endif + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.if HFABI + | .ffunc math_ldexp + | ldr CARG4, [BASE, #4] + | ldrd CARG1, CARG2, [BASE, #8] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | vldr d0, [BASE] + | checktp CARG4, LJ_TISNUM + | bhs ->fff_fallback + | checktp CARG2, LJ_TISNUM + | bne ->fff_fallback + | .IOS mov RA, BASE + | bl extern ldexp // (double x, int exp) + | .IOS mov BASE, RA + | b ->fff_resd + |.else + |.ffunc_2 math_ldexp + | checktp CARG2, LJ_TISNUM + | bhs ->fff_fallback + | checktp CARG4, LJ_TISNUM + | bne ->fff_fallback + | .IOS mov RA, BASE + | bl extern ldexp // (double x, int exp) + | .IOS mov BASE, RA + | b ->fff_restv + |.endif + | + |.if HFABI + |.ffunc_d math_frexp + | mov CARG1, sp + | .IOS mov RA, BASE + | bl extern frexp + | .IOS mov BASE, RA + | ldr CARG3, [sp] + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | vstr d0, [BASE, #-8] + | mov RC, #(2+1)*8 + | strd CARG3, CARG4, [BASE] + | b ->fff_res + |.else + |.ffunc_n math_frexp + | mov CARG3, sp + | .IOS mov RA, BASE + | bl extern frexp + | .IOS mov BASE, RA + | ldr CARG3, [sp] + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | strd CARG1, CARG2, [BASE, #-8] + | mov RC, #(2+1)*8 + | strd CARG3, CARG4, [BASE] + | b ->fff_res + |.endif + | + |.if HFABI + |.ffunc_d math_modf + | sub CARG1, BASE, #8 + | ldr PC, [BASE, FRAME_PC] + | .IOS mov RA, BASE + | bl extern modf + | .IOS mov BASE, RA + | mov RC, #(2+1)*8 + | vstr d0, [BASE] + | b ->fff_res + |.else + |.ffunc_n math_modf + | sub CARG3, BASE, #8 + | ldr PC, [BASE, FRAME_PC] + | .IOS mov RA, BASE + | bl extern modf + | .IOS mov BASE, RA + | mov RC, #(2+1)*8 + | strd CARG1, CARG2, [BASE] + | b ->fff_res + |.endif + | + |.macro math_minmax, name, cond, fcond + |.if FPU + | .ffunc_1 name + | add RB, BASE, RC + | checktp CARG2, LJ_TISNUM + | add RA, BASE, #8 + | bne >4 + |1: // Handle integers. + | ldrd CARG3, CARG4, [RA] + | cmp RA, RB + | bhs ->fff_restv + | checktp CARG4, LJ_TISNUM + | bne >3 + | cmp CARG1, CARG3 + | add RA, RA, #8 + | it cond + | mov..cond CARG1, CARG3 + | b <1 + |3: // Convert intermediate result to number and continue below. + | vmov s4, CARG1 + | bhi ->fff_fallback + | vldr d1, [RA] + | vcvt.f64.s32 d0, s4 + | b >6 + | + |4: + | vldr d0, [BASE] + | bhi ->fff_fallback + |5: // Handle numbers. + | ldrd CARG3, CARG4, [RA] + | vldr d1, [RA] + | cmp RA, RB + | bhs ->fff_resd + | checktp CARG4, LJ_TISNUM + | bhs >7 + |6: + | vcmp.f64 d0, d1 + | vmrs + | add RA, RA, #8 + | it fcond + | vmov..fcond.f64 d0, d1 + | b <5 + |7: // Convert integer to number and continue above. + | vmov s4, CARG3 + | bhi ->fff_fallback + | vcvt.f64.s32 d1, s4 + | b <6 + | + |.else + | + | .ffunc_1 name + | checktp CARG2, LJ_TISNUM + | mov RA, #8 + | bne >4 + |1: // Handle integers. + | ldrd_i CARG3, CARG4, BASE, RA + | cmp RA, RC + | bhs ->fff_restv + | checktp CARG4, LJ_TISNUM + | bne >3 + | cmp CARG1, CARG3 + | add RA, RA, #8 + | it cond + | mov..cond CARG1, CARG3 + | b <1 + |3: // Convert intermediate result to number and continue below. 
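+ |// __aeabi_i2d is the AEABI helper converting the int32 in CARG1 (r0)
+ |// to a double returned in the CARG1/CARG2 (r0/r1) pair.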
+ | bhi ->fff_fallback + | bl extern __aeabi_i2d + | ldrd_i CARG3, CARG4, BASE, RA + | b >6 + | + |4: + | bhi ->fff_fallback + |5: // Handle numbers. + | ldrd_i CARG3, CARG4, BASE, RA + | cmp RA, RC + | bhs ->fff_restv + | checktp CARG4, LJ_TISNUM + | bhs >7 + |6: + | bl extern __aeabi_cdcmple + | add RA, RA, #8 + | mov..fcond CARG1, CARG3 + | mov..fcond CARG2, CARG4 + | b <5 + |7: // Convert integer to number and continue above. + | bhi ->fff_fallback + | strd CARG1, CARG2, TMPD + | mov CARG1, CARG3 + | bl extern __aeabi_i2d + | ldrd CARG3, CARG4, TMPD + | b <6 + |.endif + |.endmacro + | + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ldrd CARG1, CARG2, [BASE] + | ldr PC, [BASE, FRAME_PC] + | cmp NARGS8:RC, #8 + | checktpeq CARG2, LJ_TSTR // Need exactly 1 argument. + | bne ->fff_fallback + | ldr CARG3, STR:CARG1->len + | ldrb CARG1, STR:CARG1[1] // Access is always ok (NUL at end). + | mvn CARG2, #~LJ_TISNUM + | cmp CARG3, #0 + | ite eq + | moveq RC, #(0+1)*8 + | movne RC, #(1+1)*8 + | strd CARG1, CARG2, [BASE, #-8] + | b ->fff_res + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ldrd CARG1, CARG2, [BASE] + | ldr PC, [BASE, FRAME_PC] + | cmp NARGS8:RC, #8 // Need exactly 1 argument. + | checktpeq CARG2, LJ_TISNUM + | it eq + | bicseq CARG4, CARG1, #255 + | mov CARG3, #1 + | bne ->fff_fallback + | str CARG1, TMPD + | mov CARG2, TMPDp // Points to stack. Little-endian. + |->fff_newstr: + | // CARG2 = str, CARG3 = len. + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // Returns GCstr *. + | ldr BASE, L->base + | mvn CARG2, #~LJ_TSTR + | b ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ldrd CARG1, CARG2, [BASE] + | ldrd CARG3, CARG4, [BASE, #16] + | cmp NARGS8:RC, #16 + | mvn RB, #0 + | beq >1 + | blo ->fff_fallback + | checktp CARG4, LJ_TISNUM + | mov RB, CARG3 + | bne ->fff_fallback + |1: + | ldrd CARG3, CARG4, [BASE, #8] + | checktp CARG2, LJ_TSTR + | it eq + | ldreq CARG2, STR:CARG1->len + | checktpeq CARG4, LJ_TISNUM + | bne ->fff_fallback + | // CARG1 = str, CARG2 = str->len, CARG3 = start, RB = end + | add CARG4, CARG2, #1 + | cmp CARG3, #0 // if (start < 0) start += len+1 + | it lt + | addlt CARG3, CARG3, CARG4 + | cmp CARG3, #1 // if (start < 1) start = 1 + | it lt + | movlt CARG3, #1 + | cmp RB, #0 // if (end < 0) end += len+1 + | it lt + | addlt RB, RB, CARG4 + | bic RB, RB, RB, asr #31 // if (end < 0) end = 0 + | cmp RB, CARG2 // if (end > len) end = len + | add CARG1, STR:CARG1, #sizeof(GCstr)-1 + | it gt + | movgt RB, CARG2 + | add CARG2, CARG1, CARG3 + | subs CARG3, RB, CARG3 // len = end - start + | add CARG3, CARG3, #1 // len += 1 + | bge ->fff_newstr + |->fff_emptystr: + | sub STR:CARG1, DISPATCH, #-DISPATCH_GL(strempty) + | mvn CARG2, #~LJ_TSTR + | b ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ldr CARG3, [BASE, #4] + | cmp NARGS8:RC, #8 + | ldr STR:CARG2, [BASE] + | blo ->fff_fallback + | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) + | checkstr CARG3, ->fff_fallback + | ldr CARG4, SBUF:CARG1->b + | str BASE, L->base + | str PC, SAVE_PC + | str L, SBUF:CARG1->L + | str CARG4, SBUF:CARG1->w + | bl extern lj_buf_putstr_ .. 
name + | bl extern lj_buf_tostr + | b ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |// FP number to bit conversion for soft-float. Clobbers r0-r3. + |->vm_tobit_fb: + | bhi ->fff_fallback + |->vm_tobit: + | lsl RB, CARG2, #1 + | adds RB, RB, #0x00200000 + | ittt pl + | movpl CARG1, #0 // |x| < 1? + | bxpl lr + | mvn CARG4, #0x3e0 + | subs RB, CARG4, RB, asr #21 + | bmi >1 // |x| >= 2^32? + | lsl CARG4, CARG2, #11 + | orr CARG4, CARG4, #0x80000000 + | orr CARG4, CARG4, CARG1, lsr #21 + | cmp CARG2, #0 + | lsr CARG1, CARG4, RB + | it lt + | rsblt CARG1, CARG1, #0 + | bx lr + |1: + | add RB, RB, #21 + | lsr CARG4, CARG1, RB + | rsb RB, RB, #20 + | lsl CARG1, CARG2, #12 + | cmp CARG2, #0 + | lsl CARG1, CARG1, RB + | orr CARG1, CARG4, CARG1 + | it lt + | rsblt CARG1, CARG1, #0 + | bx lr + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | checktp CARG2, LJ_TISNUM + | it ne + | blne ->vm_tobit_fb + |.endmacro + | + |.ffunc_bit tobit + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name + | mov CARG3, CARG1 + | mov RA, #8 + |1: + | ldrd_i CARG1, CARG2, BASE, RA + | cmp RA, NARGS8:RC + | add RA, RA, #8 + | bge >2 + | checktp CARG2, LJ_TISNUM + | it ne + | blne ->vm_tobit_fb + | ins CARG3, CARG3, CARG1 + | b <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, orr + |.ffunc_bit_op bxor, eor + | + |2: + | mvn CARG4, #~LJ_TISNUM + | ldr PC, [BASE, FRAME_PC] + | strd CARG3, CARG4, [BASE, #-8] + | b ->fff_res1 + | + |.ffunc_bit bswap + | eor CARG3, CARG1, CARG1, ror #16 + | bic CARG3, CARG3, #0x00ff0000 + | ror CARG1, CARG1, #8 + | mvn CARG2, #~LJ_TISNUM + | eor CARG1, CARG1, CARG3, lsr #8 + | b ->fff_restv + | + |.ffunc_bit bnot + | mvn CARG1, CARG1 + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + | + |.macro .ffunc_bit_sh, name, ins, shmod + | .ffunc bit_..name + | ldrd CARG1, CARG2, [BASE, #8] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | checktp CARG2, LJ_TISNUM + | it ne + | blne ->vm_tobit_fb + |.if shmod == 0 + | and RA, CARG1, #31 + |.else + | rsb RA, CARG1, #0 + |.endif + | ldrd CARG1, CARG2, [BASE] + | checktp CARG2, LJ_TISNUM + | it ne + | blne ->vm_tobit_fb + | ins CARG1, CARG1, RA + | mvn CARG2, #~LJ_TISNUM + | b ->fff_restv + |.endmacro + | + |.ffunc_bit_sh lshift, lsl, 0 + |.ffunc_bit_sh rshift, lsr, 0 + |.ffunc_bit_sh arshift, asr, 0 + |.ffunc_bit_sh rol, ror, 1 + |.ffunc_bit_sh ror, ror, 0 + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RC = nargs*8 + | ldr CARG3, [BASE, FRAME_FUNC] + | ldr CARG2, L->maxstack + | add CARG1, BASE, NARGS8:RC + | ldr PC, [BASE, FRAME_PC] // Fallback may overwrite PC. + | str CARG1, L->top + | ldr CARG3, CFUNC:CARG3->f + | str BASE, L->base + | add CARG1, CARG1, #8*LUA_MINSTACK + | str PC, SAVE_PC // Redundant (but a defined value). + | cmp CARG1, CARG2 + | mov CARG1, L + | bhi >5 // Need to grow stack. + | blx CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ldr BASE, L->base + | cmp CRET1, #0 + | lsl RC, CRET1, #3 + | sub RA, BASE, #8 + | bgt ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ldr CARG1, L->top + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, CARG1, BASE + | bne ->vm_call_tail // Returned -1? 
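+ |// The fallback may have reallocated the Lua stack, which is why BASE,
+ |// L->top and LFUNC were refetched above before retrying the call.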
+ | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | ands CARG1, PC, #FRAME_TYPE + | bic CARG2, PC, #FRAME_TYPEP + | ittt eq + | ldreq INS, [PC, #-4] + | andeq CARG2, MASKR8, INS, lsr #5 // Conditional decode_RA8. + | addeq CARG2, CARG2, #8 + | sub RB, BASE, CARG2 + | b ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | mov CARG2, #LUA_MINSTACK + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | cmp CARG1, CARG1 // Set zero-flag to force retry. + | b <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | mov RA, lr + | str BASE, L->base + | add CARG2, BASE, NARGS8:RC + | str PC, SAVE_PC // Redundant (but a defined value). + | str CARG2, L->top + | mov CARG1, L + | bl extern lj_gc_step // (lua_State *L) + | ldr BASE, L->base + | mov lr, RA // Help return address predictor. + | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] + | bx lr + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) + | ldrb CARG1, [CARG1] + | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. + | bne >5 + | // Decrement the hookcount for consistency, but always do the call. + | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount) + | ldr CARG2, [CARG2] + | tst CARG1, #HOOK_ACTIVE + | bne >1 + | sub CARG2, CARG2, #1 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | sub CARG1, DISPATCH, #-DISPATCH_GL(hookcount) + | it ne + | strne CARG2, [CARG1] + | b >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) + | ldrb CARG1, [CARG1] + | tst CARG1, #HOOK_ACTIVE // Hook already active? + | beq >1 + |5: // Re-dispatch to static ins. + | decode_OP OP, INS + | add OP, DISPATCH, OP, lsl #2 + | ldr pc, [OP, #GG_DISP2STATIC] + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | sub CARG1, DISPATCH, #-DISPATCH_GL(hookmask) + | ldrb CARG1, [CARG1] + | sub CARG2, DISPATCH, #-DISPATCH_GL(hookcount) + | ldr CARG2, [CARG2] + | tst CARG1, #HOOK_ACTIVE // Hook already active? + | bne <5 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | beq <5 + | subs CARG2, CARG2, #1 + | sub CARG3, DISPATCH, #-DISPATCH_GL(hookcount) + | str CARG2, [CARG3] + | beq >1 + | tst CARG1, #LUA_MASKLINE + | beq <5 + |1: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ldr BASE, L->base + |4: // Re-dispatch to static ins. + | ldrb OP, [PC, #-4] + | ldr INS, [PC, #-4] + | add OP, DISPATCH, OP, lsl #2 + | ldr OP, [OP, #GG_DISP2STATIC] + | decode_RA8 RA, INS + | decode_RD RC, INS + | bx OP + | + |->cont_hook: // Continue from hook yield. + | ldr CARG1, [CARG4, #-24] + | add PC, PC, #4 + | str CARG1, SAVE_MULTRES // Restore MULTRES for *M ins. + | b <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). 
+ | sub CARG1, DISPATCH, #-GG_DISP2J + | str PC, SAVE_PC + | ldr CARG3, LFUNC:CARG3->field_pc + | mov CARG2, PC + | sub RB, DISPATCH, #-DISPATCH_J(L) + | str L, [RB] + | ldrb CARG3, [CARG3, #PC2PROTO(framesize)] + | str BASE, L->base + | add CARG3, BASE, CARG3, lsl #3 + | str CARG3, L->top + | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | b <3 + |.endif + | + |->vm_callhook: // Dispatch target for call hooks. + | mov CARG2, PC + |.if JIT + | b >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | orr CARG2, PC, #1 + |1: + |.endif + | add CARG4, BASE, RC + | str PC, SAVE_PC + | mov CARG1, L + | str BASE, L->base + | sub RA, RA, BASE + | str CARG4, L->top + | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ldr BASE, L->base + | ldr CARG4, L->top + | mov CARG2, #0 + | add RA, BASE, RA + | sub NARGS8:RC, CARG4, BASE + | str CARG2, SAVE_PC // Invalidate for subsequent line hook. + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | ldr INS, [PC, #-4] + | bx CRET1 + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RB, SAVE_MULTRES + | ldr INS, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. + | subs RB, RB, #8 + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. + | ldrd CARG1, CARG2, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | strd_i CARG1, CARG2, BASE, RC + | add RC, RC, #8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | mvn CARG2, #~LJ_TNIL + | bhi >9 // More results wanted? + | + | ldrh RA, TRACE:CARG3->traceno + | ldrh RC, TRACE:CARG3->link + | cmp RC, RA + | beq ->cont_nop // Blacklisted. + | cmp RC, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | sub RB, DISPATCH, #-DISPATCH_J(exitno) + | str RA, [RB] + | sub RB, DISPATCH, #-DISPATCH_J(L) + | str L, [RB] + | str BASE, L->base + | sub CARG1, DISPATCH, #-GG_DISP2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | strd_i CARG1, CARG2, BASE, RC + | add RC, RC, #8 + | b <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | ldr BASE, L->base + | sub PC, PC, #4 + | b ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_exit_handler: + |.if JIT + | sub sp, sp, #12 + | push {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12} + | ldr CARG1, [sp, #64] // Load original value of lr. + | ldr DISPATCH, [lr, #-1] // Load DISPATCH. + | add CARG3, sp, #64 // Recompute original value of sp. + | mv_vmstate CARG4, EXIT + | str CARG3, [sp, #52] // Store sp in RID_SP + | st_vmstate CARG4 + | ldr CARG4, [CARG1, #-5]! // Get exit instruction. + | str CARG1, [sp, #56] // Store exit pc in RID_LR and RID_PC. 
+ | str CARG1, [sp, #60] + |.if FPU + | vpush {d0-d15} + |.endif + | .long 0xf3432180 //SBFX CARG2, CARG4, #10, #1 + | .long 0xf36321d4 //BFI CARG2, CARG4, #11, #10 + | lsr CARG4, CARG4, #16 + | .long 0xf363010a //BFI CARG2, CARG4, #0, #11 + | add CARG1, CARG1, CARG2, lsl #1 + | ldr CARG2, [lr, #3] // Load exit stub group offset. + | sub CARG1, CARG1, lr + | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] + | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. + | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] + | sub RB, DISPATCH, #-DISPATCH_J(exitno) + | str CARG1, [RB] + | mov CARG4, #0 + | str BASE, L->base + | sub RB, DISPATCH, #-DISPATCH_J(L) + | str L, [RB] + | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] + | sub CARG1, DISPATCH, #-GG_DISP2J + | mov CARG2, sp + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldr CARG2, L->cframe + | ldr BASE, L->base + | bic CARG2, CARG2, #~CFRAME_RAWMASK // Use two steps: bic sp is deprecated. + | mov sp, CARG2 + | ldr PC, SAVE_PC // Get SAVE_PC. + | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif + |->vm_exit_interp: + | // CARG1 = MULTRES or negated error code, BASE, PC and DISPATCH set. + |.if JIT + | ldr L, SAVE_L + |1: + | cmp CARG1, #0 + | blt >9 // Check for error from exit. + | lsl RC, CARG1, #3 + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | str RC, SAVE_MULTRES + | mov CARG3, #0 + | str BASE, L->base + | ldr CARG2, LFUNC:CARG2->field_pc + | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] + | mv_vmstate CARG4, INTERP + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | // Modified copy of ins_next which handles function header dispatch, too. + | ldrb OP, [PC] + | mov MASKR8, #255 + | ldr INS, [PC], #4 + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | st_vmstate CARG4 + | cmp OP, #BC_FUNCC+2 // Fast function? + | bhs >4 + |2: + | cmp OP, #BC_FUNCF // Function header? + | ldr OP, [DISPATCH, OP, lsl #2] + | decode_RA8 RA, INS + | iteee lo + | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. + | subhs RC, RC, #8 + | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 + | ldrhs CARG3, [BASE, FRAME_FUNC] + | bx OP + | + |4: // Check frame below fast function. + | ldr CARG1, [BASE, FRAME_PC] + | ands CARG2, CARG1, #FRAME_TYPE + | bne <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldr CARG3, [CARG1, #-4] + | decode_RA8 CARG1, CARG3 + | sub CARG2, BASE, CARG1 + | ldr LFUNC:CARG3, [CARG2, #-16] + | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | mov CARG1, L + | bl extern lj_err_run // (lua_State *L) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called from JIT code. + |// + |// double lj_vm_floor/ceil/trunc(double x); + |.macro vm_round, func, hf + |.if hf == 1 + | vmov CARG1, CARG2, d0 + |.endif + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | bpl >2 // |x| < 1? + | mvn CARG4, #0x3cc + | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. + | itt lo + | bxlo lr // |x| >= 2^52: done. 
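+ |// 1 <= |x| < 2^52: RB counts the fractional mantissa bits. They are
+ |// collected (ztest) and masked off lo/hi, and a floor/ceil that must
+ |// round away from the truncation steps the result by one unit in the
+ |// last kept bit via the masked subtraction at the end.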
+ | mvn CARG4, #1 + | lsl CARG4, CARG4, RB + | bic CARG3, CARG1, CARG4 // ztest = lo & ~lomask + | and CARG1, CARG1, CARG4 // lo &= lomask + | subs RB, RB, #32 + | mvn CARG4, #1 + | itttt pl + | lslpl CARG4, CARG4, RB + | bicpl CARG4, CARG2, CARG4 // |x| <= 2^20: ztest |= hi & ~himask + | orrpl CARG3, CARG3, CARG4 + | mvnpl CARG4, #1 + | itt pl + | lslpl CARG4, CARG4, RB + | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask + | mvn CARG4, #1 + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) + |.else + | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) + |.endif + |.if hf == 1 + | it eq + | vmoveq d0, CARG1, CARG2 + |.endif + | itt eq + | bxeq lr // iszero: done. + | mvn CARG4, #1 + | cmp RB, #0 + | ite pl + | lslpl CARG3, CARG4, RB + | mvnmi CARG3, #0 + | add RB, RB, #32 + | lsl CARG4, CARG4, RB + | subs CARG1, CARG1, CARG4 // lo = lo-lomask + | mvn CARG4, #1 + | sbc CARG2, CARG2, CARG3 // hi = hi-himask+carry + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + | + |2: // |x| < 1: + | itt cs + | bxcs lr // |x| is not finite. + | orr CARG3, CARG3, CARG1 // ztest = (2*hi) | lo + |.if "func" == "floor" + | tst CARG3, CARG2, asr #31 // iszero = ((ztest & signmask) == 0) + |.else + | bics CARG3, CARG3, CARG2, asr #31 // iszero = ((ztest & ~signmask) == 0) + |.endif + | mov CARG1, #0 // lo = 0 + | and CARG2, CARG2, #0x80000000 + | itt ne + | ldrne CARG4, <9 // hi = sign(x) | (iszero ? 0.0 : 1.0) + | orrne CARG2, CARG2, CARG4 + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + |.endmacro + | + |9: + | .long 0x00003ff0 // hiword(+1.0) jturnsek: swaped halfwords!!! + | + |->vm_floor: + |.if HFABI + | vm_round floor, 1 + |.endif + |->vm_floor_sf: + | vm_round floor, 0 + | + |->vm_ceil: + |.if HFABI + | vm_round ceil, 1 + |.endif + |->vm_ceil_sf: + | vm_round ceil, 0 + | + |.macro vm_trunc, hf + |.if JIT + |.if hf == 1 + | vmov CARG1, CARG2, d0 + |.endif + | lsl CARG3, CARG2, #1 + | adds RB, CARG3, #0x00200000 + | itt pl + | andpl CARG2, CARG2, #0x80000000 // |x| < 1? hi = sign(x), lo = 0. + | movpl CARG1, #0 + |.if hf == 1 + | it pl + | vmovpl d0, CARG1, CARG2 + |.endif + | itt pl + | bxpl lr + | mvn CARG4, #0x3cc + | subs RB, CARG4, RB, asr #21 // 2^0: RB = 51, 2^51: RB = 0. + | itt lo + | bxlo lr // |x| >= 2^52: already done. + | mvn CARG4, #1 + | lsl CARG4, CARG4, RB + | and CARG1, CARG1, CARG4 // lo &= lomask + | subs RB, RB, #32 + | mvn CARG4, #1 + | itt pl + | lsl CARG4, CARG4, RB + | andpl CARG2, CARG2, CARG4 // |x| <= 2^20: hi &= himask + |.if hf == 1 + | vmov d0, CARG1, CARG2 + |.endif + | bx lr + |.endif + |.endmacro + | + |->vm_trunc: + |.if HFABI + | vm_trunc 1 + |.endif + |->vm_trunc_sf: + | vm_trunc 0 + | + | // double lj_vm_mod(double dividend, double divisor); + |->vm_mod: + |.if FPU + | // Special calling convention. Also, RC (r11) is not preserved. 
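+ |// Lua modulo: x % y == x - floor(x/y)*y; the FPU variant receives its
+ |// operands in d6/d7, which is the "special calling convention" above.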
+ | vdiv.f64 d0, d6, d7 + | mov RC, lr + | vmov CARG1, CARG2, d0 + | bl ->vm_floor_sf + | vmov d0, CARG1, CARG2 + | vmul.f64 d0, d0, d7 + | mov lr, RC + | vsub.f64 d6, d6, d0 + | bx lr + |.else + | push {r0, r1, r2, r3, r4, lr} + | bl extern __aeabi_ddiv + | bl ->vm_floor_sf + | ldrd CARG3, CARG4, [sp, #8] + | bl extern __aeabi_dmul + | ldrd CARG3, CARG4, [sp] + | eor CARG2, CARG2, #0x80000000 + | bl extern __aeabi_dadd + | add sp, sp, #20 + | pop {pc} + |.endif + | + | // int lj_vm_modi(int dividend, int divisor); + |->vm_modi: + | ands RB, CARG1, #0x80000000 + | it mi + | rsbmi CARG1, CARG1, #0 // a = |dividend| + | eor RB, RB, CARG2, asr #1 // Keep signdiff and sign(divisor). + | cmp CARG2, #0 + | it mi + | rsbmi CARG2, CARG2, #0 // b = |divisor| + | subs CARG4, CARG2, #1 + | ite ne + | cmpne CARG1, CARG2 + | moveq CARG1, #0 // if (b == 1 || a == b) a = 0 + | it hi + | tsthi CARG2, CARG4 + | it eq + | andeq CARG1, CARG1, CARG4 // else if ((b & (b-1)) == 0) a &= b-1 + | bls >1 + | // Use repeated subtraction to get the remainder. + | clz CARG3, CARG1 + | clz CARG4, CARG2 + | sub CARG4, CARG4, CARG3 + | rsbs CARG3, CARG4, #31 // entry = (31-(clz(b)-clz(a)))*12 + | it ne + | .long 0xe8dff002 // tbbne [pc, CARG3] // Duff's device. + | .long 0xb8bec4ca, 0xa0a6acb2, 0x888e949a, 0x70767c82 // TBB table (part1) + | .long 0x585e646a, 0x40464c52, 0x282e343a, 0x10161c22 // TBB table (part2) + { + int i; + for (i = 31; i >= 0; i--) { + | cmp CARG1, CARG2, lsl #i + | it hs + | subhs CARG1, CARG1, CARG2, lsl #i + } + } + |1: + | cmp CARG1, #0 + | it ne + | cmpne RB, #0 + | it mi + | submi CARG1, CARG1, CARG2 // if (y != 0 && signdiff) y = y - b + | eors CARG2, CARG1, RB, lsl #1 + | it mi + | rsbmi CARG1, CARG1, #0 // if (sign(divisor) != sign(y)) y = -y + | bx lr + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_TMP0, CARG3 + |.define NEXT_TMP1, CARG4 + |.define NEXT_LIM, r12 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY_I, [sp, #8] + |.define NEXT_RES_KEY_IT, [sp, #12] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldr NEXT_TMP0, NEXT_TAB->array + | ldr NEXT_LIM, NEXT_TAB->asize + | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 + |1: // Traverse array part. + | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM + | bhs >5 + | ldr NEXT_TMP1, [NEXT_TMP0, #4] + | str NEXT_IDX, NEXT_RES_KEY_I + | add NEXT_TMP0, NEXT_TMP0, #8 + | add NEXT_IDX, NEXT_IDX, #1 + | checktp NEXT_TMP1, LJ_TNIL + | beq <1 // Skip holes in array part. + | ldr NEXT_TMP0, [NEXT_TMP0, #-8] + | mov NEXT_RES, NEXT_RES_PTR + | strd NEXT_TMP0, NEXT_TMP1, NEXT_RES_VAL + | mvn NEXT_TMP0, #~LJ_TISNUM + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + | + |5: // Traverse hash part. + | ldr NEXT_TMP0, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP1, NODE:NEXT_RES->val.it + | checktp NEXT_TMP1, LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | itt ne + | bxne lr + | // Skip holes in hash part. + | add NEXT_RES, NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. 
Set the key to nil (not the value). + | mvn NEXT_TMP0, #0 + | mov NEXT_RES, NEXT_RES_PTR + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. + |// Saveregs already performed. Callback slot number in [sp], g in r12. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | ldr CTSTATE, GL:r12->ctype_state + | add DISPATCH, r12, #GG_G2DISP + |.if FPU + | str r4, SAVE_R4 + | add r4, sp, CFRAME_SPACE+4+8*8 + | vstmdb r4!, {d8-d15} + |.endif + |.if HFABI + | add r12, CTSTATE, #offsetof(CTState, cb.fpr[8]) + |.endif + | strd CARG3, CARG4, CTSTATE->cb.gpr[2] + | strd CARG1, CARG2, CTSTATE->cb.gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d7} + |.endif + | ldr CARG4, [sp] + | add CARG3, sp, #CFRAME_SIZE + | mov CARG1, CTSTATE + | lsr CARG4, CARG4, #3 + | str CARG3, CTSTATE->cb.stack + | mov CARG2, sp + | str CARG4, CTSTATE->cb.slot + | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. + | bl extern lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ldr BASE, L:CRET1->base + | mv_vmstate CARG2, INTERP + | ldr RC, L:CRET1->top + | mov MASKR8, #255 + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | mov L, CRET1 + | sub RC, RC, BASE + | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. + | st_vmstate CARG2 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ldr CTSTATE, [DISPATCH, #DISPATCH_GL(ctype_state)] + | str BASE, L->base + | str CARG4, L->top + | str L, CTSTATE->L + | mov CARG1, CTSTATE + | mov CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) + | ldrd CARG1, CARG2, CTSTATE->cb.gpr[0] + |.if HFABI + | vldr d0, CTSTATE->cb.fpr[0] + |.endif + | b ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, r4 + | push {CCSTATE, r5, r11, lr} + | mov CCSTATE, CARG1 + | ldr CARG1, CCSTATE:CARG1->spadj + | ldrb CARG2, CCSTATE->nsp + | add CARG3, CCSTATE, #offsetof(CCallState, stack) + |.if HFABI + | add RB, CCSTATE, #offsetof(CCallState, fpr[0]) + |.endif + | mov r11, sp + | sub sp, sp, CARG1 // Readjust stack. + | subs CARG2, CARG2, #1 + |.if HFABI + | vldm RB, {d0-d7} + |.endif + | ldr RB, CCSTATE->func + | bmi >2 + |1: // Copy stack slots. + | ldr CARG4, [CARG3, CARG2, lsl #2] + | str CARG4, [sp, CARG2, lsl #2] + | subs CARG2, CARG2, #1 + | bpl <1 + |2: + | ldrd CARG1, CARG2, CCSTATE->gpr[0] + | ldrd CARG3, CARG4, CCSTATE->gpr[2] + | blx RB + | mov sp, r11 + |.if HFABI + | add r12, CCSTATE, #offsetof(CCallState, fpr[4]) + |.endif + | strd CRET1, CRET2, CCSTATE->gpr[0] + |.if HFABI + | vstmdb r12!, {d0-d3} + |.endif + | pop {CCSTATE, r5, r11, pc} + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. 
*/ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RC = src2, JMP with RC = target + | lsl RC, RC, #3 + | ldrd_iw CARG1, CARG2, RA, BASE + | ldrh RB, [PC, #2] + | ldrd_iw CARG3, CARG4, RC, BASE + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TISNUM + | bne >3 + | checktp CARG4, LJ_TISNUM + | bne >4 + | cmp CARG1, CARG3 + if (op == BC_ISLT) { + | it lt + | sublt PC, RB, #0x20000 + } else if (op == BC_ISGE) { + | it ge + | subge PC, RB, #0x20000 + } else if (op == BC_ISLE) { + | it le + | suble PC, RB, #0x20000 + } else { + | it gt + | subgt PC, RB, #0x20000 + } + |1: + | ins_next + | + |3: // CARG1, CARG2 is not an integer. + |.if FPU + | vldr d0, [RA] + | bhi ->vmeta_comp + | // d0 is a number. + | checktp CARG4, LJ_TISNUM + | vldr d1, [RC] + | blo >5 + | bhi ->vmeta_comp + | // d0 is a number, CARG3 is an integer. + | vmov s4, CARG3 + | vcvt.f64.s32 d1, s4 + | b >5 + |4: // CARG1 is an integer, CARG3, CARG4 is not an integer. + | vldr d1, [RC] + | bhi ->vmeta_comp + | // CARG1 is an integer, d1 is a number. + | vmov s4, CARG1 + | vcvt.f64.s32 d0, s4 + |5: // d0 and d1 are numbers. + | vcmp.f64 d0, d1 + | vmrs + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | it lo + | sublo PC, RB, #0x20000 + } else if (op == BC_ISGE) { + | it hs + | subhs PC, RB, #0x20000 + } else if (op == BC_ISLE) { + | it ls + | subls PC, RB, #0x20000 + } else { + | it hi + | subhi PC, RB, #0x20000 + } + | b <1 + |.else + | bhi ->vmeta_comp + | // CARG1, CARG2 is a number. + | checktp CARG4, LJ_TISNUM + | it lo + | movlo RA, RB // Save RB. + | blo >5 + | bhi ->vmeta_comp + | // CARG1, CARG2 is a number, CARG3 is an integer. + | mov CARG1, CARG3 + | mov RC, RA + | mov RA, RB // Save RB. + | bl extern __aeabi_i2d + | mov CARG3, CARG1 + | mov CARG4, CARG2 + | ldrd CARG1, CARG2, [RC] // Restore first operand. + | b >5 + |4: // CARG1 is an integer, CARG3, CARG4 is not an integer. + | bhi ->vmeta_comp + | // CARG1 is an integer, CARG3, CARG4 is a number. + | mov RA, RB // Save RB. + | bl extern __aeabi_i2d + | ldrd CARG3, CARG4, [RC] // Restore second operand. + |5: // CARG1, CARG2 and CARG3, CARG4 are numbers. + | bl extern __aeabi_cdcmple + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | it lo + | sublo PC, RA, #0x20000 + } else if (op == BC_ISGE) { + | it hs + | subhs PC, RA, #0x20000 + } else if (op == BC_ISLE) { + | it ls + | subls PC, RA, #0x20000 + } else { + | it hi + | subhi PC, RA, #0x20000 + } + | b <1 + |.endif + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RC = src2, JMP with RC = target + | lsl RC, RC, #3 + | ldrd_iw CARG1, CARG2, RA, BASE + | ldrh RB, [PC, #2] + | ldrd_iw CARG3, CARG4, RC, BASE + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | checktp CARG2, LJ_TISNUM + | it ls + | cmnls CARG4, #-LJ_TISNUM + if (vk) { + | bls ->BC_ISEQN_Z + } else { + | bls ->BC_ISNEN_Z + } + | // Either or both types are not numbers. + |.if FFI + | checktp CARG2, LJ_TCDATA + | checktpne CARG4, LJ_TCDATA + | beq ->vmeta_equal_cd + |.endif + | cmp CARG2, CARG4 // Compare types. + | bne >2 // Not the same type? + | checktp CARG2, LJ_TISPRI + | bhs >1 // Same type and primitive type? + | + | // Same types and not a primitive type. Compare GCobj or pvalue. + | cmp CARG1, CARG3 + if (vk) { + | bne >3 // Different GCobjs or pvalues? + |1: // Branch if same. + | sub PC, RB, #0x20000 + |2: // Different. 
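+ |// ISEQV: "same" fell through label 1 above and updated PC (jump taken);
+ |// "different" lands here, and both paths dispatch via ins_next.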
+ | ins_next
+ |3:
+ | checktp CARG2, LJ_TISTABUD
+ | bhi <2 // Different objects and not table/ud?
+ } else {
+ | beq >1 // Same GCobjs or pvalues?
+ | checktp CARG2, LJ_TISTABUD
+ | bhi >2 // Different objects and not table/ud?
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ if (vk) {
+ | beq <2 // No metatable?
+ } else {
+ | beq >2 // No metatable?
+ }
+ | ldrb RA, TAB:RA->nomm
+ | mov CARG4, #1-vk // ne = 0 or 1.
+ | mov CARG2, CARG1
+ | tst RA, #1<<MM_eq
+ | beq ->vmeta_equal // 'no __eq' flag not set?
+ if (vk) {
+ | b <2
+ } else {
+ |2: // Branch if different.
+ | sub PC, RB, #0x20000
+ |1: // Same.
+ | ins_next
+ }
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src*8, RC = str_const (~), JMP with RC = target
+ | mvn RC, RC
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ldrh RB, [PC, #2]
+ | ldr STR:CARG3, [KBASE, RC, lsl #2]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TSTR
+ |.if FFI
+ | bne >7
+ | cmp CARG1, CARG3
+ |.else
+ | it eq
+ | cmpeq CARG1, CARG3
+ |.endif
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ |1:
+ } else {
+ |1:
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | ins_next
+ |
+ |.if FFI
+ |7:
+ | checktp CARG2, LJ_TCDATA
+ | bne <1
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RC = num_const (~), JMP with RC = target
+ | lsl RC, RC, #3
+ | ldrd_iw CARG1, CARG2, RA, BASE
+ | ldrh RB, [PC, #2]
+ | ldrd_iw CARG3, CARG4, RC, KBASE
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | checktp CARG2, LJ_TISNUM
+ | bne >3
+ | checktp CARG4, LJ_TISNUM
+ | bne >4
+ | cmp CARG1, CARG3
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ |1:
+ } else {
+ |1:
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ |2:
+ | ins_next
+ |
+ |3: // CARG1, CARG2 is not an integer.
+ |.if FFI
+ | bhi >7
+ |.else
+ if (!vk) {
+ | it hi
+ | subhi PC, RB, #0x20000
+ }
+ | bhi <2
+ |.endif
+ |.if FPU
+ | checktp CARG4, LJ_TISNUM
+ | vmov s4, CARG3
+ | vldr d0, [RA]
+ | ite lo
+ | vldrlo d1, [RC]
+ | vcvths.f64.s32 d1, s4
+ | b >5
+ |4: // CARG1 is an integer, d1 is a number.
+ | vmov s4, CARG1
+ | vldr d1, [RC]
+ | vcvt.f64.s32 d0, s4
+ |5: // d0 and d1 are numbers.
+ | vcmp.f64 d0, d1
+ | vmrs
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ } else {
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | b <2
+ |.else
+ | // CARG1, CARG2 is a number.
+ | checktp CARG4, LJ_TISNUM
+ | it lo
+ | movlo RA, RB // Save RB.
+ | blo >5
+ | // CARG1, CARG2 is a number, CARG3 is an integer.
+ | mov CARG1, CARG3
+ | mov RC, RA
+ |4: // CARG1 is an integer, CARG3, CARG4 is a number.
+ | mov RA, RB // Save RB.
+ | bl extern __aeabi_i2d
+ | ldrd CARG3, CARG4, [RC] // Restore other operand.
+ |5: // CARG1, CARG2 and CARG3, CARG4 are numbers.
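+ | // Soft-float path: __aeabi_cdcmpeq takes the two doubles in r0/r1 and
+ | // r2/r3 (CARG1-CARG4) and returns the comparison in the CPSR flags.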
+ | bl extern __aeabi_cdcmpeq
+ if (vk) {
+ | it eq
+ | subeq PC, RA, #0x20000
+ } else {
+ | it ne
+ | subne PC, RA, #0x20000
+ }
+ | b <2
+ |.endif
+ |
+ |.if FFI
+ |7:
+ | checktp CARG2, LJ_TCDATA
+ | bne <1
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src*8, RC = primitive_type (~), JMP with RC = target
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ldrh RB, [PC, #2]
+ | add PC, PC, #4
+ | mvn RC, RC
+ | add RB, PC, RB, lsl #2
+ |.if FFI
+ | checktp CARG2, LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG2, RC
+ if (vk) {
+ | it eq
+ | subeq PC, RB, #0x20000
+ } else {
+ | it ne
+ | subne PC, RB, #0x20000
+ }
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst*8 or unused, RC = src, JMP with RC = target
+ | add RC, BASE, RC, lsl #3
+ | ldrh RB, [PC, #2]
+ | ldrd CARG1, CARG2, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | checktp CARG2, LJ_TTRUE
+ if (op == BC_ISTC || op == BC_IST) {
+ | it ls
+ | subls PC, RB, #0x20000
+ if (op == BC_ISTC) {
+ | it ls
+ | strdls_i CARG1, CARG2, BASE, RA
+ }
+ } else {
+ | it hi
+ | subhi PC, RB, #0x20000
+ if (op == BC_ISFC) {
+ | it hi
+ | strdhi_i CARG1, CARG2, BASE, RA
+ }
+ }
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src*8, RC = -type
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ins_next1
+ | cmn CARG2, RC
+ | ins_next2
+ | bne ->vmeta_istype
+ | ins_next3
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RC = -(TISNUM-1)
+ | ldrd_i CARG1, CARG2, BASE, RA
+ | ins_next1
+ | checktp CARG2, LJ_TISNUM
+ | ins_next2
+ | bhs ->vmeta_istype
+ | ins_next3
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ins_next1
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+ case BC_NOT:
+ | // RA = dst*8, RC = src
+ | add RC, BASE, RC, lsl #3
+ | ins_next1
+ | ldr CARG1, [RC, #4]
+ | add RA, BASE, RA
+ | ins_next2
+ | checktp CARG1, LJ_TTRUE
+ | ite ls
+ | mvnls CARG2, #~LJ_TFALSE
+ | mvnhi CARG2, #~LJ_TTRUE
+ | str CARG2, [RA, #4]
+ | ins_next3
+ break;
+ case BC_UNM:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | ins_next1
+ | ins_next2
+ | checktp CARG2, LJ_TISNUM
+ | bhi ->vmeta_unm
+ | it ne
+ | eorne CARG2, CARG2, #0x80000000
+ | bne >5
+ | it eq
+ | rsbseq CARG1, CARG1, #0
+ | it vs
+ | ldrdvs CARG1, CARG2, >9
+ |5:
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |
+ |.align 8
+ |9:
+ | .long 0x00000000, 0x000041e0 // 2^31. jturnsek: swapped halfwords!
+ break;
+ case BC_LEN:
+ | // RA = dst*8, RC = src
+ | lsl RC, RC, #3
+ | ldrd_i CARG1, CARG2, BASE, RC
+ | checkstr CARG2, >2
+ | ldr CARG1, STR:CARG1->len
+ |1:
+ | mvn CARG2, #~LJ_TISNUM
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |2:
+ | checktab CARG2, ->vmeta_len
+#if LJ_52
+ | ldr TAB:CARG3, TAB:CARG1->metatable
+ | cmp TAB:CARG3, #0
+ | bne >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | .IOS mov RC, BASE
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
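+ | // .IOS lines emit only for iOS builds, where the register holding BASE
+ | // (r9 in the ARM port) is scratch for C code, so BASE rides in RC.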
+ | .IOS mov BASE, RC
+ | b <1
+#if LJ_52
+ |9:
+ | ldrb CARG4, TAB:CARG3->nomm
+ | tst CARG4, #1<<MM_len
+ | bne <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithcheck, cond, ncond, target
+ ||if (vk == 1) {
+ | cmn CARG4, #-LJ_TISNUM
+ | it cond
+ | cmn..cond CARG2, #-LJ_TISNUM
+ ||} else {
+ | cmn CARG2, #-LJ_TISNUM
+ | it cond
+ | cmn..cond CARG4, #-LJ_TISNUM
+ ||}
+ | b..ncond target
+ |.endmacro
+ |.macro ins_arithcheck_int, target
+ | ins_arithcheck eq, ne, target
+ |.endmacro
+ |.macro ins_arithcheck_num, target
+ | ins_arithcheck lo, hs, target
+ |.endmacro
+ |
+ |.macro ins_arithpre
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | .if FPU
+ | ldrd_iw CARG1, CARG2, RB, BASE
+ | ldrd_iw CARG3, CARG4, RC, KBASE
+ | .else
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, KBASE, RC
+ | .endif
+ || break;
+ ||case 1:
+ | .if FPU
+ | ldrd_iw CARG3, CARG4, RB, BASE
+ | ldrd_iw CARG1, CARG2, RC, KBASE
+ | .else
+ | ldrd_i CARG3, CARG4, BASE, RB
+ | ldrd_i CARG1, CARG2, KBASE, RC
+ | .endif
+ || break;
+ ||default:
+ | .if FPU
+ | ldrd_iw CARG1, CARG2, RB, BASE
+ | ldrd_iw CARG3, CARG4, RC, BASE
+ | .else
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | .endif
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithpre_fpu, reg1, reg2
+ |.if FPU
+ ||if (vk == 1) {
+ | vldr reg2, [RB]
+ | vldr reg1, [RC]
+ ||} else {
+ | vldr reg1, [RB]
+ | vldr reg2, [RC]
+ ||}
+ |.endif
+ |.endmacro
+ |
+ |.macro ins_arithpost_fpu, reg
+ | ins_next1
+ | add RA, BASE, RA
+ | ins_next2
+ | vstr reg, [RA]
+ | ins_next3
+ |.endmacro
+ |
+ |.macro ins_arithfallback, ins
+ ||switch (vk) {
+ ||case 0:
+ | ins ->vmeta_arith_vn
+ || break;
+ ||case 1:
+ | ins ->vmeta_arith_nv
+ || break;
+ ||default:
+ | ins ->vmeta_arith_vv
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins, fpins, fpcall
+ | ins_arithpre
+ |.if "intins" ~= "vm_modi" and not FPU
+ | ins_next1
+ |.endif
+ | ins_arithcheck_int >5
+ |.if "intins" == "smull"
+ | smull CARG1, RC, CARG3, CARG1
+ | cmp RC, CARG1, asr #31
+ | ins_arithfallback bne
+ |.elif "intins" == "vm_modi"
+ | movs CARG2, CARG3
+ | ins_arithfallback beq
+ | bl ->vm_modi
+ | mvn CARG2, #~LJ_TISNUM
+ |.else
+ | intins CARG1, CARG1, CARG3
+ | ins_arithfallback bvs
+ |.endif
+ |4:
+ |.if "intins" == "vm_modi" or FPU
+ | ins_next1
+ |.endif
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ |5: // FP variant.
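+ | // Reload both operands as doubles (or call the __aeabi helper) and
+ | // fall back to vmeta_arith if either operand is not a number.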
+ | ins_arithpre_fpu d6, d7 + | ins_arithfallback ins_arithcheck_num + |.if FPU + |.if "intins" == "vm_modi" + | bl fpcall + |.else + | fpins d6, d6, d7 + |.endif + | ins_arithpost_fpu d6 + |.else + | bl fpcall + |.if "intins" ~= "vm_modi" + | ins_next1 + |.endif + | b <4 + |.endif + |.endmacro + | + |.macro ins_arithfp, fpins, fpcall + | ins_arithpre + |.if "fpins" ~= "extern" or HFABI + | ins_arithpre_fpu d0, d1 + |.endif + | ins_arithfallback ins_arithcheck_num + |.if "fpins" == "extern" + | .IOS mov RC, BASE + | bl fpcall + | .IOS mov BASE, RC + |.elif FPU + | fpins d0, d0, d1 + |.else + | bl fpcall + |.endif + |.if ("fpins" ~= "extern" or HFABI) and FPU + | ins_arithpost_fpu d0 + |.else + | ins_next1 + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + |.endif + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn adds, vadd.f64, extern __aeabi_dadd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subs, vsub.f64, extern __aeabi_dsub + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn smull, vmul.f64, extern __aeabi_dmul + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp vdiv.f64, extern __aeabi_ddiv + break; + case BC_MODVN: case BC_MODNV: case BC_MODVV: + | ins_arithdn vm_modi, vm_mod, ->vm_mod + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. + | ins_arithfp extern, extern pow + break; + + case BC_CAT: + | decode_RB8 RC, INS + | decode_RC8 RB, INS + | // RA = dst*8, RC = src_start*8, RB = src_end*8 (note: RB/RC swapped!) + | sub CARG3, RB, RC + | str BASE, L->base + | add CARG2, BASE, RB + |->BC_CAT_Z: + | // RA = dst*8, RC = src_start*8, CARG2 = top-1 + | mov CARG1, L + | str PC, SAVE_PC + | lsr CARG3, CARG3, #3 + | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ldr BASE, L->base + | cmp CRET1, #0 + | bne ->vmeta_binop + | ldrd_i CARG3, CARG4, BASE, RC + | ins_next1 + | ins_next2 + | strd_i CARG3, CARG4, BASE, RA // Copy result to RA. + | ins_next3 + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RC = str_const (~) + | mvn RC, RC + | ins_next1 + | ldr CARG1, [KBASE, RC, lsl #2] + | mvn CARG2, #~LJ_TSTR + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RC = cdata_const (~) + | mvn RC, RC + | ins_next1 + | ldr CARG1, [KBASE, RC, lsl #2] + | mvn CARG2, #~LJ_TCDATA + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, (RC = int16_literal) + | mov CARG1, INS, asr #16 // Refetch sign-extended reg. 
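+ | // The 16-bit literal lives in the top halfword of INS, so asr #16
+ | // extracts and sign-extends it in a single instruction.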
+ | mvn CARG2, #~LJ_TISNUM + | ins_next1 + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + break; + case BC_KNUM: + | // RA = dst*8, RC = num_const + | lsl RC, RC, #3 + | ins_next1 + | ldrd_i CARG1, CARG2, KBASE, RC + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + break; + case BC_KPRI: + | // RA = dst*8, RC = primitive_type (~) + | add RA, BASE, RA + | mvn RC, RC + | ins_next1 + | ins_next2 + | str RC, [RA, #4] + | ins_next3 + break; + case BC_KNIL: + | // RA = base*8, RC = end + | add RA, BASE, RA + | add RC, BASE, RC, lsl #3 + | mvn CARG1, #~LJ_TNIL + | str CARG1, [RA, #4] + | add RA, RA, #8 + |1: + | str CARG1, [RA, #4] + | cmp RA, RC + | add RA, RA, #8 + | blt <1 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RC = uvnum + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsl RC, RC, #2 + | add RC, RC, #offsetof(GCfuncL, uvptr) + | ldr UPVAL:CARG2, [LFUNC:CARG2, RC] + | ldr CARG2, UPVAL:CARG2->v + | ldrd CARG3, CARG4, [CARG2] + | ins_next1 + | ins_next2 + | strd_i CARG3, CARG4, BASE, RA + | ins_next3 + break; + case BC_USETV: + | // RA = uvnum*8, RC = src + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | lsl RC, RC, #3 + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldrd_i CARG3, CARG4, BASE, RC + | ldrb RB, UPVAL:CARG2->marked + | ldrb RC, UPVAL:CARG2->closed + | ldr CARG2, UPVAL:CARG2->v + | tst RB, #LJ_GC_BLACK // isblack(uv) + | add RB, CARG4, #-LJ_TISGCV + | it ne + | cmpne RC, #0 + | strd CARG3, CARG4, [CARG2] + | bne >2 // Upvalue is closed and black? + |1: + | ins_next + | + |2: // Check if new value is collectable. + | cmn RB, #-(LJ_TNUMX - LJ_TISGCV) + | it hi + | ldrbhi RC, GCOBJ:CARG3->gch.marked + | bls <1 // tvisgcv(v) + | sub CARG1, DISPATCH, #-GG_DISP2G + | tst RC, #LJ_GC_WHITES + | // Crossed a write barrier. Move the barrier forward. + |.if IOS + | beq <1 + | mov RC, BASE + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RC + |.else + | it ne + | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) + |.endif + | b <1 + break; + case BC_USETS: + | // RA = uvnum*8, RC = str_const (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | mvn RC, RC + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldr STR:CARG3, [KBASE, RC, lsl #2] + | ldrb RB, UPVAL:CARG2->marked + | ldrb RC, UPVAL:CARG2->closed + | ldr CARG2, UPVAL:CARG2->v + | mvn CARG4, #~LJ_TSTR + | tst RB, #LJ_GC_BLACK // isblack(uv) + | ldrb RB, STR:CARG3->marked + | strd CARG3, CARG4, [CARG2] + | bne >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | tst RB, #LJ_GC_WHITES // iswhite(str) + | it ne + | cmpne RC, #0 + | sub CARG1, DISPATCH, #-GG_DISP2G + | // Crossed a write barrier. Move the barrier forward. 
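+ | // Outside iOS the barrier call is predicated with blne; on iOS BASE
+ | // must also be saved around the call.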
+ |.if IOS + | beq <1 + | mov RC, BASE + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RC + |.else + | it ne + | blne extern lj_gc_barrieruv // (global_State *g, TValue *tv) + |.endif + | b <1 + break; + case BC_USETN: + | // RA = uvnum*8, RC = num_const + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | lsl RC, RC, #3 + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | ldrd_i CARG3, CARG4, KBASE, RC + | ldr CARG2, UPVAL:CARG2->v + | ins_next1 + | ins_next2 + | strd CARG3, CARG4, [CARG2] + | ins_next3 + break; + case BC_USETP: + | // RA = uvnum*8, RC = primitive_type (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | lsr RA, RA, #1 + | add RA, RA, #offsetof(GCfuncL, uvptr) + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA] + | mvn RC, RC + | ldr CARG2, UPVAL:CARG2->v + | ins_next1 + | ins_next2 + | str RC, [CARG2, #4] + | ins_next3 + break; + + case BC_UCLO: + | // RA = level*8, RC = target + | ldr CARG3, L->openupval + | add RC, PC, RC, lsl #2 + | str BASE, L->base + | cmp CARG3, #0 + | sub PC, RC, #0x20000 + | beq >1 + | mov CARG1, L + | add CARG2, BASE, RA + | bl extern lj_func_closeuv // (lua_State *L, TValue *level) + | ldr BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RC = proto_const (~) (holding function prototype) + | mvn RC, RC + | str BASE, L->base + | ldr CARG2, [KBASE, RC, lsl #2] + | str PC, SAVE_PC + | ldr CARG3, [BASE, FRAME_FUNC] + | mov CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | bl extern lj_func_newL_gc + | // Returns GCfuncL *. + | ldr BASE, L->base + | mvn CARG2, #~LJ_TFUNC + | ins_next1 + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RC = (hbits|asize) | tab_const (~) + if (op == BC_TDUP) { + | mvn RC, RC + } + | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.total) + | ldr CARG3, [CARG1] + | sub CARG1, DISPATCH, #-DISPATCH_GL(gc.threshold) + | ldr CARG4, [CARG1] + | str BASE, L->base + | str PC, SAVE_PC + | cmp CARG3, CARG4 + | mov CARG1, L + | bhs >5 + |1: + if (op == BC_TNEW) { + | lsl CARG2, RC, #21 + | lsr CARG3, RC, #11 + | asr RC, CARG2, #21 + | lsr CARG2, CARG2, #21 + | cmn RC, #1 + | it eq + | addeq CARG2, CARG2, #2 + | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + | // Returns GCtab *. + } else { + | ldr CARG2, [KBASE, RC, lsl #2] + | bl extern lj_tab_dup // (lua_State *L, Table *kt) + | // Returns GCtab *. + } + | ldr BASE, L->base + | mvn CARG2, #~LJ_TTAB + | ins_next1 + | ins_next2 + | strd_i CARG1, CARG2, BASE, RA + | ins_next3 + |5: + | bl extern lj_gc_step_fixtop // (lua_State *L) + | mov CARG1, L + | b <1 + break; + + case BC_GGET: + | // RA = dst*8, RC = str_const (~) + case BC_GSET: + | // RA = dst*8, RC = str_const (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | mvn RC, RC + | ldr TAB:CARG1, LFUNC:CARG2->env + | ldr STR:RC, [KBASE, RC, lsl #2] + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = dst*8, RB = table*8, RC = key*8 + | ldrd_i TAB:CARG1, CARG2, BASE, RB + | ldrd_i CARG3, CARG4, BASE, RC + | checktab CARG2, ->vmeta_tgetv // STALL: load CARG1, CARG2. + | checktp CARG4, LJ_TISNUM // Integer key? 
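+ | // ARMv7-M executes Thumb-2 only: conditional instructions must sit in
+ | // it/itt/ite blocks, unlike the ARM-mode code in vm_arm.dasc.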
+ | it eq
+ | ldreq CARG4, TAB:CARG1->array
+ | it eq
+ | ldreq CARG2, TAB:CARG1->asize
+ | bne >9
+ |
+ | add CARG4, CARG4, CARG3, lsl #3
+ | cmp CARG3, CARG2 // In array part?
+ | it lo
+ | ldrdlo CARG3, CARG4, [CARG4]
+ | bhs ->vmeta_tgetv
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |1:
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cmp TAB:CARG2, #0
+ | beq <1 // No metatable: done.
+ | ldrb CARG2, TAB:CARG2->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <1 // 'no __index' flag set: done.
+ | decode_RB8 RB, INS // Restore RB.
+ | b ->vmeta_tgetv
+ |
+ |9:
+ | checktp CARG4, LJ_TSTR // String key?
+ | it eq
+ | moveq STR:RC, CARG3
+ | beq ->BC_TGETS_Z
+ | b ->vmeta_tgetv
+ break;
+ case BC_TGETS:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = dst*8, RB = table*8, RC = str_const (~)
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
+ | checktab CARG2, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ |1:
+ | ldrd CARG1, CARG2, NODE:INS->key // STALL: early NODE:INS.
+ | ldrd CARG3, CARG4, NODE:INS->val
+ | ldr NODE:INS, NODE:INS->next
+ | checktp CARG2, LJ_TSTR
+ | it eq
+ | cmpeq CARG1, STR:RC
+ | bne >4
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |3:
+ | ins_next1
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |4: // Follow hash chain.
+ | cmp NODE:INS, #0
+ | bne <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | mov CARG3, #0 // Optional clear of undef. value (during load stall).
+ | mvn CARG4, #~LJ_TNIL
+ | cmp TAB:CARG1, #0
+ | beq <3 // No metatable: done.
+ | ldrb CARG2, TAB:CARG1->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = dst*8, RB = table*8, RC = index
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | checktab CARG2, ->vmeta_tgetb // STALL: load CARG1, CARG2.
+ | ldr CARG3, TAB:CARG1->asize
+ | ldr CARG4, TAB:CARG1->array
+ | lsl CARG2, RC, #3
+ | cmp RC, CARG3
+ | ldrdlo_i CARG3, CARG4, CARG4, CARG2
+ | bhs ->vmeta_tgetb
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | beq >5
+ |1:
+ | ins_next2
+ | strd_i CARG3, CARG4, BASE, RA
+ | ins_next3
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cmp TAB:CARG2, #0
+ | beq <1 // No metatable: done.
+ | ldrb CARG2, TAB:CARG2->nomm
+ | tst CARG2, #1<<MM_index
+ | bne <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb
+ break;
+ case BC_TGETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG1, [BASE, RB]
+ | ldr CARG2, [BASE, RC]
+ | ldr CARG4, TAB:CARG1->array
+ | ldr CARG3, TAB:CARG1->asize
+ | add CARG4, CARG4, CARG2, lsl #3
+ | cmp CARG2, CARG3 // In array part?
+ | bhs ->vmeta_tgetr
+ | ldrd CARG1, CARG2, [CARG4]
+ |->BC_TGETR_Z:
+ | ins_next1
+ | ins_next2
+ | strd_i CARG1, CARG2, BASE, RA
+ | ins_next3
+ break;
+
+ case BC_TSETV:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = src*8, RB = table*8, RC = key*8
+ | ldrd_i TAB:CARG1, CARG2, BASE, RB
+ | ldrd_i CARG3, CARG4, BASE, RC
+ | checktab CARG2, ->vmeta_tsetv // STALL: load CARG1, CARG2.
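+ | // Stores mirror the BC_TGETV fast path, with an added GC write
+ | // barrier check on the table's mark bits.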
+ | checktp CARG4, LJ_TISNUM // Integer key?
+ | it eq
+ | ldreq CARG2, TAB:CARG1->array
+ | it eq
+ | ldreq CARG4, TAB:CARG1->asize
+ | bne >9
+ |
+ | add CARG2, CARG2, CARG3, lsl #3
+ | cmp CARG3, CARG4 // In array part?
+ | it lo
+ | ldrlo INS, [CARG2, #4]
+ | bhs ->vmeta_tsetv
+ | ins_next1 // Overwrites RB!
+ | checktp INS, LJ_TNIL
+ | ldrb INS, TAB:CARG1->marked
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >5
+ |1:
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, [CARG2]
+ | bne >7
+ |2:
+ | ins_next2
+ | ins_next3
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ | beq <1 // No metatable: done.
+ | ldrb RA, TAB:RA->nomm
+ | tst RA, #1<<MM_newindex
+ | bne <1 // 'no __newindex' flag set: done.
+ | ldr INS, [PC, #-4] // Restore RA and RB.
+ | decode_RB8 RB, INS
+ | decode_RA8 RA, INS
+ | b ->vmeta_tsetv
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG1, INS, CARG3
+ | b <2
+ |
+ |9:
+ | checktp CARG4, LJ_TSTR // String key?
+ | it eq
+ | moveq STR:RC, CARG3
+ | beq ->BC_TSETS_Z
+ | b ->vmeta_tsetv
+ break;
+ case BC_TSETS:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = src*8, RB = table*8, RC = str_const (~)
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #2] // STALL: early RC.
+ | checktab CARG2, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | ldr CARG3, TAB:CARG1->hmask
+ | ldr CARG4, STR:RC->sid
+ | ldr NODE:INS, TAB:CARG1->node
+ | mov TAB:RB, TAB:CARG1
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
+ | add CARG3, CARG3, CARG3, lsl #1
+ | mov CARG4, #0
+ | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
+ | strb CARG4, TAB:RB->nomm // Clear metamethod cache.
+ |1:
+ | ldrd CARG1, CARG2, NODE:INS->key
+ | ldr CARG4, NODE:INS->val.it
+ | ldr NODE:CARG3, NODE:INS->next
+ | checktp CARG2, LJ_TSTR
+ | it eq
+ | cmpeq CARG1, STR:RC
+ | bne >5
+ | ldrb CARG2, TAB:RB->marked
+ | checktp CARG4, LJ_TNIL // Key found, but nil value?
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >4
+ |2:
+ | tst CARG2, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, NODE:INS->val
+ | bne >7
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | cmp TAB:CARG1, #0
+ | beq <2 // No metatable: done.
+ | ldrb CARG1, TAB:CARG1->nomm
+ | tst CARG1, #1<<MM_newindex
+ | bne <2 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | movs NODE:INS, NODE:CARG3
+ | bne <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ldr TAB:CARG1, TAB:RB->metatable
+ | mov CARG3, TMPDp
+ | str PC, SAVE_PC
+ | cmp TAB:CARG1, #0 // No metatable: continue.
+ | str BASE, L->base
+ | it ne
+ | ldrbne CARG2, TAB:CARG1->nomm
+ | mov CARG1, L
+ | beq >6
+ | tst CARG2, #1<<MM_newindex
+ | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | mvn CARG4, #~LJ_TSTR
+ | str STR:RC, TMPDlo
+ | mov CARG2, TAB:RB
+ | str CARG4, TMPDhi
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ldr BASE, L->base
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | strd CARG3, CARG4, [CRET1]
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, CARG2, CARG3
+ | b <3
+ break;
+ case BC_TSETB:
+ | decode_RB8 RB, INS
+ | and RC, RC, #255
+ | // RA = src*8, RB = table*8, RC = index
+ | ldrd_i CARG1, CARG2, BASE, RB
+ | checktab CARG2, ->vmeta_tsetb // STALL: load CARG1, CARG2.
+ | ldr CARG3, TAB:CARG1->asize
+ | ldr RB, TAB:CARG1->array
+ | lsl CARG2, RC, #3
+ | cmp RC, CARG3
+ | ldrdlo_iw CARG3, CARG4, CARG2, RB
+ | bhs ->vmeta_tsetb
+ | ins_next1 // Overwrites RB!
+ | checktp CARG4, LJ_TNIL
+ | ldrb INS, TAB:CARG1->marked
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | beq >5
+ |1:
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | strd CARG3, CARG4, [CARG2]
+ | bne >7
+ |2:
+ | ins_next2
+ | ins_next3
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:RA, TAB:CARG1->metatable
+ | cmp TAB:RA, #0
+ | beq <1 // No metatable: done.
+ | ldrb RA, TAB:RA->nomm
+ | tst RA, #1<<MM_newindex
+ | bne <1 // 'no __newindex' flag set: done.
+ | ldr INS, [PC, #-4] // Restore INS.
+ | decode_RA8 RA, INS
+ | b ->vmeta_tsetb
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG1, INS, CARG3
+ | b <2
+ break;
+ case BC_TSETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = src*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG2, [BASE, RB]
+ | ldr CARG3, [BASE, RC]
+ | ldrb INS, TAB:CARG2->marked
+ | ldr CARG1, TAB:CARG2->array
+ | ldr CARG4, TAB:CARG2->asize
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | add CARG1, CARG1, CARG3, lsl #3
+ | bne >7
+ |2:
+ | cmp CARG3, CARG4 // In array part?
+ | bhs ->vmeta_tsetr
+ |->BC_TSETR_Z:
+ | ldrd_i CARG3, CARG4, BASE, RA
+ | ins_next1
+ | ins_next2
+ | strd CARG3, CARG4, [CARG1]
+ | ins_next3
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, INS, RB
+ | b <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base*8 (table at base-1), RC = num_const (start index)
+ | add RA, BASE, RA
+ |1:
+ | ldr RB, SAVE_MULTRES
+ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
+ | ldr CARG1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
+ | subs RB, RB, #8
+ | ldr CARG4, TAB:CARG2->asize
+ | beq >4 // Nothing to copy?
+ | add CARG3, CARG1, RB, lsr #3
+ | cmp CARG3, CARG4
+ | ldr CARG4, TAB:CARG2->array
+ | add RB, RA, RB
+ | bhi >5
+ | add INS, CARG4, CARG1, lsl #3
+ | ldrb CARG1, TAB:CARG2->marked
+ |3: // Copy result slots to table.
+ | ldrd CARG3, CARG4, [RA], #8
+ | strd CARG3, CARG4, [INS], #8
+ | cmp RA, RB
+ | blo <3
+ | tst CARG1, #LJ_GC_BLACK // isblack(table)
+ | bne >7
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | // Must not reallocate the stack.
+ | .IOS ldr BASE, L->base
+ | b <1
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, CARG1, CARG3
+ | b <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base*8, (RB = nresults+1,) RC = extra_nargs
+ | ldr CARG1, SAVE_MULTRES
+ | decode_RC8 NARGS8:RC, INS
+ | add NARGS8:RC, NARGS8:RC, CARG1
+ | b ->BC_CALL_Z
+ break;
+ case BC_CALL:
+ | decode_RC8 NARGS8:RC, INS
+ | // RA = base*8, (RB = nresults+1,) RC = (nargs+1)*8
+ |->BC_CALL_Z:
+ | mov RB, BASE // Save old BASE for vmeta_call.
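+ | // The callee is loaded below; checkfunc routes non-function values to
+ | // ->vmeta_call (__call), then ins_call dispatches into the new frame.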
+ | ldrd_iw CARG3, CARG4, BASE, RA + | sub NARGS8:RC, NARGS8:RC, #8 + | add BASE, BASE, #8 + | checkfunc CARG4, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs + | ldr CARG1, SAVE_MULTRES + | add NARGS8:RC, CARG1, RC, lsl #3 + | b ->BC_CALLT1_Z + break; + case BC_CALLT: + | lsl NARGS8:RC, RC, #3 + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT1_Z: + | ldrd_iw LFUNC:CARG3, CARG4, RA, BASE + | sub NARGS8:RC, NARGS8:RC, #8 + | add RA, RA, #8 + | checkfunc CARG4, ->vmeta_callt + | ldr PC, [BASE, FRAME_PC] + |->BC_CALLT2_Z: + | mov RB, #0 + | ldrb CARG4, LFUNC:CARG3->ffid + | tst PC, #FRAME_TYPE + | bne >7 + |1: + | str LFUNC:CARG3, [BASE, FRAME_FUNC] // Copy function down, but keep PC. + | cmp NARGS8:RC, #0 + | beq >3 + |2: + | ldrd_i CARG1, CARG2, RA, RB + | add INS, RB, #8 + | cmp INS, NARGS8:RC + | strd_i CARG1, CARG2, BASE, RB + | mov RB, INS + | bne <2 + |3: + | cmp CARG4, #1 // (> FF_C) Calling a fast function? + | bhi >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | ldr INS, [PC, #-4] + | decode_RA8 RA, INS + | sub CARG1, BASE, RA + | ldr LFUNC:CARG1, [CARG1, #-16] + | ldr CARG1, LFUNC:CARG1->field_pc + | ldr KBASE, [CARG1, #PC2PROTO(k)] + | b <4 + | + |7: // Tailcall from a vararg function. + | eor PC, PC, #FRAME_VARG + | tst PC, #FRAME_TYPEP // Vararg frame below? + | it ne + | movne CARG4, #0 // Clear ffid if no Lua function below. + | bne <1 + | sub BASE, BASE, PC + | ldr PC, [BASE, FRAME_PC] + | tst PC, #FRAME_TYPE + | it ne + | movne CARG4, #0 // Clear ffid if no Lua function below. + | b <1 + break; + + case BC_ITERC: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA + | mov RB, BASE // Save old BASE for vmeta_call. + | ldrd CARG3, CARG4, [RA, #-16] + | ldrd CARG1, CARG2, [RA, #-8] + | add BASE, RA, #8 + | strd CARG3, CARG4, [RA, #8] // Copy state. + | strd CARG1, CARG2, [RA, #16] // Copy control var. + | // STALL: locked CARG3, CARG4. + | ldrd LFUNC:CARG3, CARG4, [RA, #-24] + | mov NARGS8:RC, #16 // Iterators get 2 arguments. + | // STALL: load CARG3, CARG4. + | strd LFUNC:CARG3, CARG4, [RA] // Copy callable. + | checkfunc CARG4, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop + |.endif + |->vm_IITERN: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA + | ldr TAB:RB, [RA, #-16] + | ldr CARG1, [RA, #-8] // Get index from control var. + | ldr INS, TAB:RB->asize + | ldr CARG2, TAB:RB->array + | add PC, PC, #4 + |1: // Traverse array part. + | subs RC, CARG1, INS + | add CARG3, CARG2, CARG1, lsl #3 + | bhs >5 // Index points after array part? + | ldrd CARG3, CARG4, [CARG3] + | checktp CARG4, LJ_TNIL + | it eq + | addeq CARG1, CARG1, #1 // Skip holes in array part. + | beq <1 + | ldrh RC, [PC, #-2] + | mvn CARG2, #~LJ_TISNUM + | strd CARG3, CARG4, [RA, #8] + | add RC, PC, RC, lsl #2 + | add RB, CARG1, #1 + | strd CARG1, CARG2, [RA] + | sub PC, RC, #0x20000 + | str RB, [RA, #-8] // Update control var. + |3: + | ins_next + | + |5: // Traverse hash part. + | ldr CARG4, TAB:RB->hmask + | ldr NODE:RB, TAB:RB->node + |6: + | add CARG1, RC, RC, lsl #1 + | cmp RC, CARG4 // End of iteration? Branch to ITERL+1. + | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 + | bhi <3 + | ldrd CARG1, CARG2, NODE:CARG3->val + | checktp CARG2, LJ_TNIL + | add RC, RC, #1 + | beq <6 // Skip holes in hash part. 
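+ | // Non-nil slot found: copy the key/value pair to the result slots,
+ | // update the control var and take the ITERL branch.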
+ | ldrh RB, [PC, #-2] + | add RC, RC, INS + | ldrd CARG3, CARG4, NODE:CARG3->key + | str RC, [RA, #-8] // Update control var. + | strd CARG1, CARG2, [RA, #8] + | add RC, PC, RB, lsl #2 + | sub PC, RC, #0x20000 + | strd CARG3, CARG4, [RA] + | b <3 + break; + + case BC_ISNEXT: + | // RA = base*8, RC = target (points to ITERN) + | add RA, BASE, RA + | add RC, PC, RC, lsl #2 + | ldrd CFUNC:CARG1, CFUNC:CARG2, [RA, #-24] + | ldr CARG3, [RA, #-12] + | ldr CARG4, [RA, #-4] + | checktp CARG2, LJ_TFUNC + | it eq + | ldrbeq CARG1, CFUNC:CARG1->ffid + | checktpeq CARG3, LJ_TTAB + | checktpeq CARG4, LJ_TNIL + | it eq + | cmpeq CARG1, #FF_next_N + | it eq + | subeq PC, RC, #0x20000 + | bne >5 + | ins_next1 + | ins_next2 + | mov CARG1, #0 + | mvn CARG2, #~LJ_KEYINDEX + | strd CARG1, CARG2, [RA, #-8] // Initialize control var. + |1: + | ins_next3 + |5: // Despecialize bytecode if any of the checks fail. + | mov CARG1, #BC_JMP + | mov OP, #BC_ITERC + | strb CARG1, [PC, #-4] + | sub PC, RC, #0x20000 + |.if JIT + | ldrb CARG1, [PC] + | cmp CARG1, #BC_ITERN + | bne >6 + |.endif + | strb OP, [PC] // Subsumes ins_next1. + | ins_next2 + | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | sub CARG2, DISPATCH, #-DISPATCH_J(trace) + | ldr CARG1, [CARG2] + | ldrh CARG2, [PC, #2] + | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] + | // Subsumes ins_next1 and ins_next2. + | ldr INS, TRACE:CARG1->startins + | .long 0xf36c0e07 //BFI INS, OP, #0, #8 + | str INS, [PC], #4 + | b <1 + |.endif + break; + + case BC_VARG: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ldr CARG1, [BASE, FRAME_PC] + | add RC, BASE, RC + | add RA, BASE, RA + | add RC, RC, #FRAME_VARG + | add CARG4, RA, RB + | sub CARG3, BASE, #8 // CARG3 = vtop + | sub RC, RC, CARG1 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | cmp RB, #0 + | sub CARG1, CARG3, RC + | beq >5 // Copy all varargs? + | sub CARG4, CARG4, #16 + |1: // Copy vararg slots to destination slots. + | cmp RC, CARG3 + | ite lo + | ldrdlo CARG1, CARG2, [RC], #8 + | mvnhs CARG2, #~LJ_TNIL + | cmp RA, CARG4 + | strd CARG1, CARG2, [RA], #8 + | blo <1 + |2: + | ins_next + | + |5: // Copy all varargs. + | ldr CARG4, L->maxstack + | cmp CARG1, #0 + | ite le + | movle RB, #8 // MULTRES = (0+1)*8 + | addgt RB, CARG1, #8 + | add CARG2, RA, CARG1 + | str RB, SAVE_MULTRES + | ble <2 + | cmp CARG2, CARG4 + | bhi >7 + |6: + | ldrd CARG1, CARG2, [RC], #8 + | strd CARG1, CARG2, [RA], #8 + | cmp RC, CARG3 + | blo <6 + | b <2 + | + |7: // Grow stack for varargs. + | lsr CARG2, CARG1, #3 + | str RA, L->top + | mov CARG1, L + | str BASE, L->base + | sub RC, RC, BASE // Need delta, because BASE may change. 
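+ | // lj_state_growstack may reallocate the stack, so RA/RC survive only
+ | // as deltas and are rebased from the reloaded BASE below.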
+ | str PC, SAVE_PC + | sub RA, RA, BASE + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | add RA, BASE, RA + | add RC, BASE, RC + | sub CARG3, BASE, #8 + | b <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RC = extra results + | ldr CARG1, SAVE_MULTRES + | ldr PC, [BASE, FRAME_PC] + | add RA, BASE, RA + | add RC, CARG1, RC, lsl #3 + | b ->BC_RETM_Z + break; + + case BC_RET: + | // RA = results*8, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | add RA, BASE, RA + |->BC_RETM_Z: + | str RC, SAVE_MULTRES + |1: + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | bne ->BC_RETV2_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return + | ldr INS, [PC, #-4] + | subs CARG4, RC, #8 + | sub CARG3, BASE, #8 + | beq >3 + |2: + | ldrd CARG1, CARG2, [RA], #8 + | add BASE, BASE, #8 + | subs CARG4, CARG4, #8 + | strd CARG1, CARG2, [BASE, #-16] + | bne <2 + |3: + | decode_RA8 RA, INS + | sub CARG4, CARG3, RA + | decode_RB8 RB, INS + | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] + |5: + | cmp RB, RC // More results expected? + | bhi >6 + | mov BASE, CARG4 + | ldr CARG2, LFUNC:CARG1->field_pc + | ins_next1 + | ins_next2 + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next3 + | + |6: // Fill up results with nil. + | mvn CARG2, #~LJ_TNIL + | add BASE, BASE, #8 + | add RC, RC, #8 + | str CARG2, [BASE, #-12] + | b <5 + | + |->BC_RETV1_Z: // Non-standard return case. + | add RA, BASE, RA + |->BC_RETV2_Z: + | tst CARG2, #FRAME_TYPEP + | bne ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, CARG2 + | ldr PC, [BASE, FRAME_PC] + | b <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | str RC, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | it eq + | ldreq INS, [PC, #-4] + | bne ->BC_RETV1_Z + if (op == BC_RET1) { + | ldrd_i CARG1, CARG2, BASE, RA + } + | sub CARG4, BASE, #8 + | decode_RA8 RA, INS + if (op == BC_RET1) { + | strd CARG1, CARG2, [CARG4] + } + | sub BASE, CARG4, RA + | decode_RB8 RB, INS + | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] + |5: + | cmp RB, RC + | bhi >6 + | ldr CARG2, LFUNC:CARG1->field_pc + | ins_next1 + | ins_next2 + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next3 + | + |6: // Fill up results with nil. + | sub CARG2, CARG4, #4 + | mvn CARG3, #~LJ_TNIL + | str CARG3, [CARG2, RC] + | add RC, RC, #8 + | b <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] + |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] + |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] + |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. 
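+ | // hotloop (above) decrements this loop's hot counter and branches to
+ | // ->vm_hotloop, which calls lj_trace_hot, when it underflows.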
+ break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RC = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | ldrd_iw CARG1, CARG2, RA, BASE + if (op != BC_JFORL) { + | add RC, PC, RC, lsl #2 + } + if (!vk) { + | ldrd CARG3, CARG4, FOR_STOP + | checktp CARG2, LJ_TISNUM + | ldr RB, FOR_TSTEP + | bne >5 + | checktp CARG4, LJ_TISNUM + | ldr CARG4, FOR_STEP + | checktpeq RB, LJ_TISNUM + | bne ->vmeta_for + | cmp CARG4, #0 + | blt >4 + | cmp CARG1, CARG3 + } else { + | ldrd CARG3, CARG4, FOR_STEP + | checktp CARG2, LJ_TISNUM + | bne >5 + | adds CARG1, CARG1, CARG3 + | ldr CARG4, FOR_STOP + if (op == BC_IFORL) { + | it vs + | addvs RC, PC, #0x20000 // Overflow: prevent branch. + } else { + | bvs >2 // Overflow: do not enter mcode. + } + | cmp CARG3, #0 + | blt >4 + | cmp CARG1, CARG4 + } + |1: + if (op == BC_FORI) { + | it gt + | subgt PC, RC, #0x20000 + } else if (op == BC_JFORI) { + | sub PC, RC, #0x20000 + | it le + | ldrhle RC, [PC, #-2] + } else if (op == BC_IFORL) { + | it le + | suble PC, RC, #0x20000 + } + if (vk) { + | strd CARG1, CARG2, FOR_IDX + } + |2: + | ins_next1 + | ins_next2 + | strd CARG1, CARG2, FOR_EXT + if (op == BC_JFORI || op == BC_JFORL) { + | ble =>BC_JLOOP + } + |3: + | ins_next3 + | + |4: // Invert check for negative step. + if (!vk) { + | cmp CARG3, CARG1 + } else { + | cmp CARG4, CARG1 + } + | b <1 + | + |5: // FP loop. + if (!vk) { + | itt lo + | cmnlo CARG4, #-LJ_TISNUM + | cmnlo RB, #-LJ_TISNUM + | bhs ->vmeta_for + |.if FPU + | vldr d0, FOR_IDX + | vldr d1, FOR_STOP + | cmp RB, #0 + | vstr d0, FOR_EXT + |.else + | cmp RB, #0 + | strd CARG1, CARG2, FOR_EXT + | blt >8 + |.endif + } else { + |.if FPU + | vldr d0, FOR_IDX + | vldr d2, FOR_STEP + | vldr d1, FOR_STOP + | cmp CARG4, #0 + | vadd.f64 d0, d0, d2 + |.else + | cmp CARG4, #0 + | blt >8 + | bl extern __aeabi_dadd + | strd CARG1, CARG2, FOR_IDX + | ldrd CARG3, CARG4, FOR_STOP + | strd CARG1, CARG2, FOR_EXT + |.endif + } + |6: + |.if FPU + | ite ge + | vcmpge.f64 d0, d1 + | vcmplt.f64 d1, d0 + | vmrs + |.else + | bl extern __aeabi_cdcmple + |.endif + if (vk) { + |.if FPU + | vstr d0, FOR_IDX + | vstr d0, FOR_EXT + |.endif + } + if (op == BC_FORI) { + | it hi + | subhi PC, RC, #0x20000 + } else if (op == BC_JFORI) { + | sub PC, RC, #0x20000 + | it ls + | ldrhls RC, [PC, #-2] + | bls =>BC_JLOOP + } else if (op == BC_IFORL) { + | it ls + | subls PC, RC, #0x20000 + } else { + | bls =>BC_JLOOP + } + | ins_next1 + | ins_next2 + | b <3 + | + |.if not FPU + |8: // Invert check for negative step. + if (vk) { + | bl extern __aeabi_dadd + | strd CARG1, CARG2, FOR_IDX + | strd CARG1, CARG2, FOR_EXT + } + | mov CARG3, CARG1 + | mov CARG4, CARG2 + | ldrd CARG1, CARG2, FOR_STOP + | b <6 + |.endif + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RC = target + | ldrd_iw CARG1, CARG2, RA, BASE + if (op == BC_JITERL) { + | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. + | it ne + | strdne CARG1, CARG2, [RA, #-8] + | bne =>BC_JLOOP + } else { + | add RC, PC, RC, lsl #2 + | // STALL: load CARG1, CARG2. + | cmn CARG2, #-LJ_TNIL // Stop if iterator returned nil. + | itt ne + | subne PC, RC, #0x20000 // Otherwise save control var + branch. 
+ | strdne CARG1, CARG2, [RA, #-8]
+ }
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base*8, RC = target (loop extent)
+ | // Note: RA/RC are only used by the trace recorder to determine scope/extent.
+ | // This opcode does NOT jump; its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base*8, RC = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base (ignored), RC = traceno
+ | sub RB, DISPATCH, #-DISPATCH_J(trace)
+ | ldr CARG1, [RB]
+ | mov CARG2, #0 // Traces on ARM don't store the trace number, so use 0.
+ | ldr TRACE:RC, [CARG1, RC, lsl #2]
+ | st_vmstate CARG2
+ | ldr RA, TRACE:RC->mcode
+ | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
+ | sub RB, DISPATCH, #-DISPATCH_GL(tmpbuf.L)
+ | str L, [RB]
+ | add RA, RA, #1 // Set Thumb bit for bx.
+ | bx RA
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base*8 (only used by trace recorder), RC = target
+ | add RC, PC, RC, lsl #2
+ | sub PC, RC, #0x20000
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | ldrb CARG2, [PC, #-4+PC2PROTO(numparams)]
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | bhi ->vm_growstack_l
+ if (op != BC_JFUNCF) {
+ | ins_next1
+ | ins_next2
+ }
+ |2:
+ | cmp NARGS8:RC, CARG2, lsl #3 // Check for missing parameters.
+ | mvn CARG4, #~LJ_TNIL
+ | blo >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RC, INS
+ | b =>BC_JLOOP
+ } else {
+ | ins_next3
+ }
+ |
+ |3: // Clear missing parameters.
+ | strd_i CARG3, CARG4, BASE, NARGS8:RC
+ | add NARGS8:RC, NARGS8:RC, #8
+ | b <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | add CARG4, BASE, RC
+ | add RA, RA, RC
+ | str LFUNC:CARG3, [CARG4] // Store copy of LFUNC.
+ | add CARG2, RC, #8+FRAME_VARG
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | str CARG2, [CARG4, #4] // Store delta + FRAME_VARG.
+ | bhs ->vm_growstack_l
+ | ldrb RB, [PC, #-4+PC2PROTO(numparams)]
+ | mov RA, BASE
+ | mov RC, CARG4
+ | cmp RB, #0
+ | add BASE, CARG4, #8
+ | beq >3
+ | mvn CARG3, #~LJ_TNIL
+ |1:
+ | cmp RA, RC // Less args than parameters?
+ | ite lo
+ | ldrdlo CARG1, CARG2, [RA], #8
+ | movhs CARG2, CARG3
+ | it lo
+ | strlo CARG3, [RA, #-4] // Clear old fixarg slot (help the GC).
+ |2:
+ | subs RB, RB, #1
+ | strd CARG1, CARG2, [CARG4, #8]!
+ | bne <1
+ |3:
+ | ins_next
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ldr CARG4, CFUNC:CARG3->f
+ } else {
+ | ldr CARG4, [DISPATCH, #DISPATCH_GL(wrapf)]
+ }
+ | add CARG2, RA, NARGS8:RC
+ | ldr CARG1, L->maxstack
+ | add RC, BASE, NARGS8:RC
+ | str BASE, L->base
+ | cmp CARG2, CARG1
+ | str RC, L->top
+ if (op == BC_FUNCCW) {
+ | ldr CARG2, CFUNC:CARG3->f
+ }
+ | mv_vmstate CARG3, C
+ | mov CARG1, L
+ | bhi ->vm_growstack_c // Need to grow stack.
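+ | // Fast path: vmstate is flagged as C around the call, and the
+ | // interpreter state (BASE, vmstate, cur_L) is restored from L after it.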
+ | st_vmstate CARG3 + | blx CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. + | ldr BASE, L->base + | mv_vmstate CARG3, INTERP + | ldr CRET2, L->top + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | lsl RC, CRET1, #3 + | st_vmstate CARG3 + | ldr PC, [BASE, FRAME_PC] + | sub RA, CRET2, RC // RA = L->top - nresults*8 + | b ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 0xe\n" /* Return address is in lr. */ + "\t.byte 0xc\n\t.uleb128 0xd\n\t.uleb128 0\n" /* def_cfa sp */ + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.long .Lbegin\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x8e\n\t.uleb128 1\n", /* offset lr */ + fcofs, CFRAME_SIZE); + for (i = 11; i >= (LJ_ARCH_HASFPU ? 5 : 4); i--) /* offset r4-r11 */ + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2+(11-i)); +#if LJ_ARCH_HASFPU + for (i = 15; i >= 8; i--) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 %d, %d\n", + 64+2*i, 10+2*(15-i)); + fprintf(ctx->fp, "\t.byte 0x84\n\t.uleb128 %d\n", 25); /* offset r4 */ +#endif + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.long lj_vm_ffi_call\n" + "\t.long %d\n" + "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ + "\t.byte 0x8e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x8b\n\t.uleb128 2\n" /* offset r11 */ + "\t.byte 0x85\n\t.uleb128 3\n" /* offset r5 */ + "\t.byte 0x84\n\t.uleb128 4\n" /* offset r4 */ + "\t.byte 0xd\n\t.uleb128 0xb\n" /* def_cfa_register r11 */ + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif + break; + default: + break; + } +} +