ARM64: Improve IR_HREF code generation.

Thanks to Peter Cawley. #1070
Mike Pall 2023-09-09 17:15:26 +02:00
parent 315dc3e776
commit 435d8c6301

src/lj_asm_arm64.h

@@ -773,57 +773,36 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   int destused = ra_used(ir);
   Reg dest = ra_dest(as, ir, allow);
   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
-  Reg key = 0, tmp = RID_TMP;
-  Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+  Reg key = 0, tmp = RID_TMP, type = RID_NONE, tkey;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
-  int isk = irref_isk(ir->op2);
+  int isk = irref_isk(refkey);
   IRType1 kt = irkey->t;
   uint32_t k = 0;
   uint32_t khash;
-  MCLabel l_end, l_loop, l_next;
+  MCLabel l_end, l_loop;
   rset_clear(allow, tab);
-  if (!isk) {
-    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
-    rset_clear(allow, key);
-    if (!irt_isstr(kt)) {
-      tmp = ra_scratch(as, allow);
-      rset_clear(allow, tmp);
-    }
-  } else if (irt_isnum(kt)) {
-    int64_t val = (int64_t)ir_knum(irkey)->u64;
-    if (!(k = emit_isk12(val))) {
-      key = ra_allock(as, val, allow);
-      rset_clear(allow, key);
-    }
-  } else if (!irt_ispri(kt)) {
-    if (!(k = emit_isk12(irkey->i))) {
-      key = ra_alloc1(as, refkey, allow);
-      rset_clear(allow, key);
-    }
-  }
-  /* Allocate constants early. */
-  if (irt_isnum(kt)) {
-    if (!isk) {
-      tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
-      ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
-      rset_clear(allow, tisnum);
-    }
-  } else if (irt_isaddr(kt)) {
-    if (isk) {
-      int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
-      scr = ra_allock(as, kk, allow);
-    } else {
-      scr = ra_scratch(as, allow);
-    }
-    rset_clear(allow, scr);
-  } else {
-    lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
-    type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
-    scr = ra_scratch(as, rset_clear(allow, type));
-    rset_clear(allow, scr);
-  }
+
+  /* Allocate registers outside of the loop. */
+  if (irkey->o != IR_KNUM || !(k = emit_isk12((int64_t)ir_knum(irkey)->u64))) {
+    key = ra_alloc1(as, refkey, irt_isnum(kt) ? RSET_FPR : allow);
+    rset_clear(allow, key);
+  }
+  if (!isk) {
+    tkey = ra_scratch(as, allow);
+    rset_clear(allow, tkey);
+  } else if (irt_isnum(kt)) {
+    tkey = key;  /* Assumes -0.0 is already canonicalized to +0.0. */
+  } else {
+    int64_t kk;
+    if (irt_isaddr(kt)) {
+      kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+    } else {
+      lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+      kk = ~((int64_t)~irt_toitype(kt) << 47);
+    }
+    tkey = ra_allock(as, kk, allow);
+    rset_clear(allow, tkey);
+  }
 
   /* Key not found in chain: jump to exit (if merged) or load niltv. */
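
Note on this hunk: the key, tag bits included, is now folded into a single tagged 64-bit value tkey, so the chain loop below can match tag and payload with one compare. The tagging arithmetic follows LuaJIT's GC64 value layout: the itype sits in the top 17 bits, GC pointers keep their 47-bit payload, and primitives get an all-ones payload. A minimal standalone sketch of the two formulas (not LuaJIT source; the itype constants are illustrative stand-ins in the style of LJ_TSTR = ~4u and LJ_TTRUE = ~2u):

    #include <stdint.h>
    #include <stdio.h>

    /* Tag a 47-bit GC pointer: same arithmetic as kk for address keys. */
    static uint64_t tag_gcobj(int32_t itype, uint64_t ptr47)
    {
      return ((uint64_t)(int64_t)itype << 47) | ptr47;
    }

    /* Tag a primitive (true/false): same arithmetic as kk for pri keys,
       i.e. itype in the top bits and an all-ones payload below. */
    static uint64_t tag_pri(int32_t itype)
    {
      return ~((uint64_t)~itype << 47);
    }

    int main(void)
    {
      printf("string key: %016llx\n",
             (unsigned long long)tag_gcobj((int32_t)~4u, 0x12345678ull));
      printf("true key:   %016llx\n",
             (unsigned long long)tag_pri((int32_t)~2u));
      return 0;
    }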
@@ -839,50 +818,31 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
   /* Follow hash chain until the end. */
   l_loop = --as->mcp;
-  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
-  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
-  l_next = emit_label(as);
+  if (destused)
+    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
 
   /* Type and value comparison. */
   if (merge == IR_EQ)
     asm_guardcc(as, CC_EQ);
   else
     emit_cond_branch(as, CC_EQ, l_end);
-  if (irt_isnum(kt)) {
-    if (isk) {
-      /* Assumes -0.0 is already canonicalized to +0.0. */
-      if (k)
-        emit_n(as, A64I_CMPx^k, tmp);
-      else
-        emit_nm(as, A64I_CMPx, key, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
-    } else {
-      emit_nm(as, A64I_FCMPd, key, ftmp);
-      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
-      emit_cond_branch(as, CC_LO, l_next);
-      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
-    }
-  } else if (irt_isaddr(kt)) {
-    if (isk) {
-      emit_nm(as, A64I_CMPx, scr, tmp);
-      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
-    } else {
-      emit_nm(as, A64I_CMPx, tmp, scr);
-      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
-    }
-  } else {
-    emit_nm(as, A64I_CMPx, scr, type);
-    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
-  }
-  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
-  if (!isk && irt_isaddr(kt)) {
-    type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
-    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
-    rset_clear(allow, type);
+  emit_nm(as, A64I_CMPx^k, tmp, tkey);
+  if (!destused)
+    emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
+  emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key));
+  *l_loop = A64I_X | A64I_CBNZ | A64F_S19(as->mcp - l_loop) | dest;
+
+  /* Construct tkey as canonicalized or tagged key. */
+  if (!isk) {
+    if (irt_isnum(kt)) {
+      emit_dnm(as, A64I_CSELx | A64F_CC(CC_EQ), tkey, RID_ZERO, tkey);
+    } else {
+      lj_assertA(irt_isaddr(kt), "bad HREF key type");
+      type = ra_allock(as, irt_toitype(kt) << 15, allow);
+      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 32), tkey, key, type);
+    }
   }
 
   /* Load main position relative to tab->node into dest. */
   khash = isk ? ir_khash(as, irkey) : 1;
   if (khash == 0) {
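
Since the emitter works backwards, read this hunk bottom-up: tkey is constructed once before the loop, then each iteration loads Node.key, compares it against tkey with a single CMPx (XORed with the k12 immediate when a constant number fits), and follows Node.next under a backpatched CBNZ, replacing the old per-type dispatch and the CMP #0 / B.NE pair. A hand-written C equivalent of the emitted loop, with stand-in struct definitions just to make the sketch compile (the real Node.next is an MRef, and on a miss the trace exits or loads niltv):

    #include <stdint.h>
    #include <stddef.h>

    /* Stand-ins for the real LuaJIT structs (illustrative only). */
    typedef struct Node {
      uint64_t key_u64;      /* tagged 64-bit key, cf. Node.key.u64 */
      uint64_t val_u64;      /* cf. Node.val */
      struct Node *next;     /* hash chain; an MRef in LuaJIT proper */
    } Node;

    typedef struct { Node *node; uint32_t hmask; } TabView;

    /* What the emitted chain walk computes, expressed in C. */
    static const Node *href_sketch(const TabView *t, uint64_t tkey,
                                   uint32_t hash)
    {
      const Node *n = &t->node[hash & t->hmask];  /* main position */
      do {
        if (n->key_u64 == tkey)  /* one compare covers tag and payload */
          return n;
        n = n->next;
      } while (n != NULL);       /* the backpatched CBNZ */
      return NULL;               /* miss: exit (if merged) or niltv */
    }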
@@ -896,7 +856,6 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
     } else if (irt_isstr(kt)) {
-      /* Fetch of str->sid is cheaper than ra_allock. */
       emit_dnm(as, A64I_ANDw, dest, dest, tmp);
       emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
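
The deleted comment explained a design choice that still holds: for interned strings the hash is the string's sid field, so loading it is cheaper than materializing the hash as a constant. For reference, the matching string case in lj_tab.c reads roughly like this (paraphrased from the v2.1 sources, where sid is assigned at interning time):

    /* Paraphrase of the lj_tab.c string-key main-position lookup. */
    #define hashmask(t, hash)  (&noderef((t)->node)[(hash) & (t)->hmask])
    #define hashstr(t, s)      hashmask(t, (s)->sid)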
@@ -905,23 +864,18 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
       emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
       emit_dnm(as, A64I_SUBw, dest, dest, tmp);
       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
-      emit_dnm(as, A64I_EORw, dest, dest, tmp);
-      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
+      emit_dnm(as, A64I_EORw | A64F_SH(A64SH_ROR, 32-HASH_ROT2), dest, tmp, dest);
       emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
-      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
       if (irt_isnum(kt)) {
+        emit_dnm(as, A64I_EORw, tmp, tkey, dest);
         emit_dnm(as, A64I_ADDw, dest, dest, dest);
-        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
-        emit_dm(as, A64I_MOVw, tmp, dest);
-        emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
+        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, tkey);
+        emit_nm(as, A64I_FCMPZd, (key & 31), 0);
+        emit_dn(as, A64I_FMOV_R_D, tkey, (key & 31));
       } else {
-        checkmclim(as);
-        emit_dm(as, A64I_MOVw, tmp, key);
-        emit_dnm(as, A64I_EORw, dest, dest,
-                 ra_allock(as, irt_toitype(kt) << 15, allow));
-        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
-        emit_dm(as, A64I_MOVx, dest, key);
+        emit_dnm(as, A64I_EORw, tmp, key, dest);
+        emit_dnm(as, A64I_EORx | A64F_SH(A64SH_LSR, 32), dest, type, key);
       }
     }
   }
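
The tail of this branch has to keep computing exactly the same bits as hashrot() in lj_tab.c (HASH_ROT1/2/3 are 14, 5 and 13 there). Two ARM64 mappings do the work: EXTR Rd, Rn, Rn, #(32-k) is a rotate-left by k, and the new EOR with a ROR-shifted register operand fuses the HASH_ROT2 rotate into the XOR, saving one instruction. For number keys the inputs are the low 32 bits and the high 32 bits doubled (the ADDw); for address keys the high word is XORed with the type tag (the EORx with LSR 32). For reference, the C original, copied in spirit from lj_tab.c with a simplified lj_rol:

    #include <stdint.h>

    #define HASH_ROT1  14
    #define HASH_ROT2  5
    #define HASH_ROT3  13

    static inline uint32_t lj_rol(uint32_t x, int n)
    {
      return (x << n) | (x >> (32 - n));
    }

    /* Bit scramble of the low/high 32 bits of a key, as in lj_tab.c's
       hashrot().  The assembler above emits this dataflow bottom-up. */
    static inline uint32_t hashrot(uint32_t lo, uint32_t hi)
    {
      lo ^= hi; hi = lj_rol(hi, HASH_ROT1);
      lo -= hi; hi = lj_rol(hi, HASH_ROT2);
      hi ^= lo; hi -= lj_rol(lo, HASH_ROT3);
      return hi;
    }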