From d1194a82eb24afa1c749a0a8080b67d168f9f201 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 8 Sep 2013 02:53:23 +0200 Subject: [PATCH] Low-overhead profiler, part 4: JIT compiler support. --- src/Makefile.dep | 8 +++---- src/lj_asm.c | 1 + src/lj_asm_arm.h | 10 ++++++++ src/lj_asm_mips.h | 11 +++++++++ src/lj_asm_ppc.h | 11 +++++++++ src/lj_asm_x86.h | 10 ++++++++ src/lj_ir.h | 1 + src/lj_jit.h | 6 +++++ src/lj_opt_fold.c | 11 +++++++++ src/lj_profile.c | 17 +++++++++++++- src/lj_record.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ src/lj_trace.c | 17 ++++++++------ 12 files changed, 151 insertions(+), 12 deletions(-) diff --git a/src/Makefile.dep b/src/Makefile.dep index 7991bd16..3f080374 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -170,12 +170,12 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_vm.h lj_vmevent.h lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ - lj_jit.h lj_ir.h lj_profile.h luajit.h + lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ - lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ - lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ - lj_ffrecord.h lj_snap.h lj_vm.h + lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ + lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ + lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ diff --git a/src/lj_asm.c b/src/lj_asm.c index ee1cc5b5..2bf273e1 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -1565,6 +1565,7 @@ static void asm_ir(ASMState *as, 
IRIns *ir)
   case IR_PHI: asm_phi(as, ir); break;
   case IR_HIOP: asm_hiop(as, ir); break;
   case IR_GCSTEP: asm_gcstep(as, ir); break;
+  case IR_PROF: asm_prof(as, ir); break;
 
   /* Guarded assertions. */
   case IR_LT: case IR_GE: case IR_LE: case IR_GT:
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 497a5692..d736859c 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -1915,6 +1915,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+/* ARM: test the HOOK_PROFILE bit of the global hookmask and guard on the result. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* NOTE(review): calls are listed guard, test, load; presumably code is emitted back to front -- confirm. */
+  UNUSED(ir);  /* The IR instruction itself is not inspected. */
+  asm_guardcc(as, CC_NE);  /* Guard using condition NE. */
+  emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);  /* TST of RID_TMP against HOOK_PROFILE. */
+  emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);  /* LDRB of hookmask into RID_TMP. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index 74eff10b..365538bb 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -1562,6 +1562,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+/* MIPS: test the HOOK_PROFILE bit of the global hookmask and guard on the result. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* NOTE(review): calls are listed guard, mask, load; presumably code is emitted back to front -- confirm. */
+  UNUSED(ir);  /* The IR instruction itself is not inspected. */
+  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);  /* Guard: BNE of RID_TMP against RID_ZERO. */
+  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);  /* ANDI of RID_TMP with HOOK_PROFILE. */
+  emit_lsglptr(as, MIPSI_LBU, RID_TMP,  /* LBU of the hookmask field of global_State. */
+               (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. 
*/
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 652dcca0..e0dcaed0 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -1738,6 +1738,17 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+/* PPC: test the HOOK_PROFILE bit of the global hookmask and guard on the result. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* NOTE(review): calls are listed guard, mask, load; presumably code is emitted back to front -- confirm. */
+  UNUSED(ir);  /* The IR instruction itself is not inspected. */
+  asm_guardcc(as, CC_NE);  /* Guard using condition NE. */
+  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);  /* PPCI_ANDIDOT of RID_TMP with HOOK_PROFILE. */
+  emit_lsglptr(as, PPCI_LBZ, RID_TMP,  /* PPCI_LBZ load of the hookmask field of global_State. */
+               (int32_t)offsetof(global_State, hookmask));
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 82517600..8b72c5e8 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2348,6 +2348,16 @@ static void asm_hiop(ASMState *as, IRIns *ir)
 #endif
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+/* x86: test the HOOK_PROFILE bit of the global hookmask in memory and guard on the result. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{  /* NOTE(review): calls are listed guard, immediate, opcode; presumably code is emitted back to front -- confirm. */
+  UNUSED(ir);  /* The IR instruction itself is not inspected. */
+  asm_guardcc(as, CC_NE);  /* Guard using condition NE. */
+  emit_i8(as, HOOK_PROFILE);  /* 8 bit immediate: the HOOK_PROFILE mask. */
+  emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);  /* XOg_TEST with the address of hookmask as operand. */
+}
+
 /* -- Stack handling ------------------------------------------------------ */
 
 /* Check Lua stack size for overflow. Use exit handler as fallback. */
diff --git a/src/lj_ir.h b/src/lj_ir.h
index 841153d8..54bbbdda 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
   _(USE,        S , ref, ___) \
   _(PHI,        S , ref, ref) \
   _(RENAME,     S , ref, lit) \
+  _(PROF,       S , ___, ___) \
   \
   /* Constants. */ \
   _(KPRI,       N , ___, ___) \
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 2683b462..cfb04aa7 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -396,6 +396,12 @@ typedef struct jit_State {
   size_t szallmcarea;   /* Total size of all allocated mcode areas. */
 
   TValue errinfo;       /* Additional info element for trace errors. */
+
+#if LJ_HASPROFILE  /* Bookkeeping read and written by the rec_profile_*() recorder helpers. */
+  GCproto *prev_pt;     /* Previous prototype. */
+  BCLine prev_line;     /* Previous line. 
*/
+  int prof_mode;        /* Profiling mode: 0, 'f', 'l'. */
+#endif
 }
 #if LJ_TARGET_ARM
 LJ_ALIGN(16)           /* For DISPATCH-relative addresses in assembler part. */
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index 1d37a7fd..84c5dc00 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -2285,6 +2285,17 @@ LJFOLDF(barrier_tnew_tdup)
   return DROPFOLD;
 }
 
+/* -- Profiling ----------------------------------------------------------- */
+/* Fold rule for IR_PROF: return the existing IR_PROF when it is the directly preceding instruction. */
+LJFOLD(PROF any any)
+LJFOLDF(prof)
+{
+  IRRef ref = J->chain[IR_PROF];  /* Chain entry for IR_PROF; presumably the most recent one -- confirm. */
+  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
+    return ref;
+  return EMITFOLD;  /* No neighbour: result is EMITFOLD. */
+}
+
 /* -- Stores and allocations ---------------------------------------------- */
 
 /* Stores and allocations cannot be folded or passed on to CSE in general.
diff --git a/src/lj_profile.c b/src/lj_profile.c
index 0baad06c..a58aefc8 100644
--- a/src/lj_profile.c
+++ b/src/lj_profile.c
@@ -14,6 +14,10 @@
 #include "lj_frame.h"
 #include "lj_debug.h"
 #include "lj_dispatch.h"
+#if LJ_HASJIT
+#include "lj_jit.h"
+#include "lj_trace.h"
+#endif
 #include "lj_profile.h"
 
 #include "luajit.h"
@@ -218,13 +222,20 @@ LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
   ProfileState *ps = &profile_state;
   int interval = LJ_PROFILE_INTERVAL_DEFAULT;
   while (*mode) {
-    switch (*mode++) {
+    int m = *mode++;
+    switch (m) {
     case 'i':
       interval = 0;
       while (*mode >= '0' && *mode <= '9')
         interval = interval * 10 + (*mode++ - '0');
       if (interval <= 0) interval = 1;
       break;
+#if LJ_HASJIT
+    case 'l': case 'f':
+      L2J(L)->prof_mode = m;
+      lj_trace_flushall(L);
+      break;
+#endif
     default:  /* Ignore unknown mode chars. 
*/
       break;
     }
@@ -251,6 +262,10 @@ LUA_API void luaJIT_profile_stop(lua_State *L)
     profile_timer_stop(ps);
     g->hookmask &= ~HOOK_PROFILE;
     lj_dispatch_update(g);
+#if LJ_HASJIT
+    G2J(g)->prof_mode = 0;  /* Clear the profiling mode kept in the JIT state. */
+    lj_trace_flushall(L);  /* Presumably discards traces recorded with profiler checks -- confirm. */
+#endif
     lj_buf_free(g, &ps->sb);
     setmref(ps->sb.b, NULL);
     setmref(ps->sb.e, NULL);
diff --git a/src/lj_record.c b/src/lj_record.c
index bdf0212a..3dcb0a85 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
 #endif
 #include "lj_bc.h"
 #include "lj_ff.h"
+#if LJ_HASPROFILE
+#include "lj_debug.h"
+#endif
 #include "lj_ir.h"
 #include "lj_jit.h"
 #include "lj_ircall.h"
@@ -579,6 +582,52 @@ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
   }  /* Side trace continues across a loop that's left or not entered. */
 }
 
+/* -- Record profiler hook checks ----------------------------------------- */
+
+#if LJ_HASPROFILE
+
+/* Need to insert profiler hook check? Returns 1 if so, else 0. Updates J->prev_pt and J->prev_line. */
+static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
+{
+  GCproto *ppt;
+  lua_assert(J->prof_mode == 'f' || J->prof_mode == 'l');
+  if (!pt)  /* No prototype given: never insert a check. */
+    return 0;
+  ppt = J->prev_pt;
+  J->prev_pt = pt;  /* Remember this prototype for the next call. */
+  if (pt != ppt && ppt) {  /* Prototype differs from a known previous one. */
+    J->prev_line = -1;  /* Forget the previous line. */
+    return 1;
+  }
+  if (J->prof_mode == 'l') {  /* Line mode: also compare line numbers. */
+    BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));  /* Line obtained from lj_debug_line() for pc. */
+    BCLine pline = J->prev_line;
+    J->prev_line = line;  /* Remember this line for the next call. */
+    if (pline != line)  /* Line differs from the previous one. */
+      return 1;
+  }
+  return 0;
+}
+/* Emit IR_PROF and add a snapshot if a profiling mode is set and rec_profile_need() asks for a check. */
+static void rec_profile_ins(jit_State *J, const BCIns *pc)
+{
+  if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
+    emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);  /* Both operands are zero. */
+    lj_snap_add(J);
+  }
+}
+/* In mode 'f' only: emit IR_PROF, clear J->prev_pt and add a snapshot. */
+static void rec_profile_ret(jit_State *J)
+{
+  if (J->prof_mode == 'f') {
+    emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);  /* Both operands are zero. */
+    J->prev_pt = NULL;  /* With no previous prototype, rec_profile_need() skips its prototype check once. */
+    lj_snap_add(J);
+  }
+}
+
+#endif
+
 /* -- Record calls and returns -------------------------------------------- */
 
 /* Specialize to the runtime value of the called function or its prototype. 
*/
@@ -1770,6 +1819,10 @@ void lj_record_ins(jit_State *J)
   rec_check_ir(J);
 #endif
 
+#if LJ_HASPROFILE
+  rec_profile_ins(J, pc);  /* May emit a profiler hook check for the instruction at pc. */
+#endif
+
   /* Keep a copy of the runtime values of var/num/str operands. */
 #define rav     (&ix.valv)
 #define rbv     (&ix.tabv)
@@ -2074,6 +2127,9 @@ void lj_record_ins(jit_State *J)
     rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
     /* fallthrough */
   case BC_RET: case BC_RET0: case BC_RET1:
+#if LJ_HASPROFILE
+    rec_profile_ret(J);  /* Runs before the return itself is recorded. */
+#endif
     lj_record_ret(J, ra, (ptrdiff_t)rc-1);
     break;
 
@@ -2303,6 +2359,10 @@ void lj_record_setup(jit_State *J)
     if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
       lj_trace_err(J, LJ_TRERR_STACKOV);
   }
+#if LJ_HASPROFILE
+  J->prev_pt = NULL;  /* No previous prototype at setup. */
+  J->prev_line = -1;  /* No previous line at setup. */
+#endif
 #ifdef LUAJIT_ENABLE_CHECKHOOK
   /* Regularly check for instruction/line hooks from compiled code and
   ** exit to the interpreter if the hooks are set.
diff --git a/src/lj_trace.c b/src/lj_trace.c
index c70fc247..2b8d931f 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -766,17 +766,20 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
   if (errcode)
     return -errcode;  /* Return negated error code. */
 
-  lj_vmevent_send(L, TEXIT,
-    lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
-    setintV(L->top++, J->parent);
-    setintV(L->top++, J->exitno);
-    trace_exit_regs(L, ex);
-  );
+  if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))  /* No TEXIT event while HOOK_PROFILE is set. */
+    lj_vmevent_send(L, TEXIT,
+      lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+      setintV(L->top++, J->parent);
+      setintV(L->top++, J->exitno);
+      trace_exit_regs(L, ex);
+    );
 
   pc = exd.pc;
   cf = cframe_raw(L->cframe);
   setcframe_pc(cf, pc);
-  if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
+  if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {  /* Checked first, so the GC branch below is skipped. */
+    /* Just exit to interpreter. */
+  } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
     if (!(G(L)->hookmask & HOOK_GC))
       lj_gc_step(L);  /* Exited because of GC: drive GC forward. */
   } else {