diff --git a/src/Makefile b/src/Makefile index 441feffb..e7f48fdd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,7 +446,7 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_api.o \ + lj_strfmt.o lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 0ea0d98e..7991bd16 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -102,7 +102,7 @@ lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h + lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ @@ -168,6 +168,9 @@ lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ lj_vm.h lj_vmevent.h +lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ + lj_jit.h lj_ir.h lj_profile.h luajit.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ @@ -180,7 +183,7 @@ 
lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -208,20 +211,21 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h lj_char.c \ lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c \ lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c \ - lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h \ - lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_api.c \ - lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ - lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ - lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ - lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ - lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \ - lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \ - lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \ - lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \ - lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \ - lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \ - lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \ - lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c + lj_state.c lj_lex.h lj_alloc.h luajit.h lj_dispatch.c lj_ccallback.h \ + lj_profile.h lj_vmevent.c 
lj_vmevent.h lj_vmmath.c lj_strscan.c \ + lj_strfmt.c lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ + lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ + lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ + lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ + lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ + lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ + lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ + lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ + lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ + lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ + lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ + lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ + lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lj_arch.h b/src/lj_arch.h index c5f2fb3d..ccb54270 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -366,6 +366,18 @@ #define LJ_HASFFI 1 #endif +#if defined(LUAJIT_DISABLE_PROFILE) +#define LJ_HASPROFILE 0 +#elif LJ_TARGET_POSIX +#define LJ_HASPROFILE 1 +#define LJ_PROFILE_SIGPROF 1 +#elif LJ_TARGET_WINDOWS +#define LJ_HASPROFILE 1 +#define LJ_PROFILE_WTHREAD 1 +#else +#define LJ_HASPROFILE 0 +#endif + #ifndef LJ_ARCH_HASFPU #define LJ_ARCH_HASFPU 1 #endif diff --git a/src/lj_debug.c b/src/lj_debug.c index 24efd2ff..8166fcc0 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -28,7 +28,7 @@ cTValue *lj_debug_frame(lua_State *L, int level, int *size) /* Traverse frames backwards. */ for (nextframe = frame = L->base-1; frame > bot; ) { if (frame_gc(frame) == obj2gco(L)) - level++; /* Skip dummy frames. See lj_meta_call(). 
*/
+      level++;  /* Skip dummy frames. See lj_err_optype_call(). */
     if (level-- == 0) {
       *size = (int)(nextframe - frame);
       return frame;  /* Level found. */
@@ -278,9 +278,9 @@ restart:
 }
 
 /* Deduce function name from caller of a frame. */
-const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
+const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
 {
-  TValue *pframe;
+  cTValue *pframe;
   GCfunc *fn;
   BCPos pc;
   if (frame <= tvref(L->stack))
@@ -534,6 +534,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
   }
 }
 
+#if LJ_HASPROFILE
+/* Put the chunkname into a buffer. Returns 0 for bytecode builtins, else 1. */
+static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
+{
+  GCstr *name = proto_chunkname(pt);
+  const char *p = strdata(name);
+  if (pt->firstline == ~(BCLine)0) {  /* Bytecode builtin. */
+    lj_buf_putmem(sb, "[builtin:", 9);
+    lj_buf_putstr(sb, name);
+    lj_buf_putb(sb, ']');
+    return 0;
+  }
+  if (*p == '=' || *p == '@') {  /* Named chunk: drop the one-char prefix. */
+    MSize len = name->len-1;
+    p++;
+    if (pathstrip) {  /* Keep only the part after the last '/' or '\\'. */
+      int i;
+      for (i = len-1; i >= 0; i--)
+        if (p[i] == '/' || p[i] == '\\') {
+          len -= i+1;
+          p = p+i+1;
+          break;
+        }
+    }
+    lj_buf_putmem(sb, p, len);
+  } else {
+    lj_buf_putmem(sb, "[string]", 8);  /* 8 chars: do not copy the NUL. */
+  }
+  return 1;
+}
+
+/* Put a compact stack dump into a buffer. */
+void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+{
+  int level = 0, dir = 1, pathstrip = 1;
+  MSize lastlen = 0;
+  if (depth < 0) { level = ~depth; depth = dir = -1; }  /* Reverse frames. */
+  while (level != depth) {  /* Loop through all frames. */
+    int size;
+    cTValue *frame = lj_debug_frame(L, level, &size);
+    if (frame) {
+      cTValue *nextframe = size ? frame+size : NULL;
+      GCfunc *fn = frame_func(frame);
+      const uint8_t *p = (const uint8_t *)fmt;
+      int c;
+      while ((c = *p++)) {
+        switch (c) {
+        case 'p':  /* Preserve full path. */
+          pathstrip = 0;
+          break;
+        case 'F': case 'f': {  /* Dump function name. 
*/
+          const char *name;
+          const char *what = lj_debug_funcname(L, frame, &name);
+          if (what) {
+            if (c == 'F' && isluafunc(fn)) {  /* Dump module:name for 'F'. */
+              GCproto *pt = funcproto(fn);
+              if (pt->firstline != ~(BCLine)0) {  /* Not a bytecode builtin. */
+                debug_putchunkname(sb, pt, pathstrip);
+                lj_buf_putb(sb, ':');
+              }
+            }
+            lj_buf_putmem(sb, name, (MSize)strlen(name));
+            break;
+          }  /* else: can't derive a name, dump module:line. */
+          }
+          /* fallthrough */
+        case 'l':  /* Dump module:line. */
+          if (isluafunc(fn)) {
+            GCproto *pt = funcproto(fn);
+            if (debug_putchunkname(sb, pt, pathstrip)) {
+              /* Regular Lua function: frame line for 'l', else first line. */
+              BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+                                       pt->firstline;
+              lj_buf_putb(sb, ':');
+              lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
+            }
+          } else if (isffunc(fn)) {  /* Dump numbered builtins. */
+            lj_buf_putmem(sb, "[builtin#", 9);
+            lj_strfmt_putint(sb, fn->c.ffid);
+            lj_buf_putb(sb, ']');
+          } else {  /* Dump C function address. */
+            lj_buf_putb(sb, '@');
+            lj_strfmt_putptr(sb, fn->c.f);
+          }
+          break;
+        case 'Z':  /* Zap trailing separator: remember length up to here. */
+          lastlen = sbuflen(sb);
+          break;
+        default:  /* Any other format char is copied verbatim. */
+          lj_buf_putb(sb, c);
+          break;
+        }
+      }
+    } else if (dir == 1) {  /* No frame at this level: end the forward walk. */
+      break;
+    } else {
+      level -= size;  /* Reverse frame order: quickly skip missing level. */
+    }
+    level += dir;
+  }
+  if (lastlen)
+    setsbufP(sb, sbufB(sb) + lastlen);  /* Zap trailing separator. */
+}
+#endif
+
 /* Number of frames for the leading and trailing part of a traceback. 
*/ #define TRACEBACK_LEVELS1 12 #define TRACEBACK_LEVELS2 10 diff --git a/src/lj_debug.h b/src/lj_debug.h index 4144b47e..d93380ac 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -32,7 +32,7 @@ LJ_FUNC const char *lj_debug_uvname(GCproto *pt, uint32_t idx); LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, BCReg slot, const char **name); -LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, +LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name); LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line); LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, @@ -40,6 +40,10 @@ LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext); +#if LJ_HASPROFILE +LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, + int depth); +#endif /* Fixed internal variable names. */ #define VARNAMEDEF(_) \ diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index d76dda41..01bc4239 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -27,6 +27,9 @@ #endif #include "lj_trace.h" #include "lj_dispatch.h" +#if LJ_HASPROFILE +#include "lj_profile.h" +#endif #include "lj_vm.h" #include "luajit.h" @@ -84,11 +87,12 @@ void lj_dispatch_init_hotcount(global_State *g) #endif /* Internal dispatch mode bits. */ -#define DISPMODE_JIT 0x01 /* JIT compiler on. */ -#define DISPMODE_REC 0x02 /* Recording active. */ +#define DISPMODE_CALL 0x01 /* Override call dispatch. */ +#define DISPMODE_RET 0x02 /* Override return dispatch. */ #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ -#define DISPMODE_CALL 0x08 /* Override call dispatch. */ -#define DISPMODE_RET 0x10 /* Override return dispatch. */ +#define DISPMODE_JIT 0x10 /* JIT compiler on. 
*/ +#define DISPMODE_REC 0x20 /* Recording active. */ +#define DISPMODE_PROF 0x40 /* Profiling active. */ /* Update dispatch table depending on various flags. */ void lj_dispatch_update(global_State *g) @@ -99,6 +103,9 @@ void lj_dispatch_update(global_State *g) mode |= (G2J(g)->flags & JIT_F_ON) ? DISPMODE_JIT : 0; mode |= G2J(g)->state != LJ_TRACE_IDLE ? (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; +#endif +#if LJ_HASPROFILE + mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; #endif mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; @@ -128,9 +135,9 @@ void lj_dispatch_update(global_State *g) disp[GG_LEN_DDISP+BC_LOOP] = f_loop; /* Set dynamic instruction dispatch. */ - if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { + if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) { /* Need to update the whole table. */ - if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ + if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */ /* Copy static dispatch table to dynamic dispatch table. */ memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); /* Overwrite with dynamic return dispatch. */ @@ -142,12 +149,13 @@ void lj_dispatch_update(global_State *g) } } else { /* The recording dispatch also checks for hooks. */ - ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; + ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook : + (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; uint32_t i; for (i = 0; i < GG_LEN_SDISP; i++) disp[i] = f; } - } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { + } else if (!(mode & DISPMODE_INS)) { /* Otherwise set dynamic counting ins. */ disp[BC_FORL] = f_forl; disp[BC_ITERL] = f_iterl; @@ -495,3 +503,34 @@ out: return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ } +#if LJ_HASPROFILE +/* Profile dispatch. 
*/
+void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
+{
+  ERRNO_SAVE
+  global_State *g = G(L);
+  uint8_t mask = g->hookmask;
+  g->hookmask = (mask & ~HOOK_PROFILE);  /* Clear the pending profile hook. */
+  lj_dispatch_update(g);  /* Recompute the dispatch table without it. */
+  if (!(mask & HOOK_VMEVENT)) {  /* Skip the callback if this bit is set. */
+    GCfunc *fn = curr_func(L);
+    GCproto *pt = funcproto(fn);
+    void *cf = cframe_raw(L->cframe);
+    const BCIns *oldpc = cframe_pc(cf);  /* Saved; restored after callback. */
+    uint8_t oldh = hook_save(g);
+    BCReg slots;
+    hook_vmevent(g);
+    setcframe_pc(cf, pc);  /* Expose the current PC while the callback runs. */
+    slots = cur_topslot(pt, pc, cframe_multres_n(cf));
+    L->top = L->base + slots;  /* Fix top. */
+    lj_profile_interpreter(L);  /* Invokes the user's profiler callback. */
+    setgcref(g->cur_L, obj2gco(L));
+    setcframe_pc(cf, oldpc);
+    hook_restore(g, oldh);
+    lj_trace_abort(g);
+    setvmstate(g, INTERP);
+  }
+  ERRNO_RESTORE
+}
+#endif
+
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index 326297cd..811a0ae4 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -29,7 +29,7 @@
   _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
   _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
   _(pow) _(fmod) _(ldexp) \
-  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) \
+  _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_profile) _(lj_err_throw)\
   _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
   _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
   _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
@@ -110,7 +110,9 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
 /* Instruction dispatch callback for hooks or when recording. 
*/
 LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
 LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
-LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc);
+#if LJ_HASPROFILE
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
+#endif
 
 #if LJ_HASFFI && !defined(_BUILDVM_H)
 /* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 846c290f..b1bbf1c9 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -554,6 +554,7 @@ typedef struct global_State {
 #define HOOK_ACTIVE_SHIFT 4
 #define HOOK_VMEVENT 0x20
 #define HOOK_GC 0x40
+#define HOOK_PROFILE 0x80
 #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
 #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
 #define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..0baad06c
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,274 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_profile_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+#include "lj_buf.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+#include "lj_dispatch.h"
+#include "lj_profile.h"
+
+#include "luajit.h"
+
+#if LJ_PROFILE_SIGPROF
+
+#include <sys/time.h>  /* setitimer(), struct itimerval. */
+#include <signal.h>  /* sigaction(), SIGPROF. */
+
+#elif LJ_PROFILE_PTHREAD
+
+#include <pthread.h>  /* pthread_create(), pthread_join(). */
+
+#elif LJ_PROFILE_WTHREAD
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>  /* CreateThread(), Sleep(), LoadLibraryA(). */
+typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
+
+#endif
+
+/* Profiler state. */
+typedef struct ProfileState {
+  global_State *g;  /* VM state that started the profiler. */
+  luaJIT_profile_callback cb;  /* Profiler callback. */
+  void *data;  /* Profiler callback data. */
+  SBuf sb;  /* String buffer for stack dumps. */
+  int interval;  /* Sample interval in milliseconds. */
+  int samples;  /* Number of samples for next callback. 
*/
+  int vmstate;  /* VM state when profile timer triggered. */
+#if LJ_PROFILE_SIGPROF
+  struct sigaction oldsa;  /* Previous SIGPROF state. */
+#elif LJ_PROFILE_PTHREAD
+  pthread_t thread;  /* Timer thread. */
+  int abort;  /* Abort timer thread. */
+#elif LJ_PROFILE_WTHREAD
+  HINSTANCE wmm;  /* WinMM library handle. */
+  WMM_TPFUNC wmm_tbp;  /* WinMM timeBeginPeriod function. */
+  WMM_TPFUNC wmm_tep;  /* WinMM timeEndPeriod function. */
+  HANDLE thread;  /* Timer thread. */
+  int abort;  /* Abort timer thread. */
+#endif
+} ProfileState;
+
+/* Sadly, we have to use a static profiler state.
+**
+** The SIGPROF variant needs a static pointer to the global state, anyway.
+** And it would be hard to extend for multiple threads. You can still use
+** multiple VMs in multiple threads, but only profile one at a time.
+*/
+static ProfileState profile_state;
+
+/* Default sample interval in milliseconds. */
+#define LJ_PROFILE_INTERVAL_DEFAULT 10
+
+/* -- Profile callbacks --------------------------------------------------- */
+
+/* Callback from profile hook (HOOK_PROFILE already cleared). */
+void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  int samples = ps->samples;  /* Count accumulated by profile_trigger(). */
+  ps->samples = 0;  /* Reset before handing the count to the callback. */
+  ps->cb(ps->data, L, samples, ps->vmstate);  /* Invoke user callback. */
+}
+
+/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
+static void profile_trigger(ProfileState *ps)
+{
+  global_State *g = ps->g;
+  uint8_t mask;
+  ps->samples++;  /* Always increment number of samples. */
+  mask = g->hookmask;
+  if (!(mask & HOOK_PROFILE)) {  /* Set profile hook, unless already set. */
+    int st = g->vmstate;
+    ps->vmstate = st >= 0 ? 'N' :  /* Presumably a running trace: confirm. */
+                  st == ~LJ_VMST_INTERP ? 'I' :
+                  st == ~LJ_VMST_C ? 'C' :
+                  st == ~LJ_VMST_GC ? 'G' : 'J';  /* 'J': any other state. */
+    g->hookmask = (mask | HOOK_PROFILE);
+    lj_dispatch_update(g);  /* Reroute instruction dispatch to the hook. */
+  }
+}
+
+/* -- OS-specific profile timer handling ---------------------------------- */
+
+#if LJ_PROFILE_SIGPROF
+
+/* SIGPROF handler. 
*/
+static void profile_signal(int sig)
+{
+  UNUSED(sig);
+  profile_trigger(&profile_state);
+}
+
+/* Start profiling timer. The handler is installed before the timer is armed. */
+static void profile_timer_start(ProfileState *ps)
+{
+  int interval = ps->interval;
+  struct itimerval tm;
+  struct sigaction sa;
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
+  sa.sa_flags = SA_RESTART;
+  sa.sa_handler = profile_signal;
+  sigemptyset(&sa.sa_mask);
+  sigaction(SIGPROF, &sa, &ps->oldsa);  /* Saves the previous disposition. */
+  setitimer(ITIMER_PROF, &tm, NULL);  /* Arm only once SIGPROF is handled. */
+}
+
+/* Stop profiling timer. Disarms the timer, then restores the old handler. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  struct itimerval tm;
+  tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
+  tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
+  setitimer(ITIMER_PROF, &tm, NULL);
+  sigaction(SIGPROF, &ps->oldsa, NULL);
+}
+
+#elif LJ_PROFILE_PTHREAD
+
+/* POSIX timer thread. Sleeps one interval, then triggers, until aborted. */
+static void *profile_thread(ProfileState *ps)
+{
+  int interval = ps->interval;
+  struct timespec ts;
+  ts.tv_sec = interval / 1000;
+  ts.tv_nsec = (interval % 1000) * 1000000;
+  while (1) {
+    nanosleep(&ts, NULL);
+    if (ps->abort) break;
+    profile_trigger(ps);
+  }
+  return NULL;
+}
+
+/* Start profiling timer thread. */
+static void profile_timer_start(ProfileState *ps)
+{
+  ps->abort = 0;
+  pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
+}
+
+/* Stop profiling timer thread. Waits for the thread to exit. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;
+  pthread_join(ps->thread, NULL);
+}
+
+#elif LJ_PROFILE_WTHREAD
+
+/* Windows timer thread. Calls wmm_tbp/wmm_tep unconditionally. */
+static DWORD WINAPI profile_thread(void *psx)
+{
+  ProfileState *ps = (ProfileState *)psx;
+  int interval = ps->interval;
+  ps->wmm_tbp(1);
+  while (1) {
+    Sleep(interval);
+    if (ps->abort) break;
+    profile_trigger(ps);
+  }
+  ps->wmm_tep(1);
+  return 0;
+}
+
+/* Start profiling timer thread. */
+static void profile_timer_start(ProfileState *ps)
+{
+  if (!ps->wmm) {  /* Load WinMM library on-demand. 
*/
+    ps->wmm = LoadLibraryA("winmm.dll");
+    if (!ps->wmm)  /* No WinMM: profile_thread would call a NULL wmm_tbp. */
+      return;
+    ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
+    ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
+    if (!ps->wmm_tbp || !ps->wmm_tep) {
+      ps->wmm = NULL;  /* Retry the lookup on the next start. */
+      return;
+    }
+  }
+  ps->abort = 0;
+  ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
+}
+
+/* Stop profiling timer thread. Waits for the thread to exit. */
+static void profile_timer_stop(ProfileState *ps)
+{
+  ps->abort = 1;
+  WaitForSingleObject(ps->thread, INFINITE);
+}
+
+#endif
+
+/* -- Public profiling API ------------------------------------------------ */
+
+/* Start profiling. Mode "i<n>" sets the sample interval to n milliseconds. */
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+                                  luaJIT_profile_callback cb, void *data)
+{
+  ProfileState *ps = &profile_state;
+  int interval = LJ_PROFILE_INTERVAL_DEFAULT;
+  while (*mode) {
+    switch (*mode++) {
+    case 'i':  /* Decimal interval follows; values below 1 become 1. */
+      interval = 0;
+      while (*mode >= '0' && *mode <= '9')
+        interval = interval * 10 + (*mode++ - '0');
+      if (interval <= 0) interval = 1;
+      break;
+    default:  /* Ignore unknown mode chars. */
+      break;
+    }
+  }
+  if (ps->g) {  /* Already profiling: restart if it is this VM's profiler. */
+    luaJIT_profile_stop(L);
+    if (ps->g) return;  /* Profiler in use by another VM. */
+  }
+  ps->g = G(L);
+  ps->interval = interval;
+  ps->cb = cb;
+  ps->data = data;
+  ps->samples = 0;
+  lj_buf_init(L, &ps->sb);
+  profile_timer_start(ps);
+}
+
+/* Stop profiling. */
+LUA_API void luaJIT_profile_stop(lua_State *L)
+{
+  ProfileState *ps = &profile_state;
+  global_State *g = ps->g;
+  if (G(L) == g) {  /* Only stop profiler if started by this VM. */
+    profile_timer_stop(ps);
+    g->hookmask &= ~HOOK_PROFILE;  /* Drop a hook that is still pending. */
+    lj_dispatch_update(g);
+    lj_buf_free(g, &ps->sb);
+    setmref(ps->sb.b, NULL);
+    setmref(ps->sb.e, NULL);
+    ps->g = NULL;  /* Marks the static profiler state as unused. */
+  }
+}
+
+/* Return a compact stack dump. 
*/
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+                                             int depth, size_t *len)
+{
+  ProfileState *ps = &profile_state;
+  SBuf *sb = &ps->sb;  /* Shared buffer inside the static profiler state. */
+  setsbufL(sb, L);
+  lj_buf_reset(sb);  /* Each call overwrites the previous dump. */
+  lj_debug_dumpstack(L, sb, fmt, depth);
+  *len = (size_t)sbuflen(sb);  /* Length of the dump goes out via *len. */
+  return sbufB(sb);  /* Start of the dump inside that shared buffer. */
+}
+
+#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..ad26f2b6
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,17 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PROFILE_H
+#define _LJ_PROFILE_H
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
+
+#endif
+
+#endif
diff --git a/src/lj_state.c b/src/lj_state.c
index f11467fe..8fd11b2f 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -27,6 +27,7 @@
 #include "lj_vm.h"
 #include "lj_lex.h"
 #include "lj_alloc.h"
+#include "luajit.h"
 
 /* -- Stack handling ------------------------------------------------------ */
 
@@ -237,8 +238,11 @@ LUA_API void lua_close(lua_State *L)
 {
   global_State *g = G(L);
   int i;
-  setgcrefnull(g->cur_L);
   L = mainthread(g);  /* Only the main thread can be closed. */
+#if LJ_HASPROFILE
+  luaJIT_profile_stop(L);  /* Only acts if this VM started the profiler. */
+#endif
+  setgcrefnull(g->cur_L);
   lj_func_closeuv(L, tvref(L->stack));
   lj_gc_separateudata(g, 1);  /* Separate udata which have GC metamethods. */
 #if LJ_HASJIT
diff --git a/src/lj_vm.h b/src/lj_vm.h
index 948d63c2..5893d0b2 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -43,6 +43,7 @@ LJ_ASMF void lj_vm_record(void);
 LJ_ASMF void lj_vm_inshook(void);
 LJ_ASMF void lj_vm_rethook(void);
 LJ_ASMF void lj_vm_callhook(void);
+LJ_ASMF void lj_vm_profhook(void);
 
 /* Trace exit handling. 
*/ LJ_ASMF void lj_vm_exit_handler(void); diff --git a/src/ljamalg.c b/src/ljamalg.c index 7198a09f..29fed380 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -47,6 +47,7 @@ #include "lj_strscan.c" #include "lj_strfmt.c" #include "lj_api.c" +#include "lj_profile.c" #include "lj_lex.c" #include "lj_parse.c" #include "lj_bcread.c" diff --git a/src/luajit.h b/src/luajit.h index a4c939bf..80530d95 100644 --- a/src/luajit.h +++ b/src/luajit.h @@ -64,6 +64,15 @@ enum { /* Control the JIT engine. */ LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); +/* Low-overhead profiling API. */ +typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, + int samples, int vmstate); +LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, + luaJIT_profile_callback cb, void *data); +LUA_API void luaJIT_profile_stop(lua_State *L); +LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, + int depth, size_t *len); + /* Enforce (dynamic) linker error for version mismatches. Call from main. */ LUA_API void LUAJIT_VERSION_SYM(void); diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index dd39052c..0bd929fd 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -2081,6 +2081,18 @@ static void build_subroutines(BuildCtx *ctx) | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] | ldr INS, [PC, #-4] | bx CRET1 + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
+ | ldr BASE, L->base + | sub PC, PC, #4 + | b ->cont_nop +#endif | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 39d1521d..e7d01dc4 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -2010,6 +2010,20 @@ static void build_subroutines(BuildCtx *ctx) | lw LFUNC:RB, FRAME_FUNC(BASE) | jr CRET1 |. lw INS, -4(PC) + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sw BASE, L->base + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addiu PC, PC, -4 + | b ->cont_nop + |. lw BASE, L->base +#endif | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index d9186c44..293c5c6f 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -2504,6 +2504,19 @@ static void build_subroutines(BuildCtx *ctx) | lwz INS, -4(PC) | mtctr CRET1 | bctr + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mr CARG1, L + | stw MULTRES, SAVE_MULTRES + | mr CARG2, PC + | stp BASE, L->base + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. 
+ | lp BASE, L->base + | subi PC, PC, 4 + | b ->cont_nop +#endif | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 7fe891b1..eaa99740 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -2588,7 +2588,7 @@ static void build_subroutines(BuildCtx *ctx) | mov FCARG2, PC // Caveat: FCARG2 == BASE | mov FCARG1, L:RB | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) + | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |3: | mov BASE, L:RB->base |4: @@ -2658,6 +2658,19 @@ static void build_subroutines(BuildCtx *ctx) | shr RD, 3 | add NARGS:RD, 1 | jmp RBa + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov FCARG2, PC // Caveat: FCARG2 == BASE + | mov FCARG1, L:RB + | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) + | mov BASE, L:RB->base + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | sub PC, 4 + | jmp ->cont_nop +#endif | |//----------------------------------------------------------------------- |//-- Trace exit handler -------------------------------------------------