From c31ac26fb9803d4b09c27668b7c2d9a01385c9ba Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 5 Jan 2010 22:39:46 +0100 Subject: [PATCH] Add support for WIN64 exception handling to external unwinder. Modify unwinding to always return _ff or _c unwind type. Generate PE object .pdata/.xdata sections for x64 interpreter. Can drop r12-r15 saves in Windows/x64 interpreter now. --- src/buildvm_peobj.c | 70 ++++++++++++++++++++++++++++++- src/buildvm_x86.dasc | 30 ++++++-------- src/buildvm_x86.h | 2 +- src/lj_err.c | 98 +++++++++++++++++++++++++++++++++++++------- src/lj_frame.h | 21 +++++----- 5 files changed, 177 insertions(+), 44 deletions(-) diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c index d45f3c97..68af79c7 100644 --- a/src/buildvm_peobj.c +++ b/src/buildvm_peobj.c @@ -90,6 +90,7 @@ typedef struct PEsymaux { #define PEOBJ_ARCH_TARGET 0x8664 #define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */ #define PEOBJ_RELOC_DIR32 0x02 +#define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_SYM_PREFIX "" #endif @@ -98,7 +99,10 @@ enum { PEOBJ_SECT_ABS = -2, PEOBJ_SECT_UNDEF = -1, PEOBJ_SECT_TEXT, - /* TODO: add .pdata/.xdata for x64. */ +#if LJ_TARGET_X64 + PEOBJ_SECT_PDATA, + PEOBJ_SECT_XDATA, +#endif PEOBJ_SECT_RDATA, PEOBJ_SECT_RDATA_Z, PEOBJ_NSECTIONS @@ -196,6 +200,24 @@ void emit_peobj(BuildCtx *ctx) /* Flags: 60 = read+execute, 50 = align16, 20 = code. */ pesect[PEOBJ_SECT_TEXT].flags = 0x60500020; +#if LJ_TARGET_X64 + memcpy(pesect[PEOBJ_SECT_PDATA].name, ".pdata", sizeof(".pdata")-1); + pesect[PEOBJ_SECT_PDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_PDATA].size = 3*4); + pesect[PEOBJ_SECT_PDATA].relocofs = sofs; + sofs += (pesect[PEOBJ_SECT_PDATA].nreloc = 3) * PEOBJ_RELOC_SIZE; + /* Flags: 40 = read, 30 = align4, 40 = initialized data. 
*/ + pesect[PEOBJ_SECT_PDATA].flags = 0x40300040; + + memcpy(pesect[PEOBJ_SECT_XDATA].name, ".xdata", sizeof(".xdata")-1); + pesect[PEOBJ_SECT_XDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_XDATA].size = 8*2+4); /* See below. */ + pesect[PEOBJ_SECT_XDATA].relocofs = sofs; + sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; + /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ + pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; +#endif + memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1); pesect[PEOBJ_SECT_RDATA].ofs = sofs; sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t)); @@ -228,6 +250,9 @@ void emit_peobj(BuildCtx *ctx) #if !LJ_HASJIT pehdr.nsyms -= 7; #endif +#if LJ_TARGET_X64 + pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ +#endif /* Write PE object header and all sections. */ owrite(ctx, &pehdr, sizeof(PEheader)); @@ -243,6 +268,41 @@ void emit_peobj(BuildCtx *ctx) owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#if LJ_TARGET_X64 + { /* Write .pdata section. */ + uint32_t pdata[3]; /* Start of .text, end of .text and .xdata. */ + PEreloc reloc; + pdata[0] = 0; pdata[1] = (uint32_t)ctx->codesz; pdata[2] = 0; + owrite(ctx, &pdata, sizeof(pdata)); + reloc.vaddr = 0; reloc.symidx = 1+2+relocsyms+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 4; reloc.symidx = 1+2+relocsyms+2+2+1; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + reloc.vaddr = 8; reloc.symidx = 1+2+relocsyms+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } + { /* Write .xdata section. */ + uint16_t xdata[8+2]; + PEreloc reloc; + xdata[0] = 0x01|0x08|0x10; /* Ver. 1, uhandler/ehandler, prolog size 0. */ + xdata[1] = 5; /* Number of unwind codes, no frame pointer. */ + xdata[2] = 0x4200; /* Stack offset 4*8+8 = aword*5. */ + xdata[3] = 0x3000; /* Push rbx. */ + xdata[4] = 0x6000; /* Push rsi. 
*/ + xdata[5] = 0x7000; /* Push rdi. */ + xdata[6] = 0x5000; /* Push rbp. */ + xdata[7] = 0; /* Alignment. */ + xdata[8] = xdata[9] = 0; /* Relocated address of exception handler. */ + owrite(ctx, &xdata, sizeof(xdata)); + reloc.vaddr = sizeof(xdata)-4; reloc.symidx = 1+2+relocsyms+2+2; + reloc.type = PEOBJ_RELOC_ADDR32NB; + owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); + } +#endif + /* Write .rdata section. */ for (i = 0; i < ctx->npc; i++) { uint16_t pcofs = (uint16_t)ctx->sym_ofs[i]; @@ -279,6 +339,14 @@ void emit_peobj(BuildCtx *ctx) emit_peobj_sym(ctx, name, 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); } + +#if LJ_TARGET_X64 + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); + emit_peobj_sym(ctx, PEOBJ_SYM_PREFIX "lj_err_unwind_win64", 0, + PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); +#endif + emit_peobj_sym(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0, PEOBJ_SECT_TEXT, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN); for (i = nzsym; i < ctx->nsym; i++) { diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc index be2ee71e..e40ca6d7 100644 --- a/src/buildvm_x86.dasc +++ b/src/buildvm_x86.dasc @@ -168,28 +168,22 @@ |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). 
|.macro saveregs | push rbp; push rdi; push rsi; push rbx -| push r15; push r14; push r13; push r12 | sub rsp, CFRAME_SPACE |.endmacro |.macro restoreregs | add rsp, CFRAME_SPACE -| pop r12; pop r13; pop r14; pop r15 | pop rbx; pop rsi; pop rdi; pop rbp |.endmacro | -|.define SAVE_CFRAME, aword [rsp+aword*17] -|.define SAVE_PC, dword [rsp+dword*33] -|.define SAVE_L, dword [rsp+dword*32] -|.define SAVE_ERRF, dword [rsp+dword*31] -|.define SAVE_NRES, dword [rsp+dword*30] -|.define TMP2, dword [rsp+dword*29] -|.define TMP1, dword [rsp+dword*28] +|.define SAVE_CFRAME, aword [rsp+aword*13] +|.define SAVE_PC, dword [rsp+dword*25] +|.define SAVE_L, dword [rsp+dword*24] +|.define SAVE_ERRF, dword [rsp+dword*23] +|.define SAVE_NRES, dword [rsp+dword*22] +|.define TMP2, dword [rsp+dword*21] +|.define TMP1, dword [rsp+dword*20] |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter -|.define SAVE_RET, aword [rsp+aword*13] //<-- rsp entering interpreter. -|.define SAVE_R8, aword [rsp+aword*12] -|.define SAVE_R7, aword [rsp+aword*11] -|.define SAVE_R6, aword [rsp+aword*10] -|.define SAVE_R5, aword [rsp+aword*9] +|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. |.define SAVE_R4, aword [rsp+aword*8] |.define SAVE_R3, aword [rsp+aword*7] |.define SAVE_R2, aword [rsp+aword*6] @@ -202,7 +196,7 @@ |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee | |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). -|.define TMPQ, qword [rsp+aword*14] +|.define TMPQ, qword [rsp+aword*10] |.define MULTRES, TMP2 |.define TMPa, ARG5 |.define ARG5d, dword [rsp+aword*4] @@ -861,10 +855,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. | mov RC, RA // ... in [RC] | mov PC, [RB-12] // Restore PC from [cont|PC]. - | mov RA, dword [RB-16] |.if X64 + | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. 
| lea KBASEa, qword [=>0] | add RAa, KBASEa + |.else + | mov RA, dword [RB-16] |.endif | mov LFUNC:KBASE, [BASE-8] | mov PROTO:KBASE, LFUNC:KBASE->pt @@ -1854,7 +1850,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) |.ffunc coroutine_yield | mov L:RB, SAVE_L | mov [RA-4], PC - | test aword L:RB->cframe, CFRAME_CANYIELD + | test aword L:RB->cframe, CFRAME_RESUME | jz ->fff_fallback | mov L:RB->base, RA | lea RC, [RA+NARGS:RC*8-8] diff --git a/src/buildvm_x86.h b/src/buildvm_x86.h index 030ac179..68f27f93 100644 --- a/src/buildvm_x86.h +++ b/src/buildvm_x86.h @@ -1177,7 +1177,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse) dasm_put(Dst, 3443, Dt1(->top), Dt1(->base), Dt8(->upvalue[0].gcr), Dt1(->cframe), Dt1(->status), LUA_YIELD, Dt1(->top), Dt1(->base)); dasm_put(Dst, 3523, Dt1(->maxstack), Dt1(->top), Dt1(->base), Dt1(->top), DISPATCH_GL(vmstate), ~LJ_VMST_INTERP, Dt1(->base)); dasm_put(Dst, 3631, LUA_YIELD, Dt1(->base), Dt1(->top), Dt1(->top), Dt1(->maxstack), FRAME_TYPE); - dasm_put(Dst, 3727, Dt1(->top), Dt1(->base), Dt1(->cframe), CFRAME_CANYIELD, Dt1(->base), Dt1(->top), Dt1(->cframe), LUA_YIELD, Dt1(->status)); + dasm_put(Dst, 3727, Dt1(->top), Dt1(->base), Dt1(->cframe), CFRAME_RESUME, Dt1(->base), Dt1(->top), Dt1(->cframe), LUA_YIELD, Dt1(->status)); if (sse) { dasm_put(Dst, 3813, 1+1, LJ_TISNUM); } else { diff --git a/src/lj_err.c b/src/lj_err.c index b2e7f5f8..b8070250 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -493,7 +493,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) L->cframe = NULL; L->status = cast_byte(errcode); } - return cframe_raw(cf); + return cf; } if (errcode) { L->cframe = cframe_prev(cf); @@ -514,9 +514,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) L->cframe = cf; L->base = frame_prevd(frame) + 1; unwindstack(L, L->base); - return NULL; /* Call special handler. */ } - return cf; + return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); } } /* No C frame. 
*/ @@ -528,7 +527,7 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) G(L)->panic(L); exit(EXIT_FAILURE); } - return L; /* Anything not-NULL will do. */ + return L; /* Anything non-NULL will do. */ } /* -- External frame unwinding -------------------------------------------- */ @@ -574,12 +573,12 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, _Unwind_Action actions, errcode = LUA_ERRRUN; } #if LJ_UNWIND_EXT - if (err_unwind(L, cf, errcode)) { + cf = err_unwind(L, cf, errcode); + if (cf) { _Unwind_SetGR(ctx, 0, errcode); - _Unwind_SetIP(ctx, (_Unwind_Ptr)lj_vm_unwind_c_eh); - return _URC_INSTALL_CONTEXT; - } else if ((actions & _UA_HANDLER_FRAME)) { - _Unwind_SetIP(ctx, (_Unwind_Ptr)lj_vm_unwind_ff_eh); + _Unwind_SetIP(ctx, (_Unwind_Ptr)(cframe_unwind_ff(cf) ? + lj_vm_unwind_ff_eh : + lj_vm_unwind_c_eh)); return _URC_INSTALL_CONTEXT; } #else @@ -607,20 +606,89 @@ static void err_raise_ext(int errcode) #elif defined(_WIN64) +/* +** Someone in Redmond owes me several days of my life. A lot of this is +** undocumented or just plain wrong on MSDN. Some of it can be gathered +** from 3rd party docs or must be found by trial-and-error. They really +** don't want you to write your own language-specific exception handler +** or to interact gracefully with MSVC. :-( +** +** Apparently MSVC doesn't call C++ destructors for foreign exceptions +** unless you compile your C++ code with /EHa. Unfortunately this means +** catch (...) also catches things like access violations. The use of +** _set_se_translator doesn't really help, because it requires /EHa, too. 
+*/ + #define WIN32_LEAN_AND_MEAN #include <windows.h> -#define LJ_EXCODE ((DWORD)0x024c4a00) +/* Taken from: http://www.nynaeve.net/?p=99 */ +typedef struct UndocumentedDispatcherContext { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetIp; + PCONTEXT ContextRecord; + PEXCEPTION_ROUTINE LanguageHandler; + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG ScopeIndex; + ULONG Fill0; +} UndocumentedDispatcherContext; + +#ifdef _MSC_VER +/* Another wild guess. */ +extern __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); +#endif + +#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) + +#define LJ_EXCODE ((DWORD)0xe24c4a00) #define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) #define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) #define LJ_EXCODE_ERRCODE(cl) (cast_int((cl) & 0xff)) -/* NYI: Win64 exception handler for interpreter frame. */ +/* Win64 exception handler for interpreter frame. */ +LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, + void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +{ + lua_State *L = cframe_L(cf); + if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ + err_unwind(L, cf, 1); /* Unwind internal frames. */ + } else { + void *cf2 = err_unwind(L, cf, 0); + if (cf2) { /* We catch it, so start unwinding the upper frames. */ + int errcode; + if (LJ_EXCODE_CHECK(rec->ExceptionCode)) { + errcode = LJ_EXCODE_ERRCODE(rec->ExceptionCode); + } else if (rec->ExceptionCode == LJ_MSVC_EXCODE) { +#ifdef _MSC_VER + __DestructExceptionObject(rec, 1); +#endif + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + errcode = LUA_ERRRUN; + } else { /* Don't catch access violations etc. */ + return ExceptionContinueSearch; + } + /* Unwind the stack and call all handlers for all lower C frames + ** (including ourselves) again with EH_UNWINDING set. Then set + ** rsp = cf, rax = errcode and jump to the specified target. 
+ */ + RtlUnwindEx(cf, (void *)(cframe_unwind_ff(cf2) ? + lj_vm_unwind_ff_eh : + lj_vm_unwind_c_eh), + rec, (void *)errcode, ctx, dispatch->HistoryTable); + /* RtlUnwindEx should never return. */ + } + } + return ExceptionContinueSearch; +} /* Raise Windows exception. */ static void err_raise_ext(int errcode) { - RaiseException(LJ_EXCODE_MAKE(errcode), 0, 0, NULL); + RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); } #endif @@ -650,10 +718,10 @@ LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode) #else { void *cf = err_unwind(L, NULL, errcode); - if (cf) - lj_vm_unwind_c(cf, errcode); + if (cframe_unwind_ff(cf)) + lj_vm_unwind_ff(cframe_raw(cf)); else - lj_vm_unwind_ff(cframe_raw(L->cframe)); + lj_vm_unwind_c(cframe_raw(cf), errcode); } #endif exit(EXIT_FAILURE); diff --git a/src/lj_frame.h b/src/lj_frame.h index 0bfcb005..c86818e0 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -67,13 +67,13 @@ enum { #define CFRAME_SIZE (12*4) #elif LJ_TARGET_X64 #if _WIN64 -#define CFRAME_OFS_PREV (17*8) -#define CFRAME_OFS_PC (33*4) -#define CFRAME_OFS_L (32*4) -#define CFRAME_OFS_ERRF (31*4) -#define CFRAME_OFS_NRES (30*4) -#define CFRAME_OFS_MULTRES (29*4) -#define CFRAME_SIZE (14*8) +#define CFRAME_OFS_PREV (13*8) +#define CFRAME_OFS_PC (25*4) +#define CFRAME_OFS_L (24*4) +#define CFRAME_OFS_ERRF (23*4) +#define CFRAME_OFS_NRES (22*4) +#define CFRAME_OFS_MULTRES (21*4) +#define CFRAME_SIZE (10*8) #else #define CFRAME_OFS_PREV (4*8) #define CFRAME_OFS_PC (5*4) @@ -88,8 +88,8 @@ enum { #endif #define CFRAME_RESUME 1 -#define CFRAME_CANYIELD ((intptr_t)(CFRAME_RESUME)) -#define CFRAME_RAWMASK (~CFRAME_CANYIELD) +#define CFRAME_UNWIND_FF 2 /* Only used in unwinder. 
*/ +#define CFRAME_RAWMASK (~(intptr_t)(CFRAME_RESUME|CFRAME_UNWIND_FF)) #define cframe_errfunc(cf) (*(int32_t *)(((char *)(cf))+CFRAME_OFS_ERRF)) #define cframe_nres(cf) (*(int32_t *)(((char *)(cf))+CFRAME_OFS_NRES)) @@ -101,7 +101,8 @@ enum { (mref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), const BCIns)) #define setcframe_pc(cf, pc) \ (setmref(*(MRef *)(((char *)(cf))+CFRAME_OFS_PC), (pc))) -#define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_CANYIELD) +#define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_RESUME) +#define cframe_unwind_ff(cf) ((intptr_t)(cf) & CFRAME_UNWIND_FF) #define cframe_raw(cf) ((void *)((intptr_t)(cf) & CFRAME_RAWMASK)) #define cframe_Lpc(L) cframe_pc(cframe_raw(L->cframe))