From 4c882fe71406a923f07c8e9a0b9189036e0ba386 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 25 Aug 2012 23:02:29 +0200 Subject: [PATCH] Replace strtod() with builtin string to number conversion. --- src/Makefile | 4 +- src/Makefile.dep | 56 ++--- src/lib_base.c | 3 +- src/lj_api.c | 15 +- src/lj_asm_arm.h | 2 +- src/lj_asm_mips.h | 2 +- src/lj_asm_ppc.h | 2 +- src/lj_asm_x86.h | 2 +- src/lj_char.h | 2 +- src/lj_cparse.c | 67 ++---- src/lj_ffrecord.c | 5 +- src/lj_ir.c | 1 + src/lj_ircall.h | 2 +- src/lj_lex.c | 110 +++------- src/lj_lib.c | 12 +- src/lj_meta.c | 12 +- src/lj_opt_fold.c | 3 +- src/lj_opt_narrow.c | 12 +- src/lj_str.c | 79 ------- src/lj_str.h | 3 - src/lj_strscan.c | 497 ++++++++++++++++++++++++++++++++++++++++++++ src/lj_strscan.h | 39 ++++ src/ljamalg.c | 1 + src/luaconf.h | 1 - 24 files changed, 660 insertions(+), 272 deletions(-) create mode 100644 src/lj_strscan.c create mode 100644 src/lj_strscan.h diff --git a/src/Makefile b/src/Makefile index 748a4ed7..bb617516 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,8 +446,8 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ - lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ + lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ + lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index f9e658e1..9b130deb 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -3,7 +3,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ lj_tab.h lj_meta.h 
lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_lib.h lj_libdef.h + lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ + lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ @@ -39,7 +40,8 @@ lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h + lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h \ + lj_strscan.h lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ @@ -77,7 +79,7 @@ lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_cdata.h lj_clib.h lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ - lj_bc.h lj_vm.h lj_char.h + lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h \ @@ -101,7 +103,7 @@ lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ - lj_vm.h lj_recdef.h + lj_vm.h lj_strscan.h lj_recdef.h 
lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_vm.h @@ -115,34 +117,34 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ - lj_vm.h lj_lib.h + lj_vm.h lj_strscan.h lj_lib.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ - lj_state.h lj_lex.h lj_parse.h lj_char.h + lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ - lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_lib.h + lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_vm.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ - lj_vm.h + lj_vm.h lj_strscan.h lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ - lj_folddef.h + lj_strscan.h lj_folddef.h lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h lj_opt_mem.o: 
lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h + lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_strscan.h lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ @@ -165,6 +167,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h +lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_tab.h lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ @@ -184,21 +188,21 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ - lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_debug.c \ - lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h \ - lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_api.c lj_bcdump.h lj_parse.h \ - lj_lex.c lualib.h lj_parse.c lj_bcread.c lj_bcwrite.c lj_ctype.c \ - lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ - lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ - lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ - lj_iropt.h lj_opt_mem.c lj_opt_fold.c 
lj_folddef.h lj_opt_narrow.c \ - lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ - lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ - lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ - lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ - lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ - lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ - lib_init.c + lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ + lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ + luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ + lj_bcdump.h lj_parse.h lj_lex.c lualib.h lj_parse.c lj_bcread.c \ + lj_bcwrite.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c \ + lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h \ + lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h \ + lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \ + lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \ + lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \ + lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \ + lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \ + lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \ + lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \ + lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lib_base.c b/src/lib_base.c index 383955d4..9702c5b4 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -31,6 +31,7 @@ #include "lj_ff.h" #include "lj_dispatch.h" #include "lj_char.h" +#include "lj_strscan.h" #include "lj_lib.h" /* 
-- Base library: checks ------------------------------------------------ */ @@ -191,7 +192,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) int32_t base = lj_lib_optint(L, 2, 10); if (base == 10) { TValue *o = lj_lib_checkany(L, 1); - if (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))) { + if (lj_strscan_numberobj(o)) { copyTV(L, L->base-1, o); return FFH_RES(1); } diff --git a/src/lj_api.c b/src/lj_api.c index b807900d..bfd471d2 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -26,6 +26,7 @@ #include "lj_lex.h" #include "lj_bcdump.h" #include "lj_parse.h" +#include "lj_strscan.h" /* -- Common helper functions --------------------------------------------- */ @@ -236,7 +237,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); TValue tmp; - return (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), &tmp))); + return (tvisnumber(o) || (tvisstr(o) && lj_strscan_number(strV(o), &tmp))); } LUA_API int lua_isstring(lua_State *L, int idx) @@ -320,7 +321,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) TValue tmp; if (LJ_LIKELY(tvisnumber(o))) return numberVnum(o); - else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp)) + else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) return numV(&tmp); else return 0; @@ -332,7 +333,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) TValue tmp; if (LJ_LIKELY(tvisnumber(o))) return numberVnum(o); - else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) + else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); return numV(&tmp); } @@ -345,7 +346,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def) return numberVnum(o); else if (tvisnil(o)) return def; - else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp))) + else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); return numV(&tmp); } @@ -360,7 +361,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) } else if 
(LJ_LIKELY(tvisnum(o))) { n = numV(o); } else { - if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp))) + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) return 0; if (tvisint(&tmp)) return (lua_Integer)intV(&tmp); @@ -383,7 +384,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) } else if (LJ_LIKELY(tvisnum(o))) { n = numV(o); } else { - if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp))) + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); if (tvisint(&tmp)) return (lua_Integer)intV(&tmp); @@ -408,7 +409,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) } else if (tvisnil(o)) { return def; } else { - if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp))) + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) lj_err_argt(L, idx, LUA_TNUMBER); if (tvisint(&tmp)) return (lua_Integer)intV(&tmp); diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index f307d6b8..8ba3dc06 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -626,7 +626,7 @@ static void asm_conv64(ASMState *as, IRIns *ir) static void asm_strto(ASMState *as, IRIns *ir) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; Reg rlo = 0, rhi = 0, tmp; int destused = ra_used(ir); diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index 14ebd1b3..3de133b8 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -541,7 +541,7 @@ static void asm_conv64(ASMState *as, IRIns *ir) static void asm_strto(ASMState *as, IRIns *ir) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). 
*/ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 86052577..7d9a9c76 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -533,7 +533,7 @@ static void asm_conv64(ASMState *as, IRIns *ir) static void asm_strto(ASMState *as, IRIns *ir) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; int32_t ofs; RegSet drop = RSET_SCRATCH; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index ceeefbee..da4cf7e2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -882,7 +882,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) static void asm_strto(ASMState *as, IRIns *ir) { /* Force a spill slot for the destination register (if any). */ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum]; + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; RegSet drop = RSET_SCRATCH; if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r)) diff --git a/src/lj_char.h b/src/lj_char.h index 7b7c1322..c3c86d34 100644 --- a/src/lj_char.h +++ b/src/lj_char.h @@ -21,7 +21,7 @@ #define LJ_CHAR_GRAPH (LJ_CHAR_ALNUM|LJ_CHAR_PUNCT) /* Only pass -1 or 0..255 to these macros. Never pass a signed char! */ -#define lj_char_isa(c, t) (lj_char_bits[(c)+1] & t) +#define lj_char_isa(c, t) ((lj_char_bits+1)[(c)] & t) #define lj_char_iscntrl(c) lj_char_isa((c), LJ_CHAR_CNTRL) #define lj_char_isspace(c) lj_char_isa((c), LJ_CHAR_SPACE) #define lj_char_ispunct(c) lj_char_isa((c), LJ_CHAR_PUNCT) diff --git a/src/lj_cparse.c b/src/lj_cparse.c index e9826715..ab5903fa 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -15,6 +15,7 @@ #include "lj_frame.h" #include "lj_vm.h" #include "lj_char.h" +#include "lj_strscan.h" /* ** Important note: this is NOT a validating C parser! 
This is a minimal @@ -156,40 +157,19 @@ LJ_NORET LJ_NOINLINE static void cp_err(CPState *cp, ErrMsg em) /* -- Main lexical scanner ------------------------------------------------ */ -/* Parse integer literal. */ -static CPToken cp_integer(CPState *cp) +/* Parse number literal. Only handles int32_t/uint32_t right now. */ +static CPToken cp_number(CPState *cp) { - uint32_t n = 0; - cp->val.id = CTID_INT32; - if (cp->c != '0') { /* Decimal. */ - do { - n = n*10 + (cp->c - '0'); - } while (lj_char_isdigit(cp_get(cp))); - } else if ((cp_get(cp)& ~0x20) == 'X') { /* Hexadecimal. */ - if (!lj_char_isxdigit(cp_get(cp))) - cp_err(cp, LJ_ERR_XNUMBER); - do { - n = n*16 + (cp->c & 15); - if (!lj_char_isdigit(cp->c)) n += 9; - } while (lj_char_isxdigit(cp_get(cp))); - if (n >= 0x80000000u) cp->val.id = CTID_UINT32; - } else { /* Octal. */ - while (cp->c >= '0' && cp->c <= '7') { - n = n*8 + (cp->c - '0'); - cp_get(cp); - } - if (n >= 0x80000000u) cp->val.id = CTID_UINT32; - } - cp->val.u32 = n; - for (;;) { /* Parse suffixes. */ - if ((cp->c & ~0x20) == 'U') - cp->val.id = CTID_UINT32; - else if ((cp->c & ~0x20) != 'L') - break; - cp_get(cp); - } - if (lj_char_isident(cp->c) && !(cp->mode & CPARSE_MODE_SKIP)) - cp_errmsg(cp, cp->c, LJ_ERR_XNUMBER); + StrScanFmt fmt; + TValue o; + do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); + cp_save(cp, '\0'); + fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); + if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; + else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; + else if (!(cp->mode & CPARSE_MODE_SKIP)) + cp_errmsg(cp, CTOK_INTEGER, LJ_ERR_XNUMBER); + cp->val.u32 = (uint32_t)o.i; return CTOK_INTEGER; } @@ -319,37 +299,34 @@ static CPToken cp_next_(CPState *cp) lj_str_resetbuf(&cp->sb); for (;;) { if (lj_char_isident(cp->c)) - return lj_char_isdigit(cp->c) ? cp_integer(cp) : cp_ident(cp); + return lj_char_isdigit(cp->c) ? 
cp_number(cp) : cp_ident(cp); switch (cp->c) { case '\n': case '\r': cp_newline(cp); /* fallthrough. */ case ' ': case '\t': case '\v': case '\f': cp_get(cp); break; case '"': case '\'': return cp_string(cp); case '/': - cp_get(cp); - if (cp->c == '*') cp_comment_c(cp); + if (cp_get(cp) == '*') cp_comment_c(cp); else if (cp->c == '/') cp_comment_cpp(cp); else return '/'; break; case '|': - cp_get(cp); if (cp->c != '|') return '|'; cp_get(cp); return CTOK_OROR; + if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR; case '&': - cp_get(cp); if (cp->c != '&') return '&'; cp_get(cp); return CTOK_ANDAND; + if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND; case '=': - cp_get(cp); if (cp->c != '=') return '='; cp_get(cp); return CTOK_EQ; + if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ; case '!': - cp_get(cp); if (cp->c != '=') return '!'; cp_get(cp); return CTOK_NE; + if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE; case '<': - cp_get(cp); - if (cp->c == '=') { cp_get(cp); return CTOK_LE; } + if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; } else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; } return '<'; case '>': - cp_get(cp); - if (cp->c == '=') { cp_get(cp); return CTOK_GE; } + if (cp_get(cp) == '=') { cp_get(cp); return CTOK_GE; } else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; } return '>'; case '-': - cp_get(cp); if (cp->c != '>') return '-'; cp_get(cp); return CTOK_DEREF; + if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF; case '$': return cp_param(cp); case '\0': return CTOK_EOF; diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index 716226bc..1ac0112e 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -26,6 +26,7 @@ #include "lj_crecord.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_strscan.h" /* Some local macros to save typing. Undef'd at the end. 
*/ #define IR(ref) (&J->cur.ir[(ref)]) @@ -64,7 +65,7 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd); /* Get runtime value of int argument. */ static int32_t argv2int(jit_State *J, TValue *o) { - if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o))) + if (!lj_strscan_numberobj(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); return tvisint(o) ? intV(o) : lj_num2int(numV(o)); } @@ -266,7 +267,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) if (tref_isnumber_str(tr)) { if (tref_isstr(tr)) { TValue tmp; - if (!lj_str_tonum(strV(&rd->argv[0]), &tmp)) + if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) recff_nyiu(J); /* Would need an inverted STRTO for this case. */ tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); } diff --git a/src/lj_ir.c b/src/lj_ir.c index f198a6df..92be4149 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -28,6 +28,7 @@ #include "lj_carith.h" #endif #include "lj_vm.h" +#include "lj_strscan.h" #include "lj_lib.h" /* Some local macros to save typing. Undef'd at the end. */ diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 8f481106..ffe7305b 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -102,7 +102,7 @@ typedef struct CCallInfo { #define IRCALLDEF(_) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ _(ANY, lj_str_new, 3, S, STR, CCI_L) \ - _(ANY, lj_str_tonum, 2, FN, INT, 0) \ + _(ANY, lj_strscan_num, 2, FN, INT, 0) \ _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ diff --git a/src/lj_lex.c b/src/lj_lex.c index 669d2dfe..55e2bea1 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -23,6 +23,7 @@ #include "lj_lex.h" #include "lj_parse.h" #include "lj_char.h" +#include "lj_strscan.h" /* Lua lexer token names. 
*/ static const char *const tokennames[] = { @@ -84,100 +85,51 @@ static void inclinenumber(LexState *ls) /* -- Scanner for terminals ----------------------------------------------- */ -#if LJ_HASFFI -/* Load FFI library on-demand. Needed if we create cdata objects. */ -static void lex_loadffi(lua_State *L) -{ - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); - L->top = restorestack(L, oldtop); -} - -/* Parse 64 bit integer. */ -static int lex_number64(LexState *ls, TValue *tv) -{ - uint64_t n = 0; - uint8_t *p = (uint8_t *)ls->sb.buf; - CTypeID id = CTID_INT64; - GCcdata *cd; - int numl = 0; - if (p[0] == '0' && (p[1] & ~0x20) == 'X') { /* Hexadecimal. */ - p += 2; - if (!lj_char_isxdigit(*p)) return 0; - do { - n = n*16 + (*p & 15); - if (!lj_char_isdigit(*p)) n += 9; - p++; - } while (lj_char_isxdigit(*p)); - } else { /* Decimal. */ - if (!lj_char_isdigit(*p)) return 0; - do { - n = n*10 + (*p - '0'); - p++; - } while (lj_char_isdigit(*p)); - } - for (;;) { /* Parse suffixes. */ - if ((*p & ~0x20) == 'U') - id = CTID_UINT64; - else if ((*p & ~0x20) == 'L') - numl++; - else - break; - p++; - } - if (numl != 2 || *p != '\0') return 0; - /* Return cdata holding a 64 bit integer. */ - cd = lj_cdata_new_(ls->L, id, 8); - *(uint64_t *)cdataptr(cd) = n; - lj_parse_keepcdata(ls, tv, cd); - return 1; /* Ok. */ -} -#endif - /* Parse a number literal. */ static void lex_number(LexState *ls, TValue *tv) { - int c, xp = 'E'; + StrScanFmt fmt; + int c, xp = 'e'; lua_assert(lj_char_isdigit(ls->current)); if ((c = ls->current) == '0') { save_and_next(ls); - if ((ls->current & ~0x20) == 'X') xp = 'P'; + if ((ls->current | 0x20) == 'x') xp = 'p'; } while (lj_char_isident(ls->current) || ls->current == '.' 
|| - ((ls->current == '-' || ls->current == '+') && (c & ~0x20) == xp)) { + ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { c = ls->current; save_and_next(ls); } -#if LJ_HASFFI - c &= ~0x20; - if ((c == 'I' || c == 'L' || c == 'U') && !ctype_ctsG(G(ls->L))) - lex_loadffi(ls->L); - if (c == 'I') /* Parse imaginary part of complex number. */ - ls->sb.n--; -#endif save(ls, '\0'); -#if LJ_HASFFI - if ((c == 'L' || c == 'U') && lex_number64(ls, tv)) { /* Parse 64 bit int. */ - return; - } else -#endif - if (lj_str_numconv(ls->sb.buf, tv)) { -#if LJ_HASFFI - if (c == 'I') { /* Return cdata holding a complex number. */ - GCcdata *cd = lj_cdata_new_(ls->L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); + fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, + (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | + (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); + if (LJ_DUALNUM && fmt == STRSCAN_INT) { + setitype(tv, LJ_TISNUM); + } else if (fmt == STRSCAN_NUM) { + /* Already in correct format. */ + } else if (LJ_HASFFI && fmt != STRSCAN_ERROR) { + lua_State *L = ls->L; + GCcdata *cd; + lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); + if (!ctype_ctsG(G(L))) { + ptrdiff_t oldtop = savestack(L, L->top); + luaopen_ffi(L); /* Load FFI library on-demand. */ + L->top = restorestack(L, oldtop); + } + if (fmt == STRSCAN_IMAG) { + cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); ((double *)cdataptr(cd))[0] = 0; - ((double *)cdataptr(cd))[1] = numberVnum(tv); - lj_parse_keepcdata(ls, tv, cd); + ((double *)cdataptr(cd))[1] = numV(tv); + } else { + cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8); + *(uint64_t *)cdataptr(cd) = tv->u64; } -#endif - if (LJ_DUALNUM && tvisnum(tv)) { - int32_t k = lj_num2int(numV(tv)); - if ((lua_Number)k == numV(tv)) /* -0 cannot end up here. 
*/ - setintV(tv, k); - } - return; + lj_parse_keepcdata(ls, tv, cd); + } else { + lua_assert(fmt == STRSCAN_ERROR); + lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } - lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } static int skip_sep(LexState *ls) diff --git a/src/lj_lib.c b/src/lj_lib.c index ae9b9301..4ab27f66 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -17,6 +17,7 @@ #include "lj_bc.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_strscan.h" #include "lj_lib.h" /* -- Library initialization ---------------------------------------------- */ @@ -155,8 +156,7 @@ GCstr *lj_lib_optstr(lua_State *L, int narg) void lj_lib_checknumber(lua_State *L, int narg) { TValue *o = L->base + narg-1; - if (!(o < L->top && - (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))))) + if (!(o < L->top && lj_strscan_numberobj(o))) lj_err_argt(L, narg, LUA_TNUMBER); } #endif @@ -165,7 +165,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) { TValue *o = L->base + narg-1; if (!(o < L->top && - (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))))) + (tvisnumber(o) || (tvisstr(o) && lj_strscan_num(strV(o), o))))) lj_err_argt(L, narg, LUA_TNUMBER); if (LJ_UNLIKELY(tvisint(o))) { lua_Number n = (lua_Number)intV(o); @@ -179,8 +179,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg) int32_t lj_lib_checkint(lua_State *L, int narg) { TValue *o = L->base + narg-1; - if (!(o < L->top && - (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))))) + if (!(o < L->top && lj_strscan_numberobj(o))) lj_err_argt(L, narg, LUA_TNUMBER); if (LJ_LIKELY(tvisint(o))) { return intV(o); @@ -200,8 +199,7 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) int32_t lj_lib_checkbit(lua_State *L, int narg) { TValue *o = L->base + narg-1; - if (!(o < L->top && - (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))))) + if (!(o < L->top && lj_strscan_numberobj(o))) lj_err_argt(L, narg, LUA_TNUMBER); if (LJ_LIKELY(tvisint(o))) { return intV(o); diff 
--git a/src/lj_meta.c b/src/lj_meta.c index ab8099e8..b33cb88d 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -18,6 +18,7 @@ #include "lj_frame.h" #include "lj_bc.h" #include "lj_vm.h" +#include "lj_strscan.h" /* -- Metamethod handling ------------------------------------------------- */ @@ -193,7 +194,7 @@ static cTValue *str2num(cTValue *o, TValue *n) return o; else if (tvisint(o)) return (setnumV(n, (lua_Number)intV(o)), n); - else if (tvisstr(o) && lj_str_tonum(strV(o), n)) + else if (tvisstr(o) && lj_strscan_num(strV(o), n)) return n; else return NULL; @@ -436,12 +437,9 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top) /* Helper for FORI. Coercion. */ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o) { - if (!(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o)))) - lj_err_msg(L, LJ_ERR_FORINIT); - if (!(tvisnumber(o+1) || (tvisstr(o+1) && lj_str_tonumber(strV(o+1), o+1)))) - lj_err_msg(L, LJ_ERR_FORLIM); - if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2)))) - lj_err_msg(L, LJ_ERR_FORSTEP); + if (!lj_strscan_numberobj(o)) lj_err_msg(L, LJ_ERR_FORINIT); + if (!lj_strscan_numberobj(o+1)) lj_err_msg(L, LJ_ERR_FORLIM); + if (!lj_strscan_numberobj(o+2)) lj_err_msg(L, LJ_ERR_FORSTEP); if (LJ_DUALNUM) { /* Ensure all slots are integers or all slots are numbers. 
*/ int32_t k[3]; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 2240f5db..8f1c8770 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -25,6 +25,7 @@ #endif #include "lj_carith.h" #include "lj_vm.h" +#include "lj_strscan.h" /* Here's a short description how the FOLD engine processes instructions: ** @@ -693,7 +694,7 @@ LJFOLD(STRTO KGC) LJFOLDF(kfold_strto) { TValue n; - if (lj_str_tonum(ir_kstr(fleft), &n)) + if (lj_strscan_num(ir_kstr(fleft), &n)) return lj_ir_knum(J, numV(&n)); return FAILFOLD; } diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 9bbb2083..2eb56c23 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -11,13 +11,13 @@ #if LJ_HASJIT -#include "lj_str.h" #include "lj_bc.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" #include "lj_trace.h" #include "lj_vm.h" +#include "lj_strscan.h" /* Rationale for narrowing optimizations: ** @@ -519,11 +519,11 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, { if (tref_isstr(rb)) { rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0); - lj_str_tonum(strV(vb), vb); + lj_strscan_num(strV(vb), vb); } if (tref_isstr(rc)) { rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_str_tonum(strV(vc), vc); + lj_strscan_num(strV(vc), vc); } /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ if ((op >= IR_ADD && op <= (LJ_DUALNUM ? 
IR_MUL : IR_SUB)) && @@ -541,7 +541,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { if (tref_isstr(rc)) { rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0); - lj_str_tonum(strV(vc), vc); + lj_strscan_num(strV(vc), vc); } if (tref_isinteger(rc)) { if ((uint32_t)numberVint(vc) != 0x80000000u) @@ -555,7 +555,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) { TRef tmp; - if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) + if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) lj_trace_err(J, LJ_TRERR_BADTYPE); if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) && tref_isinteger(rb) && tref_isinteger(rc) && @@ -575,7 +575,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc) /* Narrowing of power operator or math.pow. */ TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc) { - if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc)) + if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc)) lj_trace_err(J, LJ_TRERR_BADTYPE); /* Narrowing must be unconditional to preserve (-x)^i semantics. */ if (tvisint(vc) || numisint(numV(vc))) { diff --git a/src/lj_str.c b/src/lj_str.c index 7bf4848a..1060ec6d 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -169,85 +169,6 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) /* -- Type conversions ---------------------------------------------------- */ -/* Convert string object to number. */ -int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n) -{ - int ok = lj_str_numconv(strdata(str), n); - if (ok && tvisint(n)) - setnumV(n, (lua_Number)intV(n)); - return ok; -} - -int LJ_FASTCALL lj_str_tonumber(GCstr *str, TValue *n) -{ - return lj_str_numconv(strdata(str), n); -} - -/* Convert string to number. 
*/ -int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n) -{ -#if LJ_DUALNUM - int sign = 1; -#else - lua_Number sign = 1; -#endif - const uint8_t *p = (const uint8_t *)s; - while (lj_char_isspace(*p)) p++; - if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; } - if ((uint32_t)(*p - '0') < 10) { - uint32_t k = (uint32_t)(*p++ - '0'); - if (k == 0 && ((*p & ~0x20) == 'X')) { - p++; - if (!lj_char_isxdigit(*p)) - return 0; /* Don't accept '0x' without hex digits. */ - do { - if (k >= 0x10000000u) goto parsedbl; - k = (k << 4) + (*p & 15u); - if (!lj_char_isdigit(*p)) k += 9; - p++; - } while (lj_char_isxdigit(*p)); - } else { - while ((uint32_t)(*p - '0') < 10) { - if (LJ_UNLIKELY(k >= 429496729) && (k != 429496729 || *p > '5')) - goto parsedbl; - k = k * 10u + (uint32_t)(*p++ - '0'); - } - } - while (LJ_UNLIKELY(lj_char_isspace(*p))) p++; - if (LJ_LIKELY(*p == '\0')) { -#if LJ_DUALNUM - if (sign == 1) { - if (k < 0x80000000u) { - setintV(n, (int32_t)k); - return 1; - } - } else if (k <= 0x80000000u) { - setintV(n, -(int32_t)k); - return 1; - } -#endif - setnumV(n, sign * (lua_Number)k); - return 1; - } - } -parsedbl: - { - TValue tv; - char *endptr; - setnumV(&tv, lua_str2number(s, &endptr)); - if (endptr == s) return 0; /* Conversion failed. */ - if (LJ_UNLIKELY(*endptr != '\0')) { - while (lj_char_isspace((uint8_t)*endptr)) endptr++; - if (*endptr != '\0') return 0; /* Invalid trailing characters? */ - } - if (LJ_LIKELY(!tvisnan(&tv))) - setnumV(n, numV(&tv)); - else - setnanV(n); /* Canonicalize injected NaNs. */ - return 1; - } -} - /* Print number to buffer. Canonicalizes non-finite values. */ size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) { diff --git a/src/lj_str.h b/src/lj_str.h index 3d9be4f3..2bf34ed4 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -20,9 +20,6 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) /* Type conversions. 
*/ -LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n); -LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n); -LJ_FUNC int LJ_FASTCALL lj_str_tonumber(GCstr *str, TValue *n); LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o); LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k); LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); diff --git a/src/lj_strscan.c b/src/lj_strscan.c new file mode 100644 index 00000000..31189ae7 --- /dev/null +++ b/src/lj_strscan.c @@ -0,0 +1,497 @@ +/* +** String scanning. +** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h +*/ + +#include <math.h> + +#define lj_strscan_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_char.h" +#include "lj_strscan.h" + +/* -- Scanning numbers ---------------------------------------------------- */ + +/* +** Rationale for the builtin string to number conversion library: +** +** It removes a dependency on libc's strtod(), which is a true portability +** nightmare. Mainly due to the plethora of supported OS and toolchain +** combinations. Sadly, the various implementations +** a) are often buggy, incomplete (no hex floats) and/or imprecise, +** b) sometimes crash or hang on certain inputs, +** c) return non-standard NaNs that need to be filtered out, and +** d) fail if the locale-specific decimal separator is not a dot, +** which can only be fixed with atrocious workarounds. +** +** Also, most of the strtod() implementations are hopelessly bloated, +** which is not just an I-cache hog, but a problem for static linkage +** on embedded systems, too. +** +** OTOH the builtin conversion function is very compact. Even though it +** does a lot more, like parsing long longs, octal or imaginary numbers +** and returning the result in different formats: +** a) It needs less than 3 KB (!)
of machine code (on x64 with -Os), +** b) it doesn't perform any dynamic allocation and, +** c) it needs only around 600 bytes of stack space. +** +** The builtin function is faster than strtod() for typical inputs, e.g. +** "123", "1.5" or "1e6". Arguably, it's slower for very large exponents, +** which are not very common (this could be fixed, if needed). +** +** And most importantly, the builtin function is equally precise on all +** platforms. It correctly converts and rounds any input to a double. +** If this is not the case, please send a bug report -- but PLEASE verify +** that the implementation you're comparing to is not the culprit! +** +** The implementation quickly pre-scans the entire string first and +** handles simple integers on-the-fly. Otherwise, it dispatches to the +** base-specific parser. Hex and octal is straightforward. +** +** Decimal to binary conversion uses a fixed-length circular buffer in +** base 100. Some simple cases are handled directly. For other cases, the +** number in the buffer is up-scaled or down-scaled until the integer part +** is in the proper range. Then the integer part is rounded and converted +** to a double which is finally rescaled to the result. Denormals need +** special treatment to prevent incorrect 'double rounding'. +*/ + +/* Definitions for circular decimal digit buffer (base 100 = 2 digits/byte). */ +#define STRSCAN_DIG 1024 +#define STRSCAN_MAXDIG 800 /* 772 + extra are sufficient. */ +#define STRSCAN_DDIG (STRSCAN_DIG/2) +#define STRSCAN_DMASK (STRSCAN_DDIG-1) + +/* Helpers for circular buffer. */ +#define DNEXT(a) (((a)+1) & STRSCAN_DMASK) +#define DPREV(a) (((a)-1) & STRSCAN_DMASK) +#define DLEN(lo, hi) ((int32_t)(((lo)-(hi)) & STRSCAN_DMASK)) + +#define casecmp(c, k) (((c) | 0x20) == k) + +/* Final conversion to double. */ +static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) +{ + double n; + + /* Avoid double rounding for denormals. 
*/ + if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { + /* NYI: all of this generates way too much code on 32 bit CPUs. */ +#if defined(__GNUC__) && LJ_64 + int32_t b = (int32_t)(__builtin_clzll(x)^63); +#else + int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : + (int32_t)lj_fls((uint32_t)x); +#endif + if ((int32_t)b + ex2 <= -1023 && (int32_t)b + ex2 >= -1075) { + uint64_t rb = (uint64_t)1 << (-1075-ex2); + if ((x & rb) && ((x & (rb+rb+rb-1)))) x += rb+rb; + x = (x & ~(rb+rb-1)); + } + } + + /* Convert to double using a signed int64_t conversion, then rescale. */ + lua_assert((int64_t)x >= 0); + n = (double)(int64_t)x; + if (neg) n = -n; + if (ex2) n = ldexp(n, ex2); + o->n = n; +} + +/* Parse hexadecimal number. */ +static StrScanFmt strscan_hex(const uint8_t *p, TValue *o, + StrScanFmt fmt, uint32_t opt, + int32_t ex2, int32_t neg, uint32_t dig) +{ + uint64_t x = 0; + uint32_t i; + + /* Scan hex digits. */ + for (i = dig > 16 ? 16 : dig ; i; i--, p++) { + uint32_t d = (*p != '.' ? *p : *++p); if (d > '9') d += 9; + x = (x << 4) + (d & 15); + } + + /* Summarize rounding-effect of excess digits. */ + for (i = 16; i < dig; i++, p++) + x |= ((*p != '.' ? *p : *++p) != '0'), ex2 += 4; + + /* Format-specific handling. */ + switch (fmt) { + case STRSCAN_INT: + if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { + o->i = neg ? -(int)x : (int)x; + return STRSCAN_INT; /* Fast path for 32 bit integers. */ + } + if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } + /* fallthrough */ + case STRSCAN_U32: + if (dig > 8) return STRSCAN_ERROR; + o->i = neg ? -(int)x : (int)x; + return STRSCAN_U32; + case STRSCAN_I64: + case STRSCAN_U64: + if (dig > 16) return STRSCAN_ERROR; + o->u64 = neg ? (uint64_t)-(int64_t)x : x; + return fmt; + default: + break; + } + + /* Reduce range then convert to double. */ + if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } + strscan_double(x, o, ex2, neg); + return fmt; +} + +/* Parse octal number. 
*/ +static StrScanFmt strscan_oct(const uint8_t *p, TValue *o, + StrScanFmt fmt, int32_t neg, uint32_t dig) +{ + uint64_t x = 0; + + /* Scan octal digits. */ + if (dig > 22 || (dig == 22 && *p > '1')) return STRSCAN_ERROR; + while (dig-- > 0) { + if (!(*p >= '0' && *p <= '7')) return STRSCAN_ERROR; + x = (x << 3) + (*p++ & 7); + } + + /* Format-specific handling. */ + switch (fmt) { + case STRSCAN_INT: + if (x >= 0x80000000u+neg) fmt = STRSCAN_U32; + /* fallthrough */ + case STRSCAN_U32: + if ((x >> 32)) return STRSCAN_ERROR; + o->i = neg ? -(int)x : (int)x; + break; + default: + case STRSCAN_I64: + case STRSCAN_U64: + o->u64 = neg ? (uint64_t)-(int64_t)x : x; + break; + } + return fmt; +} + +/* Parse decimal number. */ +static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, + StrScanFmt fmt, uint32_t opt, + int32_t ex10, int32_t neg, uint32_t dig) +{ + uint8_t xi[STRSCAN_DDIG], *xip = xi; + + if (dig) { + uint32_t i = dig; + if (i > STRSCAN_MAXDIG) { + ex10 -= (int32_t)(i - STRSCAN_MAXDIG); + i = STRSCAN_MAXDIG; + } + /* Scan unaligned leading digit. */ + if (((ex10^i) & 1)) + *xip++ = ((*p != '.' ? *p : *++p) & 15), i--, p++; + /* Scan aligned double-digits. */ + for ( ; i > 1; i -= 2) { + uint32_t d = 10 * ((*p != '.' ? *p : *++p) & 15); p++; + *xip++ = d + ((*p != '.' ? *p : *++p) & 15); p++; + } + /* Scan and realign trailing digit. */ + if (i) *xip++ = 10 * ((*p != '.' ? *p : *++p) & 15), ex10--, p++; + + /* Summarize rounding-effect of excess digits. */ + if (dig > STRSCAN_MAXDIG) { + do { + if ((*p != '.' ? *p : *++p) != '0') { xip[-1] |= 1; break; } + p++; + } while (--dig > STRSCAN_MAXDIG); + dig = STRSCAN_MAXDIG; + } else { /* Simplify exponent. */ + while (ex10 > 0 && dig <= 18) *xip++ = 0, ex10 -= 2, dig += 2; + } + } else { /* Only got zeros. */ + ex10 = 0; + xi[0] = 0; + } + + /* Fast path for numbers in integer format (but handles e.g. 1e6, too). 
*/ + if (dig <= 20 && ex10 == 0) { + uint8_t *xis; + uint64_t x = xi[0]; + double n; + for (xis = xi+1; xis < xip; xis++) x = x * 100 + *xis; + if (!(dig == 20 && (xi[0] > 18 || (int64_t)x >= 0))) { /* No overflow? */ + /* Format-specific handling. */ + switch (fmt) { + case STRSCAN_INT: + if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { + o->i = neg ? -(int)x : (int)x; + return STRSCAN_INT; /* Fast path for 32 bit integers. */ + } + if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; } + /* fallthrough */ + case STRSCAN_U32: + if ((x >> 32) != 0) return STRSCAN_ERROR; + o->i = neg ? -(int)x : (int)x; + return STRSCAN_U32; + case STRSCAN_I64: + case STRSCAN_U64: + o->u64 = neg ? (uint64_t)-(int64_t)x : x; + return fmt; + default: + plainnumber: /* Fast path for plain numbers < 2^63. */ + if ((int64_t)x < 0) break; + n = (double)(int64_t)x; + if (neg) n = -n; + o->n = n; + return fmt; + } + } + } + + /* Slow non-integer path. */ + if (fmt == STRSCAN_INT) { + if ((opt & STRSCAN_OPT_C)) return STRSCAN_ERROR; + fmt = STRSCAN_NUM; + } else if (fmt > STRSCAN_INT) { + return STRSCAN_ERROR; + } + { + uint32_t hi = 0, lo = (uint32_t)(xip-xi); + int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); + + lua_assert(lo > 0 && (ex10 & 1) == 0); + + /* Handle simple overflow/underflow. */ + if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } + else if (idig < -326/2) { o->n = 0.0; return fmt; } + + /* Scale up until we have at least 17 or 18 integer part digits. */ + while (idig < 9 && idig < DLEN(lo, hi)) { + uint32_t i, cy = 0; + ex2 -= 6; + for (i = DPREV(lo); ; i = DPREV(i)) { + uint32_t d = (xi[i] << 6) + cy; + cy = (((d >> 2) * 5243) >> 17); d = d - cy * 100; /* Div/mod 100. 
*/ + xi[i] = (uint8_t)d; + if (i == hi) break; + if (d == 0 && i == DPREV(lo)) lo = i; + } + if (cy) { + if (xi[DPREV(lo)] == 0) lo = DPREV(lo); + else if (hi == lo) { lo = DPREV(lo); xi[DPREV(lo)] |= xi[lo]; } + hi = DPREV(hi); xi[hi] = (uint8_t)cy; idig++; + } + } + + /* Scale down until no more than 17 or 18 integer part digits remain. */ + while (idig > 9) { + uint32_t i, cy = 0; + ex2 += 6; + for (i = hi; i != lo; i = DNEXT(i)) { + cy += xi[i]; + xi[i] = (cy >> 6); + cy = 100 * (cy & 0x3f); + if (xi[i] == 0 && i == hi) hi = DNEXT(hi), idig--; + } + while (cy) { + if (hi == lo) { xi[DPREV(lo)] |= 1; break; } + xi[lo] = (cy >> 6); lo = DNEXT(lo); + cy = 100 * (cy & 0x3f); + } + } + + /* Collect integer part digits and convert to rescaled double. */ + { + uint64_t x = xi[hi]; + uint32_t i; + for (i = DNEXT(hi); --idig > 0 && i != lo; i = DNEXT(i)) + x = x * 100 + xi[i]; + if (i == lo) { + while (--idig >= 0) x = x * 100; + } else { /* Gather round bit from remaining digits. */ + x <<= 1; ex2--; + do { + if (xi[i]) { x |= 1; break; } + i = DNEXT(i); + } while (i != lo); + } + strscan_double(x, o, ex2, neg); + } + } + return fmt; +} + +/* Scan string containing a number. Returns format. Returns value in o. */ +StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) +{ + int32_t neg = 0; + + /* Remove leading space, parse sign and non-numbers. */ + if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { + while (lj_char_isspace(*p)) p++; + if (*p == '+' || *p == '-') neg = (*p++ == '-'); + if (LJ_UNLIKELY(*p >= 'A')) { /* Parse "inf", "infinity" or "nan". 
*/ + TValue tmp; + setnanV(&tmp); + if (casecmp(p[0],'i') && casecmp(p[1],'n') && casecmp(p[2],'f')) { + if (neg) setminfV(&tmp); else setpinfV(&tmp); + p += 3; + if (casecmp(p[0],'i') && casecmp(p[1],'n') && casecmp(p[2],'i') && + casecmp(p[3],'t') && casecmp(p[4],'y')) p += 5; + } else if (casecmp(p[0],'n') && casecmp(p[1],'a') && casecmp(p[2],'n')) { + p += 3; + } + while (lj_char_isspace(*p)) p++; + if (*p) return STRSCAN_ERROR; + o->u64 = tmp.u64; + return STRSCAN_NUM; + } + } + + /* Parse regular number. */ + { + StrScanFmt fmt = STRSCAN_INT; + int cmask = LJ_CHAR_DIGIT; + int base = (opt & STRSCAN_OPT_C) && *p == '0' ? 0 : 10; + const uint8_t *sp, *dp = NULL; + uint32_t dig = 0, hasdig = 0, x = 0; + int32_t ex = 0; + + /* Determine base and skip leading zeros. */ + if (LJ_UNLIKELY(*p <= '0')) { + if (*p == '0' && casecmp(p[1], 'x')) + base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; + for ( ; ; p++) { + if (*p == '0') { + hasdig = 1; + } else if (*p == '.') { + if (dp) return STRSCAN_ERROR; + dp = p; + } else { + break; + } + } + } + + /* Preliminary digit and decimal point scan. */ + for (sp = p; ; p++) { + if (LJ_LIKELY(lj_char_isa(*p, cmask))) { + x = x * 10 + (*p & 15); /* For fast path below. */ + dig++; + } else if (*p == '.') { + if (dp) return STRSCAN_ERROR; + dp = p; + } else { + break; + } + } + if (!(hasdig | dig)) return STRSCAN_ERROR; + + /* Handle decimal point. */ + if (dp) { + fmt = STRSCAN_NUM; + if (dig) { + ex = (int)(dp-(p-1)); dp = p-1; + while (ex < 0 && *dp-- == '0') ex++, dig--; /* Skip trailing zeros. */ + if (base == 16) ex *= 4; + } + } + + /* Parse exponent. */ + if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { + uint32_t xx; + int negx = 0; + fmt = STRSCAN_NUM; p++; + if (*p == '+' || *p == '-') negx = (*p++ == '-'); + if (!lj_char_isdigit(*p)) return STRSCAN_ERROR; + xx = (*p++ & 15); + while (lj_char_isdigit(*p)) { + if (xx < 65536) xx = xx * 10 + (*p & 15); + p++; + } + ex += negx ? -(int)xx : (int)xx; + } + + /* Parse suffix. 
*/ + if (*p) { + /* I (IMAG), U (U32), LL (I64), ULL/LLU (U64), L (long), UL/LU (ulong). */ + /* NYI: f (float). Not needed until cp_number() handles non-integers. */ + if (casecmp(*p, 'i')) { + if (!(opt & STRSCAN_OPT_IMAG)) return STRSCAN_ERROR; + p++; fmt = STRSCAN_IMAG; + } else if (fmt == STRSCAN_INT) { + if (casecmp(*p, 'u')) p++, fmt = STRSCAN_U32; + if (casecmp(*p, 'l')) { + p++; + if (casecmp(*p, 'l')) p++, fmt += STRSCAN_I64 - STRSCAN_INT; + else if (!(opt & STRSCAN_OPT_C)) return STRSCAN_ERROR; + else if (sizeof(long) == 8) fmt += STRSCAN_I64 - STRSCAN_INT; + } + if (casecmp(*p, 'u') && (fmt == STRSCAN_INT || fmt == STRSCAN_I64)) + p++, fmt += STRSCAN_U32 - STRSCAN_INT; + if ((fmt == STRSCAN_U32 && !(opt & STRSCAN_OPT_C)) || + (fmt >= STRSCAN_I64 && !(opt & STRSCAN_OPT_LL))) + return STRSCAN_ERROR; + } + while (lj_char_isspace(*p)) p++; + if (*p) return STRSCAN_ERROR; + } + + /* Fast path for decimal 32 bit integers. */ + if (fmt == STRSCAN_INT && base == 10 && + (dig < 10 || (dig == 10 && *sp <= '2' && x < 0x80000000u+neg))) { + if ((opt & STRSCAN_OPT_TONUM)) { + double n = (double)(int32_t)x; + if (neg) n = -n; + o->n = n; + return STRSCAN_NUM; + } else { + o->i = neg ? -(int)x : (int)x; + return STRSCAN_INT; + } + } + + /* Dispatch to base-specific parser. */ + if (base == 0 && !(fmt == STRSCAN_NUM || fmt == STRSCAN_IMAG)) + return strscan_oct(sp, o, fmt, neg, dig); + if (base == 16) + fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); + else + fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); + + /* Try to convert number to integer, if requested. 
*/ + if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) { + double n = o->n; + int32_t i = lj_num2int(n); + if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } + } + return fmt; + } +} + +int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) +{ + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + STRSCAN_OPT_TONUM); + lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); + return (fmt != STRSCAN_ERROR); +} + +#if LJ_DUALNUM +int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) +{ + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + STRSCAN_OPT_TOINT); + lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); + if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); + return (fmt != STRSCAN_ERROR); +} +#endif + +#undef DNEXT +#undef DPREV +#undef DLEN + diff --git a/src/lj_strscan.h b/src/lj_strscan.h new file mode 100644 index 00000000..da5bd095 --- /dev/null +++ b/src/lj_strscan.h @@ -0,0 +1,39 @@ +/* +** String scanning. +** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_STRSCAN_H +#define _LJ_STRSCAN_H + +#include "lj_obj.h" + +/* Options for accepted/returned formats. */ +#define STRSCAN_OPT_TOINT 0x01 /* Convert to int32_t, if possible. */ +#define STRSCAN_OPT_TONUM 0x02 /* Always convert to double. */ +#define STRSCAN_OPT_IMAG 0x04 +#define STRSCAN_OPT_LL 0x08 +#define STRSCAN_OPT_C 0x10 + +/* Returned format. */ +typedef enum { + STRSCAN_ERROR, + STRSCAN_NUM, STRSCAN_IMAG, + STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, +} StrScanFmt; + +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); +LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); +#if LJ_DUALNUM +LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); +#else +#define lj_strscan_number(s, o) lj_strscan_num((s), (o)) +#endif + +/* Check for number or convert string to number/int in-place (!). 
*/ +static LJ_AINLINE int lj_strscan_numberobj(TValue *o) +{ + return tvisnumber(o) || (tvisstr(o) && lj_strscan_number(strV(o), o)); +} + +#endif diff --git a/src/ljamalg.c b/src/ljamalg.c index b1124464..a64c9429 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -43,6 +43,7 @@ #include "lj_dispatch.c" #include "lj_vmevent.c" #include "lj_vmmath.c" +#include "lj_strscan.c" #include "lj_api.c" #include "lj_lex.c" #include "lj_parse.c" diff --git a/src/luaconf.h b/src/luaconf.h index dfb79e19..5c29d4f3 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -98,7 +98,6 @@ #define LUA_NUMBER_FMT "%.14g" #define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n)) #define LUAI_MAXNUMBER2STR 32 -#define lua_str2number(s, p) strtod((s), (p)) #define LUA_INTFRMLEN "l" #define LUA_INTFRM_T long