Replace strtod() with builtin string to number conversion.

This commit is contained in:
Mike Pall 2012-08-25 23:02:29 +02:00
parent 653facd0d5
commit 4c882fe714
24 changed files with 660 additions and 272 deletions

View File

@ -446,8 +446,8 @@ LJLIB_C= $(LJLIB_O:.o=.c)
LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \
lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \
lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \

View File

@ -3,7 +3,8 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_lib.h lj_libdef.h
lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
lj_lib.h lj_libdef.h
lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
@ -39,7 +40,8 @@ lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h \
lj_strscan.h
lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
@ -77,7 +79,7 @@ lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_cdata.h lj_clib.h
lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
lj_bc.h lj_vm.h lj_char.h
lj_bc.h lj_vm.h lj_char.h lj_strscan.h
lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ir.h \
@ -101,7 +103,7 @@ lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
lj_vm.h lj_recdef.h
lj_vm.h lj_strscan.h lj_recdef.h
lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
lj_traceerr.h lj_vm.h
@ -115,34 +117,34 @@ lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
lj_vm.h lj_lib.h
lj_vm.h lj_strscan.h lj_lib.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
lj_state.h lj_lex.h lj_parse.h lj_char.h
lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_lib.h
lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
lj_traceerr.h lj_vm.h
lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
lj_vm.h
lj_vm.h lj_strscan.h
lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h
lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
lj_folddef.h
lj_strscan.h lj_folddef.h
lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_vm.h
lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_vm.h lj_strscan.h
lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
@ -165,6 +167,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_char.h lj_strscan.h
lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_tab.h
lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
@ -184,21 +188,21 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_debug.c \
lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h \
lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_api.c lj_bcdump.h lj_parse.h \
lj_lex.c lualib.h lj_parse.c lj_bcread.c lj_bcwrite.c lj_ctype.c \
lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \
lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
lib_init.c
lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
lj_bcdump.h lj_parse.h lj_lex.c lualib.h lj_parse.c lj_bcread.c \
lj_bcwrite.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c \
lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h \
lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h \
lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \
lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \
lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \
lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \
lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \
lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \
lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \
lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \

View File

@ -31,6 +31,7 @@
#include "lj_ff.h"
#include "lj_dispatch.h"
#include "lj_char.h"
#include "lj_strscan.h"
#include "lj_lib.h"
/* -- Base library: checks ------------------------------------------------ */
@ -191,7 +192,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
int32_t base = lj_lib_optint(L, 2, 10);
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
if (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))) {
if (lj_strscan_numberobj(o)) {
copyTV(L, L->base-1, o);
return FFH_RES(1);
}

View File

@ -26,6 +26,7 @@
#include "lj_lex.h"
#include "lj_bcdump.h"
#include "lj_parse.h"
#include "lj_strscan.h"
/* -- Common helper functions --------------------------------------------- */
@ -236,7 +237,7 @@ LUA_API int lua_isnumber(lua_State *L, int idx)
{
cTValue *o = index2adr(L, idx);
TValue tmp;
return (tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), &tmp)));
return (tvisnumber(o) || (tvisstr(o) && lj_strscan_number(strV(o), &tmp)));
}
LUA_API int lua_isstring(lua_State *L, int idx)
@ -320,7 +321,7 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
TValue tmp;
if (LJ_LIKELY(tvisnumber(o)))
return numberVnum(o);
else if (tvisstr(o) && lj_str_tonum(strV(o), &tmp))
else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp))
return numV(&tmp);
else
return 0;
@ -332,7 +333,7 @@ LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
TValue tmp;
if (LJ_LIKELY(tvisnumber(o)))
return numberVnum(o);
else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@ -345,7 +346,7 @@ LUALIB_API lua_Number luaL_optnumber(lua_State *L, int idx, lua_Number def)
return numberVnum(o);
else if (tvisnil(o))
return def;
else if (!(tvisstr(o) && lj_str_tonum(strV(o), &tmp)))
else if (!(tvisstr(o) && lj_strscan_num(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
return numV(&tmp);
}
@ -360,7 +361,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
} else if (LJ_LIKELY(tvisnum(o))) {
n = numV(o);
} else {
if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp)))
if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
return 0;
if (tvisint(&tmp))
return (lua_Integer)intV(&tmp);
@ -383,7 +384,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
} else if (LJ_LIKELY(tvisnum(o))) {
n = numV(o);
} else {
if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp)))
if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
if (tvisint(&tmp))
return (lua_Integer)intV(&tmp);
@ -408,7 +409,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
} else if (tvisnil(o)) {
return def;
} else {
if (!(tvisstr(o) && lj_str_tonumber(strV(o), &tmp)))
if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
lj_err_argt(L, idx, LUA_TNUMBER);
if (tvisint(&tmp))
return (lua_Integer)intV(&tmp);

View File

@ -626,7 +626,7 @@ static void asm_conv64(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
Reg rlo = 0, rhi = 0, tmp;
int destused = ra_used(ir);

View File

@ -541,7 +541,7 @@ static void asm_conv64(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
RegSet drop = RSET_SCRATCH;
if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */

View File

@ -533,7 +533,7 @@ static void asm_conv64(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
int32_t ofs;
RegSet drop = RSET_SCRATCH;

View File

@ -882,7 +882,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
static void asm_strto(ASMState *as, IRIns *ir)
{
/* Force a spill slot for the destination register (if any). */
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
RegSet drop = RSET_SCRATCH;
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))

View File

@ -21,7 +21,7 @@
#define LJ_CHAR_GRAPH (LJ_CHAR_ALNUM|LJ_CHAR_PUNCT)
/* Only pass -1 or 0..255 to these macros. Never pass a signed char! */
#define lj_char_isa(c, t) (lj_char_bits[(c)+1] & t)
#define lj_char_isa(c, t) ((lj_char_bits+1)[(c)] & t)
#define lj_char_iscntrl(c) lj_char_isa((c), LJ_CHAR_CNTRL)
#define lj_char_isspace(c) lj_char_isa((c), LJ_CHAR_SPACE)
#define lj_char_ispunct(c) lj_char_isa((c), LJ_CHAR_PUNCT)

View File

@ -15,6 +15,7 @@
#include "lj_frame.h"
#include "lj_vm.h"
#include "lj_char.h"
#include "lj_strscan.h"
/*
** Important note: this is NOT a validating C parser! This is a minimal
@ -156,40 +157,19 @@ LJ_NORET LJ_NOINLINE static void cp_err(CPState *cp, ErrMsg em)
/* -- Main lexical scanner ------------------------------------------------ */
/* Parse integer literal. */
static CPToken cp_integer(CPState *cp)
/* Parse number literal. Only handles int32_t/uint32_t right now. */
static CPToken cp_number(CPState *cp)
{
uint32_t n = 0;
cp->val.id = CTID_INT32;
if (cp->c != '0') { /* Decimal. */
do {
n = n*10 + (cp->c - '0');
} while (lj_char_isdigit(cp_get(cp)));
} else if ((cp_get(cp)& ~0x20) == 'X') { /* Hexadecimal. */
if (!lj_char_isxdigit(cp_get(cp)))
cp_err(cp, LJ_ERR_XNUMBER);
do {
n = n*16 + (cp->c & 15);
if (!lj_char_isdigit(cp->c)) n += 9;
} while (lj_char_isxdigit(cp_get(cp)));
if (n >= 0x80000000u) cp->val.id = CTID_UINT32;
} else { /* Octal. */
while (cp->c >= '0' && cp->c <= '7') {
n = n*8 + (cp->c - '0');
cp_get(cp);
}
if (n >= 0x80000000u) cp->val.id = CTID_UINT32;
}
cp->val.u32 = n;
for (;;) { /* Parse suffixes. */
if ((cp->c & ~0x20) == 'U')
cp->val.id = CTID_UINT32;
else if ((cp->c & ~0x20) != 'L')
break;
cp_get(cp);
}
if (lj_char_isident(cp->c) && !(cp->mode & CPARSE_MODE_SKIP))
cp_errmsg(cp, cp->c, LJ_ERR_XNUMBER);
StrScanFmt fmt;
TValue o;
do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
cp_save(cp, '\0');
fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C);
if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
else if (!(cp->mode & CPARSE_MODE_SKIP))
cp_errmsg(cp, CTOK_INTEGER, LJ_ERR_XNUMBER);
cp->val.u32 = (uint32_t)o.i;
return CTOK_INTEGER;
}
@ -319,37 +299,34 @@ static CPToken cp_next_(CPState *cp)
lj_str_resetbuf(&cp->sb);
for (;;) {
if (lj_char_isident(cp->c))
return lj_char_isdigit(cp->c) ? cp_integer(cp) : cp_ident(cp);
return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
switch (cp->c) {
case '\n': case '\r': cp_newline(cp); /* fallthrough. */
case ' ': case '\t': case '\v': case '\f': cp_get(cp); break;
case '"': case '\'': return cp_string(cp);
case '/':
cp_get(cp);
if (cp->c == '*') cp_comment_c(cp);
if (cp_get(cp) == '*') cp_comment_c(cp);
else if (cp->c == '/') cp_comment_cpp(cp);
else return '/';
break;
case '|':
cp_get(cp); if (cp->c != '|') return '|'; cp_get(cp); return CTOK_OROR;
if (cp_get(cp) != '|') return '|'; cp_get(cp); return CTOK_OROR;
case '&':
cp_get(cp); if (cp->c != '&') return '&'; cp_get(cp); return CTOK_ANDAND;
if (cp_get(cp) != '&') return '&'; cp_get(cp); return CTOK_ANDAND;
case '=':
cp_get(cp); if (cp->c != '=') return '='; cp_get(cp); return CTOK_EQ;
if (cp_get(cp) != '=') return '='; cp_get(cp); return CTOK_EQ;
case '!':
cp_get(cp); if (cp->c != '=') return '!'; cp_get(cp); return CTOK_NE;
if (cp_get(cp) != '=') return '!'; cp_get(cp); return CTOK_NE;
case '<':
cp_get(cp);
if (cp->c == '=') { cp_get(cp); return CTOK_LE; }
if (cp_get(cp) == '=') { cp_get(cp); return CTOK_LE; }
else if (cp->c == '<') { cp_get(cp); return CTOK_SHL; }
return '<';
case '>':
cp_get(cp);
if (cp->c == '=') { cp_get(cp); return CTOK_GE; }
if (cp_get(cp) == '=') { cp_get(cp); return CTOK_GE; }
else if (cp->c == '>') { cp_get(cp); return CTOK_SHR; }
return '>';
case '-':
cp_get(cp); if (cp->c != '>') return '-'; cp_get(cp); return CTOK_DEREF;
if (cp_get(cp) != '>') return '-'; cp_get(cp); return CTOK_DEREF;
case '$':
return cp_param(cp);
case '\0': return CTOK_EOF;

View File

@ -26,6 +26,7 @@
#include "lj_crecord.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@ -64,7 +65,7 @@ typedef void (LJ_FASTCALL *RecordFunc)(jit_State *J, RecordFFData *rd);
/* Get runtime value of int argument. */
static int32_t argv2int(jit_State *J, TValue *o)
{
if (!tvisnumber(o) && !(tvisstr(o) && lj_str_tonumber(strV(o), o)))
if (!lj_strscan_numberobj(o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
return tvisint(o) ? intV(o) : lj_num2int(numV(o));
}
@ -266,7 +267,7 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
if (tref_isnumber_str(tr)) {
if (tref_isstr(tr)) {
TValue tmp;
if (!lj_str_tonum(strV(&rd->argv[0]), &tmp))
if (!lj_strscan_num(strV(&rd->argv[0]), &tmp))
recff_nyiu(J); /* Would need an inverted STRTO for this case. */
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
}

View File

@ -28,6 +28,7 @@
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_lib.h"
/* Some local macros to save typing. Undef'd at the end. */

View File

@ -102,7 +102,7 @@ typedef struct CCallInfo {
#define IRCALLDEF(_) \
_(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
_(ANY, lj_str_new, 3, S, STR, CCI_L) \
_(ANY, lj_str_tonum, 2, FN, INT, 0) \
_(ANY, lj_strscan_num, 2, FN, INT, 0) \
_(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \
_(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \
_(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \

View File

@ -23,6 +23,7 @@
#include "lj_lex.h"
#include "lj_parse.h"
#include "lj_char.h"
#include "lj_strscan.h"
/* Lua lexer token names. */
static const char *const tokennames[] = {
@ -84,101 +85,52 @@ static void inclinenumber(LexState *ls)
/* -- Scanner for terminals ----------------------------------------------- */
#if LJ_HASFFI
/* Load FFI library on-demand. Needed if we create cdata objects. */
static void lex_loadffi(lua_State *L)
{
ptrdiff_t oldtop = savestack(L, L->top);
luaopen_ffi(L);
L->top = restorestack(L, oldtop);
}
/* Parse 64 bit integer. */
static int lex_number64(LexState *ls, TValue *tv)
{
uint64_t n = 0;
uint8_t *p = (uint8_t *)ls->sb.buf;
CTypeID id = CTID_INT64;
GCcdata *cd;
int numl = 0;
if (p[0] == '0' && (p[1] & ~0x20) == 'X') { /* Hexadecimal. */
p += 2;
if (!lj_char_isxdigit(*p)) return 0;
do {
n = n*16 + (*p & 15);
if (!lj_char_isdigit(*p)) n += 9;
p++;
} while (lj_char_isxdigit(*p));
} else { /* Decimal. */
if (!lj_char_isdigit(*p)) return 0;
do {
n = n*10 + (*p - '0');
p++;
} while (lj_char_isdigit(*p));
}
for (;;) { /* Parse suffixes. */
if ((*p & ~0x20) == 'U')
id = CTID_UINT64;
else if ((*p & ~0x20) == 'L')
numl++;
else
break;
p++;
}
if (numl != 2 || *p != '\0') return 0;
/* Return cdata holding a 64 bit integer. */
cd = lj_cdata_new_(ls->L, id, 8);
*(uint64_t *)cdataptr(cd) = n;
lj_parse_keepcdata(ls, tv, cd);
return 1; /* Ok. */
}
#endif
/* Parse a number literal. */
static void lex_number(LexState *ls, TValue *tv)
{
int c, xp = 'E';
StrScanFmt fmt;
int c, xp = 'e';
lua_assert(lj_char_isdigit(ls->current));
if ((c = ls->current) == '0') {
save_and_next(ls);
if ((ls->current & ~0x20) == 'X') xp = 'P';
if ((ls->current | 0x20) == 'x') xp = 'p';
}
while (lj_char_isident(ls->current) || ls->current == '.' ||
((ls->current == '-' || ls->current == '+') && (c & ~0x20) == xp)) {
((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
c = ls->current;
save_and_next(ls);
}
#if LJ_HASFFI
c &= ~0x20;
if ((c == 'I' || c == 'L' || c == 'U') && !ctype_ctsG(G(ls->L)))
lex_loadffi(ls->L);
if (c == 'I') /* Parse imaginary part of complex number. */
ls->sb.n--;
#endif
save(ls, '\0');
#if LJ_HASFFI
if ((c == 'L' || c == 'U') && lex_number64(ls, tv)) { /* Parse 64 bit int. */
return;
} else
#endif
if (lj_str_numconv(ls->sb.buf, tv)) {
#if LJ_HASFFI
if (c == 'I') { /* Return cdata holding a complex number. */
GCcdata *cd = lj_cdata_new_(ls->L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
(LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
(LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
if (LJ_DUALNUM && fmt == STRSCAN_INT) {
setitype(tv, LJ_TISNUM);
} else if (fmt == STRSCAN_NUM) {
/* Already in correct format. */
} else if (LJ_HASFFI && fmt != STRSCAN_ERROR) {
lua_State *L = ls->L;
GCcdata *cd;
lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
if (!ctype_ctsG(G(L))) {
ptrdiff_t oldtop = savestack(L, L->top);
luaopen_ffi(L); /* Load FFI library on-demand. */
L->top = restorestack(L, oldtop);
}
if (fmt == STRSCAN_IMAG) {
cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
((double *)cdataptr(cd))[0] = 0;
((double *)cdataptr(cd))[1] = numberVnum(tv);
((double *)cdataptr(cd))[1] = numV(tv);
} else {
cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
*(uint64_t *)cdataptr(cd) = tv->u64;
}
lj_parse_keepcdata(ls, tv, cd);
}
#endif
if (LJ_DUALNUM && tvisnum(tv)) {
int32_t k = lj_num2int(numV(tv));
if ((lua_Number)k == numV(tv)) /* -0 cannot end up here. */
setintV(tv, k);
}
return;
}
} else {
lua_assert(fmt == STRSCAN_ERROR);
lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
}
}
static int skip_sep(LexState *ls)
{

View File

@ -17,6 +17,7 @@
#include "lj_bc.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_lib.h"
/* -- Library initialization ---------------------------------------------- */
@ -155,8 +156,7 @@ GCstr *lj_lib_optstr(lua_State *L, int narg)
void lj_lib_checknumber(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o)))))
if (!(o < L->top && lj_strscan_numberobj(o)))
lj_err_argt(L, narg, LUA_TNUMBER);
}
#endif
@ -165,7 +165,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o)))))
(tvisnumber(o) || (tvisstr(o) && lj_strscan_num(strV(o), o)))))
lj_err_argt(L, narg, LUA_TNUMBER);
if (LJ_UNLIKELY(tvisint(o))) {
lua_Number n = (lua_Number)intV(o);
@ -179,8 +179,7 @@ lua_Number lj_lib_checknum(lua_State *L, int narg)
int32_t lj_lib_checkint(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o)))))
if (!(o < L->top && lj_strscan_numberobj(o)))
lj_err_argt(L, narg, LUA_TNUMBER);
if (LJ_LIKELY(tvisint(o))) {
return intV(o);
@ -200,8 +199,7 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
int32_t lj_lib_checkbit(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
if (!(o < L->top &&
(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o)))))
if (!(o < L->top && lj_strscan_numberobj(o)))
lj_err_argt(L, narg, LUA_TNUMBER);
if (LJ_LIKELY(tvisint(o))) {
return intV(o);

View File

@ -18,6 +18,7 @@
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* -- Metamethod handling ------------------------------------------------- */
@ -193,7 +194,7 @@ static cTValue *str2num(cTValue *o, TValue *n)
return o;
else if (tvisint(o))
return (setnumV(n, (lua_Number)intV(o)), n);
else if (tvisstr(o) && lj_str_tonum(strV(o), n))
else if (tvisstr(o) && lj_strscan_num(strV(o), n))
return n;
else
return NULL;
@ -436,12 +437,9 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
/* Helper for FORI. Coercion. */
void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
{
if (!(tvisnumber(o) || (tvisstr(o) && lj_str_tonumber(strV(o), o))))
lj_err_msg(L, LJ_ERR_FORINIT);
if (!(tvisnumber(o+1) || (tvisstr(o+1) && lj_str_tonumber(strV(o+1), o+1))))
lj_err_msg(L, LJ_ERR_FORLIM);
if (!(tvisnumber(o+2) || (tvisstr(o+2) && lj_str_tonumber(strV(o+2), o+2))))
lj_err_msg(L, LJ_ERR_FORSTEP);
if (!lj_strscan_numberobj(o)) lj_err_msg(L, LJ_ERR_FORINIT);
if (!lj_strscan_numberobj(o+1)) lj_err_msg(L, LJ_ERR_FORLIM);
if (!lj_strscan_numberobj(o+2)) lj_err_msg(L, LJ_ERR_FORSTEP);
if (LJ_DUALNUM) {
/* Ensure all slots are integers or all slots are numbers. */
int32_t k[3];

View File

@ -25,6 +25,7 @@
#endif
#include "lj_carith.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* Here's a short description how the FOLD engine processes instructions:
**
@ -693,7 +694,7 @@ LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
TValue n;
if (lj_str_tonum(ir_kstr(fleft), &n))
if (lj_strscan_num(ir_kstr(fleft), &n))
return lj_ir_knum(J, numV(&n));
return FAILFOLD;
}

View File

@ -11,13 +11,13 @@
#if LJ_HASJIT
#include "lj_str.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_vm.h"
#include "lj_strscan.h"
/* Rationale for narrowing optimizations:
**
@ -519,11 +519,11 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
{
if (tref_isstr(rb)) {
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
lj_str_tonum(strV(vb), vb);
lj_strscan_num(strV(vb), vb);
}
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_str_tonum(strV(vc), vc);
lj_strscan_num(strV(vc), vc);
}
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
@ -541,7 +541,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
{
if (tref_isstr(rc)) {
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
lj_str_tonum(strV(vc), vc);
lj_strscan_num(strV(vc), vc);
}
if (tref_isinteger(rc)) {
if ((uint32_t)numberVint(vc) != 0x80000000u)
@ -555,7 +555,7 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
{
TRef tmp;
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
if ((LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) &&
tref_isinteger(rb) && tref_isinteger(rc) &&
@ -575,7 +575,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vc)
/* Narrowing of power operator or math.pow. */
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
{
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
if (tvisstr(vc) && !lj_strscan_num(strV(vc), vc))
lj_trace_err(J, LJ_TRERR_BADTYPE);
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
if (tvisint(vc) || numisint(numV(vc))) {

View File

@ -169,85 +169,6 @@ void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
/* -- Type conversions ---------------------------------------------------- */
/* Convert string object to number. */
int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n)
{
int ok = lj_str_numconv(strdata(str), n);
if (ok && tvisint(n))
setnumV(n, (lua_Number)intV(n));
return ok;
}
int LJ_FASTCALL lj_str_tonumber(GCstr *str, TValue *n)
{
return lj_str_numconv(strdata(str), n);
}
/* Convert string to number. */
int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n)
{
#if LJ_DUALNUM
int sign = 1;
#else
lua_Number sign = 1;
#endif
const uint8_t *p = (const uint8_t *)s;
while (lj_char_isspace(*p)) p++;
if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
if ((uint32_t)(*p - '0') < 10) {
uint32_t k = (uint32_t)(*p++ - '0');
if (k == 0 && ((*p & ~0x20) == 'X')) {
p++;
if (!lj_char_isxdigit(*p))
return 0; /* Don't accept '0x' without hex digits. */
do {
if (k >= 0x10000000u) goto parsedbl;
k = (k << 4) + (*p & 15u);
if (!lj_char_isdigit(*p)) k += 9;
p++;
} while (lj_char_isxdigit(*p));
} else {
while ((uint32_t)(*p - '0') < 10) {
if (LJ_UNLIKELY(k >= 429496729) && (k != 429496729 || *p > '5'))
goto parsedbl;
k = k * 10u + (uint32_t)(*p++ - '0');
}
}
while (LJ_UNLIKELY(lj_char_isspace(*p))) p++;
if (LJ_LIKELY(*p == '\0')) {
#if LJ_DUALNUM
if (sign == 1) {
if (k < 0x80000000u) {
setintV(n, (int32_t)k);
return 1;
}
} else if (k <= 0x80000000u) {
setintV(n, -(int32_t)k);
return 1;
}
#endif
setnumV(n, sign * (lua_Number)k);
return 1;
}
}
parsedbl:
{
TValue tv;
char *endptr;
setnumV(&tv, lua_str2number(s, &endptr));
if (endptr == s) return 0; /* Conversion failed. */
if (LJ_UNLIKELY(*endptr != '\0')) {
while (lj_char_isspace((uint8_t)*endptr)) endptr++;
if (*endptr != '\0') return 0; /* Invalid trailing characters? */
}
if (LJ_LIKELY(!tvisnan(&tv)))
setnumV(n, numV(&tv));
else
setnanV(n); /* Canonicalize injected NaNs. */
return 1;
}
}
/* Print number to buffer. Canonicalizes non-finite values. */
size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
{

View File

@ -20,9 +20,6 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
/* Type conversions. */
LJ_FUNC int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n);
LJ_FUNC int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n);
LJ_FUNC int LJ_FASTCALL lj_str_tonumber(GCstr *str, TValue *n);
LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);

497
src/lj_strscan.c Normal file
View File

@ -0,0 +1,497 @@
/*
** String scanning.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/
#include <math.h>
#define lj_strscan_c
#define LUA_CORE
#include "lj_obj.h"
#include "lj_char.h"
#include "lj_strscan.h"
/* -- Scanning numbers ---------------------------------------------------- */
/*
** Rationale for the builtin string to number conversion library:
**
** It removes a dependency on libc's strtod(), which is a true portability
** nightmare. Mainly due to the plethora of supported OS and toolchain
** combinations. Sadly, the various implementations
** a) are often buggy, incomplete (no hex floats) and/or imprecise,
** b) sometimes crash or hang on certain inputs,
** c) return non-standard NaNs that need to be filtered out, and
** d) fail if the locale-specific decimal separator is not a dot,
** which can only be fixed with atrocious workarounds.
**
** Also, most of the strtod() implementations are hopelessly bloated,
** which is not just an I-cache hog, but a problem for static linkage
** on embedded systems, too.
**
** OTOH the builtin conversion function is very compact. Even though it
** does a lot more, like parsing long longs, octal or imaginary numbers
** and returning the result in different formats:
** a) It needs less than 3 KB (!) of machine code (on x64 with -Os),
** b) it doesn't perform any dynamic allocation, and
** c) it needs only around 600 bytes of stack space.
**
** The builtin function is faster than strtod() for typical inputs, e.g.
** "123", "1.5" or "1e6". Arguably, it's slower for very large exponents,
** which are not very common (this could be fixed, if needed).
**
** And most importantly, the builtin function is equally precise on all
** platforms. It correctly converts and rounds any input to a double.
** If this is not the case, please send a bug report -- but PLEASE verify
** that the implementation you're comparing to is not the culprit!
**
** The implementation quickly pre-scans the entire string first and
** handles simple integers on-the-fly. Otherwise, it dispatches to the
** base-specific parser. Hex and octal are straightforward.
**
** Decimal to binary conversion uses a fixed-length circular buffer in
** base 100. Some simple cases are handled directly. For other cases, the
** number in the buffer is up-scaled or down-scaled until the integer part
** is in the proper range. Then the integer part is rounded and converted
** to a double which is finally rescaled to the result. Denormals need
** special treatment to prevent incorrect 'double rounding'.
*/
/* Definitions for circular decimal digit buffer (base 100 = 2 digits/byte). */
#define STRSCAN_DIG 1024
#define STRSCAN_MAXDIG 800 /* 772 + extra are sufficient. */
/* Two decimal digits are packed per element, hence half of STRSCAN_DIG. */
#define STRSCAN_DDIG (STRSCAN_DIG/2)
/* Index mask for wrap-around. Only valid while STRSCAN_DDIG is a power of 2. */
#define STRSCAN_DMASK (STRSCAN_DDIG-1)
/*
** Helpers for circular buffer.
** DNEXT/DPREV step an index forward/backward by one with wrap-around.
** DLEN yields the wrapped difference (lo - hi), in the range 0..STRSCAN_DMASK.
*/
#define DNEXT(a) (((a)+1) & STRSCAN_DMASK)
#define DPREV(a) (((a)-1) & STRSCAN_DMASK)
#define DLEN(lo, hi) ((int32_t)(((lo)-(hi)) & STRSCAN_DMASK))
/*
** Case-insensitive comparison of character c against k. Setting bit 0x20
** folds ASCII upper-case letters to lower-case, so k must be given as a
** lower-case letter. Both arguments are parenthesized to keep the expansion
** correct for arbitrary argument expressions.
*/
#define casecmp(c, k) (((c) | 0x20) == (k))
/*
** Final conversion to double: stores (neg ? -x : x) * 2^ex2 in o->n.
** x must be below 2^63, since it is converted via a signed int64_t.
*/
static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
{
  double res;
  /* Avoid double rounding for denormals. */
  if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
    /* NYI: all of this generates way too much code on 32 bit CPUs. */
    int32_t msb, top;
#if defined(__GNUC__) && LJ_64
    msb = (int32_t)(__builtin_clzll(x)^63);
#else
    if ((x>>32))
      msb = 32+(int32_t)lj_fls((uint32_t)(x>>32));
    else
      msb = (int32_t)lj_fls((uint32_t)x);
#endif
    top = msb + ex2;  /* Binary exponent of the most significant bit. */
    if (top >= -1075 && top <= -1023) {
      /* Pre-round x to the denormal grid (round half to even). */
      uint64_t half = (uint64_t)1 << (-1075-ex2);  /* Round bit. */
      uint64_t unit = half+half;  /* Lowest bit that is kept. */
      /* Round up if the round bit is set and a sticky or the unit bit is. */
      if ((x & half) != 0 && (x & (unit+half-1)) != 0) x += unit;
      x &= ~(unit-1);
    }
  }
  /* Convert to double using a signed int64_t conversion, then rescale. */
  lua_assert((int64_t)x >= 0);
  res = (double)(int64_t)x;
  if (neg) res = -res;
  if (ex2 != 0) res = ldexp(res, ex2);
  o->n = res;
}
/*
** Parse hexadecimal number.
**
** p points at the first significant hex digit, dig is the number of digits
** (an embedded '.' is skipped and not counted), ex2 the binary exponent
** applying to those digits and neg is non-zero for negative input.
** Stores the value in o and returns its format or STRSCAN_ERROR.
*/
static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
                              StrScanFmt fmt, uint32_t opt,
                              int32_t ex2, int32_t neg, uint32_t dig)
{
  uint64_t x = 0;
  uint32_t i;
  /* Scan hex digits. At most 16 digits fit into x. */
  for (i = dig > 16 ? 16 : dig ; i; i--, p++) {
    uint32_t d = (*p != '.' ? *p : *++p); if (d > '9') d += 9;
    x = (x << 4) + (d & 15);
  }
  /* Summarize rounding-effect of excess digits. */
  for (i = 16; i < dig; i++, p++)
    x |= ((*p != '.' ? *p : *++p) != '0'), ex2 += 4;
  /* Format-specific handling. */
  switch (fmt) {
  case STRSCAN_INT:
    /*
    ** Outside of C mode a negative zero is passed on to the double path,
    ** since an int32_t cannot carry its sign.
    */
    if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg &&
        !(x == 0 && neg && !(opt & STRSCAN_OPT_C))) {
      /* Negate in unsigned arithmetic: -(int)x overflows for 0x80000000. */
      o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
      return STRSCAN_INT;  /* Fast path for 32 bit integers. */
    }
    if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
    /* fallthrough */
  case STRSCAN_U32:
    if (dig > 8) return STRSCAN_ERROR;
    o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
    return STRSCAN_U32;
  case STRSCAN_I64:
  case STRSCAN_U64:
    if (dig > 16) return STRSCAN_ERROR;
    /* Unsigned negation avoids overflow for x == 2^63. */
    o->u64 = neg ? ~x+1u : x;
    return fmt;
  default:
    break;
  }
  /* Reduce range then convert to double. The low bits are kept as sticky. */
  if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
  strscan_double(x, o, ex2, neg);
  return fmt;
}
/*
** Parse octal number.
**
** p points at the first significant digit and dig is the digit count.
** Rejects non-octal digits and values that do not fit into 64 bits.
** Stores the value in o and returns its format or STRSCAN_ERROR.
*/
static StrScanFmt strscan_oct(const uint8_t *p, TValue *o,
                              StrScanFmt fmt, int32_t neg, uint32_t dig)
{
  uint64_t x = 0;
  /* Scan octal digits. 22 digits only fit if the first one is 0 or 1. */
  if (dig > 22 || (dig == 22 && *p > '1')) return STRSCAN_ERROR;
  while (dig-- > 0) {
    if (!(*p >= '0' && *p <= '7')) return STRSCAN_ERROR;
    x = (x << 3) + (*p++ & 7);
  }
  /* Format-specific handling. */
  switch (fmt) {
  case STRSCAN_INT:
    if (x >= 0x80000000u+neg) fmt = STRSCAN_U32;
    /* fallthrough */
  case STRSCAN_U32:
    if ((x >> 32)) return STRSCAN_ERROR;
    /* Negate in unsigned arithmetic: -(int)x overflows for 0x80000000. */
    o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
    break;
  default:
  case STRSCAN_I64:
  case STRSCAN_U64:
    /* Unsigned negation avoids overflow for x == 2^63. */
    o->u64 = neg ? ~x+1u : x;
    break;
  }
  return fmt;
}
/*
** Parse decimal number.
**
** p points at the first significant digit, dig is the number of digits
** (an embedded '.' is skipped and not counted), ex10 the decimal exponent
** applying to those digits and neg is non-zero for negative input.
** Stores the value in o and returns its format or STRSCAN_ERROR.
*/
static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
                              StrScanFmt fmt, uint32_t opt,
                              int32_t ex10, int32_t neg, uint32_t dig)
{
  uint8_t xi[STRSCAN_DDIG], *xip = xi;
  if (dig) {
    uint32_t i = dig, excess = 0;
    if (i > STRSCAN_MAXDIG) {
      /*
      ** Only the leading STRSCAN_MAXDIG digits are stored. Every dropped
      ** low-order digit scales the stored prefix up by a factor of 10.
      */
      excess = i - STRSCAN_MAXDIG;
      ex10 += (int32_t)excess;
      i = STRSCAN_MAXDIG;
    }
    /* Scan unaligned leading digit. */
    if (((ex10^i) & 1))
      *xip++ = ((*p != '.' ? *p : *++p) & 15), i--, p++;
    /* Scan aligned double-digits. */
    for ( ; i > 1; i -= 2) {
      uint32_t d = 10 * ((*p != '.' ? *p : *++p) & 15); p++;
      *xip++ = d + ((*p != '.' ? *p : *++p) & 15); p++;
    }
    /*
    ** Scan and realign trailing digit. This appends a zero digit, so the
    ** digit count must grow, too. Otherwise the overflow check of the
    ** integer fast path below is off by one digit.
    */
    if (i) *xip++ = 10 * ((*p != '.' ? *p : *++p) & 15), ex10--, dig++, p++;
    /* Summarize rounding-effect of excess digits. */
    if (excess) {
      do {
        if ((*p != '.' ? *p : *++p) != '0') { xip[-1] |= 1; break; }
        p++;
      } while (--excess);
      dig = STRSCAN_MAXDIG;
    } else {  /* Simplify exponent. */
      while (ex10 > 0 && dig <= 18) *xip++ = 0, ex10 -= 2, dig += 2;
    }
  } else {  /* Only got zeros. */
    ex10 = 0;
    xi[0] = 0;
  }
  /* Fast path for numbers in integer format (but handles e.g. 1e6, too). */
  if (dig <= 20 && ex10 == 0) {
    uint8_t *xis;
    uint64_t x = xi[0];
    double n;
    for (xis = xi+1; xis < xip; xis++) x = x * 100 + *xis;
    if (!(dig == 20 && (xi[0] > 18 || (int64_t)x >= 0))) {  /* No overflow? */
      /* Format-specific handling. */
      switch (fmt) {
      case STRSCAN_INT:
        if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
          /* Negate in unsigned arithmetic: -(int)x overflows for 2^31. */
          o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
          return STRSCAN_INT;  /* Fast path for 32 bit integers. */
        }
        if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; goto plainnumber; }
        /* fallthrough */
      case STRSCAN_U32:
        if ((x >> 32) != 0) return STRSCAN_ERROR;
        o->i = neg ? (int32_t)(~(uint32_t)x+1u) : (int32_t)x;
        return STRSCAN_U32;
      case STRSCAN_I64:
      case STRSCAN_U64:
        /* Unsigned negation avoids overflow for x == 2^63. */
        o->u64 = neg ? ~x+1u : x;
        return fmt;
      default:
      plainnumber:  /* Fast path for plain numbers < 2^63. */
        if ((int64_t)x < 0) break;
        n = (double)(int64_t)x;
        if (neg) n = -n;
        o->n = n;
        return fmt;
      }
    }
  }
  /* Slow non-integer path. */
  if (fmt == STRSCAN_INT) {
    if ((opt & STRSCAN_OPT_C)) return STRSCAN_ERROR;
    fmt = STRSCAN_NUM;
  } else if (fmt > STRSCAN_INT) {
    return STRSCAN_ERROR;
  }
  {
    uint32_t hi = 0, lo = (uint32_t)(xip-xi);
    int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1);
    lua_assert(lo > 0 && (ex10 & 1) == 0);
    /* Handle simple overflow/underflow. Underflow keeps the sign. */
    if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; }
    else if (idig < -326/2) { o->n = neg ? -0.0 : 0.0; return fmt; }
    /* Scale up until we have at least 17 or 18 integer part digits. */
    while (idig < 9 && idig < DLEN(lo, hi)) {
      uint32_t i, cy = 0;
      ex2 -= 6;
      for (i = DPREV(lo); ; i = DPREV(i)) {
        uint32_t d = (xi[i] << 6) + cy;
        cy = (((d >> 2) * 5243) >> 17); d = d - cy * 100;  /* Div/mod 100. */
        xi[i] = (uint8_t)d;
        if (i == hi) break;
        if (d == 0 && i == DPREV(lo)) lo = i;
      }
      if (cy) {
        if (xi[DPREV(lo)] == 0) lo = DPREV(lo);
        else if (hi == lo) { lo = DPREV(lo); xi[DPREV(lo)] |= xi[lo]; }
        hi = DPREV(hi); xi[hi] = (uint8_t)cy; idig++;
      }
    }
    /* Scale down until no more than 17 or 18 integer part digits remain. */
    while (idig > 9) {
      uint32_t i, cy = 0;
      ex2 += 6;
      for (i = hi; i != lo; i = DNEXT(i)) {
        cy += xi[i];
        xi[i] = (cy >> 6);
        cy = 100 * (cy & 0x3f);
        if (xi[i] == 0 && i == hi) hi = DNEXT(hi), idig--;
      }
      while (cy) {
        if (hi == lo) { xi[DPREV(lo)] |= 1; break; }
        xi[lo] = (cy >> 6); lo = DNEXT(lo);
        cy = 100 * (cy & 0x3f);
      }
    }
    /* Collect integer part digits and convert to rescaled double. */
    {
      uint64_t x = xi[hi];
      uint32_t i;
      for (i = DNEXT(hi); --idig > 0 && i != lo; i = DNEXT(i))
        x = x * 100 + xi[i];
      if (i == lo) {
        while (--idig >= 0) x = x * 100;
      } else {  /* Gather round bit from remaining digits. */
        x <<= 1; ex2--;
        do {
          if (xi[i]) { x |= 1; break; }
          i = DNEXT(i);
        } while (i != lo);
      }
      strscan_double(x, o, ex2, neg);
    }
  }
  return fmt;
}
/*
** Scan string containing a number. Returns format. Returns value in o.
**
** p must be NUL-terminated. Leading and trailing whitespace is skipped,
** any other trailing character yields STRSCAN_ERROR. opt is a combination
** of the STRSCAN_OPT_* flags and selects the accepted/returned formats.
*/
StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
{
  int32_t neg = 0;
  /* Remove leading space, parse sign and non-numbers. */
  if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
    while (lj_char_isspace(*p)) p++;
    if (*p == '+' || *p == '-') neg = (*p++ == '-');
    if (LJ_UNLIKELY(*p >= 'A')) {  /* Parse "inf", "infinity" or "nan". */
      TValue tmp;
      setnanV(&tmp);
      if (casecmp(p[0],'i') && casecmp(p[1],'n') && casecmp(p[2],'f')) {
        if (neg) setminfV(&tmp); else setpinfV(&tmp);
        p += 3;
        if (casecmp(p[0],'i') && casecmp(p[1],'n') && casecmp(p[2],'i') &&
            casecmp(p[3],'t') && casecmp(p[4],'y')) p += 5;
      } else if (casecmp(p[0],'n') && casecmp(p[1],'a') && casecmp(p[2],'n')) {
        p += 3;
      }
      /* Anything else leaves p in place and fails the check below. */
      while (lj_char_isspace(*p)) p++;
      if (*p) return STRSCAN_ERROR;
      o->u64 = tmp.u64;
      return STRSCAN_NUM;
    }
  }
  /* Parse regular number. */
  {
    StrScanFmt fmt = STRSCAN_INT;
    int cmask = LJ_CHAR_DIGIT;
    int base = (opt & STRSCAN_OPT_C) && *p == '0' ? 0 : 10;
    const uint8_t *sp, *dp = NULL;
    uint32_t dig = 0, hasdig = 0, x = 0;
    int32_t ex = 0;
    /* Determine base and skip leading zeros. */
    if (LJ_UNLIKELY(*p <= '0')) {
      if (*p == '0' && casecmp(p[1], 'x'))
        base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
      for ( ; ; p++) {
        if (*p == '0') {
          hasdig = 1;
        } else if (*p == '.') {
          if (dp) return STRSCAN_ERROR;
          dp = p;
        } else {
          break;
        }
      }
    }
    /* Preliminary digit and decimal point scan. */
    for (sp = p; ; p++) {
      if (LJ_LIKELY(lj_char_isa(*p, cmask))) {
        x = x * 10 + (*p & 15);  /* For fast path below. */
        dig++;
      } else if (*p == '.') {
        if (dp) return STRSCAN_ERROR;
        dp = p;
      } else {
        break;
      }
    }
    if (!(hasdig | dig)) return STRSCAN_ERROR;
    /* Handle decimal point. */
    if (dp) {
      fmt = STRSCAN_NUM;
      if (dig) {
        ex = (int)(dp-(p-1)); dp = p-1;
        while (ex < 0 && *dp-- == '0') ex++, dig--;  /* Skip trailing zeros. */
        if (base == 16) ex *= 4;
      }
    }
    /* Parse exponent. */
    if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
      uint32_t xx;
      int negx = 0;
      fmt = STRSCAN_NUM; p++;
      if (*p == '+' || *p == '-') negx = (*p++ == '-');
      if (!lj_char_isdigit(*p)) return STRSCAN_ERROR;
      xx = (*p++ & 15);
      while (lj_char_isdigit(*p)) {
        if (xx < 65536) xx = xx * 10 + (*p & 15);  /* Saturate exponent. */
        p++;
      }
      ex += negx ? -(int)xx : (int)xx;
    }
    /* Parse suffix. */
    if (*p) {
      /* I (IMAG), U (U32), LL (I64), ULL/LLU (U64), L (long), UL/LU (ulong). */
      /* NYI: f (float). Not needed until cp_number() handles non-integers. */
      if (casecmp(*p, 'i')) {
        if (!(opt & STRSCAN_OPT_IMAG)) return STRSCAN_ERROR;
        p++; fmt = STRSCAN_IMAG;
      } else if (fmt == STRSCAN_INT) {
        if (casecmp(*p, 'u')) p++, fmt = STRSCAN_U32;
        if (casecmp(*p, 'l')) {
          p++;
          if (casecmp(*p, 'l')) p++, fmt += STRSCAN_I64 - STRSCAN_INT;
          else if (!(opt & STRSCAN_OPT_C)) return STRSCAN_ERROR;
          else if (sizeof(long) == 8) fmt += STRSCAN_I64 - STRSCAN_INT;
        }
        if (casecmp(*p, 'u') && (fmt == STRSCAN_INT || fmt == STRSCAN_I64))
          p++, fmt += STRSCAN_U32 - STRSCAN_INT;
        if ((fmt == STRSCAN_U32 && !(opt & STRSCAN_OPT_C)) ||
            (fmt >= STRSCAN_I64 && !(opt & STRSCAN_OPT_LL)))
          return STRSCAN_ERROR;
      }
      while (lj_char_isspace(*p)) p++;
      if (*p) return STRSCAN_ERROR;
    }
    /* Fast path for decimal 32 bit integers. */
    if (fmt == STRSCAN_INT && base == 10 &&
        (dig < 10 || (dig == 10 && *sp <= '2' && x < 0x80000000u+neg))) {
      if ((opt & STRSCAN_OPT_TONUM)) {
        /*
        ** Convert the unsigned magnitude: x may be 0x80000000 for negative
        ** input, which would flip the sign if cast to int32_t first.
        */
        o->n = neg ? -(double)x : (double)x;
        return STRSCAN_NUM;
      } else if (x == 0 && neg && !(opt & STRSCAN_OPT_C)) {
        /* An int32_t cannot hold -0. Return it as a double (not in C mode). */
        o->n = -0.0;
        return STRSCAN_NUM;
      } else {
        /* Negate in unsigned arithmetic: -(int)x overflows for 0x80000000. */
        o->i = neg ? (int32_t)(~x+1u) : (int32_t)x;
        return STRSCAN_INT;
      }
    }
    /* Dispatch to base-specific parser. */
    if (base == 0 && !(fmt == STRSCAN_NUM || fmt == STRSCAN_IMAG))
      return strscan_oct(sp, o, fmt, neg, dig);
    if (base == 16)
      fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
    else
      fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
    /* Try to convert number to integer, if requested. */
    if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
      double n = o->n;
      /* A zero result from negative input stays a double to keep its sign. */
      if (!(n == 0 && neg)) {
        int32_t i = lj_num2int(n);
        if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; }
      }
    }
    return fmt;
  }
}
/* Convert a string object to a double in o. Returns non-zero on success. */
int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
{
  const uint8_t *s = (const uint8_t *)strdata(str);
  StrScanFmt fmt = lj_strscan_scan(s, o, STRSCAN_OPT_TONUM);
  lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
  if (fmt == STRSCAN_ERROR)
    return 0;
  return 1;
}
#if LJ_DUALNUM
/*
** Convert a string object to a number in o, using an integer if the scanner
** returns one. Returns non-zero on success.
*/
int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
{
  const uint8_t *s = (const uint8_t *)strdata(str);
  StrScanFmt fmt = lj_strscan_scan(s, o, STRSCAN_OPT_TOINT);
  lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
  if (fmt == STRSCAN_ERROR)
    return 0;
  if (fmt == STRSCAN_INT)
    setitype(o, LJ_TISNUM);  /* Tag the integer payload. */
  return 1;
}
#endif
/* Remove the circular buffer helper macros again at the end of the file. */
#undef DNEXT
#undef DPREV
#undef DLEN

39
src/lj_strscan.h Normal file
View File

@ -0,0 +1,39 @@
/*
** String scanning.
** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
*/
#ifndef _LJ_STRSCAN_H
#define _LJ_STRSCAN_H
#include "lj_obj.h"
/* Options for accepted/returned formats. */
#define STRSCAN_OPT_TOINT 0x01 /* Convert to int32_t, if possible. */
#define STRSCAN_OPT_TONUM 0x02 /* Always convert to double. */
#define STRSCAN_OPT_IMAG 0x04 /* Accept an 'i' suffix (STRSCAN_IMAG). */
#define STRSCAN_OPT_LL 0x08 /* Accept suffixes yielding STRSCAN_I64/U64. */
#define STRSCAN_OPT_C 0x10 /* C rules: leading 0 is octal, U/L suffixes. */
/*
** Returned format.
** The order matters: the scanner compares against STRSCAN_INT and
** STRSCAN_I64 and steps between the integer formats by adding the
** difference of two enumerators.
*/
typedef enum {
STRSCAN_ERROR,
STRSCAN_NUM, STRSCAN_IMAG,
STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
} StrScanFmt;
/* Scan a NUL-terminated string. Returns the format and the value in o. */
LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
/* Convert a string object to a double. Returns non-zero on success. */
LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
/* Same, but may return an integer in dual-number builds. */
#if LJ_DUALNUM
LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
#else
#define lj_strscan_number(s, o) lj_strscan_num((s), (o))
#endif
/*
** Check for number or convert string to number/int in-place (!).
** Returns 1 if o holds a number afterwards, 0 otherwise.
*/
static LJ_AINLINE int lj_strscan_numberobj(TValue *o)
{
  if (tvisnumber(o))
    return 1;  /* Already a number, nothing to convert. */
  if (!tvisstr(o))
    return 0;  /* Neither a number nor a string. */
  return lj_strscan_number(strV(o), o) != 0;
}
#endif

View File

@ -43,6 +43,7 @@
#include "lj_dispatch.c"
#include "lj_vmevent.c"
#include "lj_vmmath.c"
#include "lj_strscan.c"
#include "lj_api.c"
#include "lj_lex.c"
#include "lj_parse.c"

View File

@ -98,7 +98,6 @@
#define LUA_NUMBER_FMT "%.14g"
#define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n))
#define LUAI_MAXNUMBER2STR 32
#define lua_str2number(s, p) strtod((s), (p))
#define LUA_INTFRMLEN "l"
#define LUA_INTFRM_T long