From 5bb1f0edac809302b299e189fb3c4006e0bc939a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sun, 12 May 2013 22:37:02 +0200 Subject: [PATCH] Refactor string.format(). --- src/Makefile | 3 +- src/Makefile.dep | 30 ++--- src/lib_string.c | 240 +++++++++++--------------------------- src/lj_errmsg.h | 5 +- src/lj_str.h | 2 +- src/lj_strfmt.c | 295 +++++++++++++++++++++++++++++++++++++++++++++++ src/lj_strfmt.h | 84 ++++++++++++++ src/ljamalg.c | 1 + 8 files changed, 465 insertions(+), 195 deletions(-) create mode 100644 src/lj_strfmt.c create mode 100644 src/lj_strfmt.h diff --git a/src/Makefile b/src/Makefile index 0065b8c2..b9101a74 100644 --- a/src/Makefile +++ b/src/Makefile @@ -439,7 +439,8 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ + lj_strfmt.o lj_api.o \ + lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 074d0908..8e01865c 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -35,7 +35,7 @@ lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ - lj_char.h lj_lib.h lj_libdef.h + lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ lj_tab.h lj_lib.h lj_libdef.h @@ -179,6 +179,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h 
lj_def.h lj_arch.h \ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h lj_char.h +lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_buf.h lj_gc.h lj_str.h lj_char.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ @@ -203,19 +205,19 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c lj_tab.c lj_func.c \ lj_udata.c lj_meta.c lj_strscan.h lj_lib.h lj_debug.c lj_state.c \ lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h luajit.h lj_vmevent.c \ - lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c lj_lex.c lualib.h \ - lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c \ - lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h \ - lj_ccallback.c lj_target.h lj_target_*.h lj_mcode.h lj_carith.c \ - lj_carith.h lj_clib.c lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c \ - lj_ircall.h lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h \ - lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c \ - lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h \ - lj_crecord.c lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h \ - lj_emit_*.h lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c \ - lib_aux.c lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c \ - lib_io.c lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c \ - lib_ffi.c lib_init.c + lj_vmevent.h lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt.h lj_api.c \ + lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ + lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ + lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ + lj_mcode.h 
lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ + lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h lj_iropt.h lj_opt_mem.c \ + lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c \ + lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c lj_snap.c lj_record.c \ + lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h lj_ffrecord.c \ + lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h lj_trace.c \ + lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lj_libdef.h \ + lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c \ + lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ diff --git a/src/lib_string.c b/src/lib_string.c index 2c86daa4..b955e933 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -6,8 +6,6 @@ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h */ -#include - #define lib_string_c #define LUA_LIB @@ -26,6 +24,7 @@ #include "lj_ff.h" #include "lj_bcdump.h" #include "lj_char.h" +#include "lj_strfmt.h" #include "lj_lib.h" /* ------------------------------------------------------------------------ */ @@ -641,130 +640,20 @@ LJLIB_CF(string_gsub) /* ------------------------------------------------------------------------ */ -/* Max. buffer size needed (at least #string.format("%99.99f", -1e308)). */ -#define STRING_FMT_MAXBUF 512 -/* Valid format specifier flags. */ -#define STRING_FMT_FLAGS "-+ #0" -/* Max. format specifier size. */ -#define STRING_FMT_MAXSPEC \ - (sizeof(STRING_FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) - -/* Add quoted string to buffer. 
*/ -static void string_fmt_quoted(SBuf *sb, GCstr *str) -{ - const char *s = strdata(str); - MSize len = str->len; - lj_buf_putb(sb, '"'); - while (len--) { - uint32_t c = (uint32_t)(uint8_t)*s++; - char *p = lj_buf_more(sb, 4); - if (c == '"' || c == '\\' || c == '\n') { - *p++ = '\\'; - } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ - uint32_t d; - *p++ = '\\'; - if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { - *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; - goto tens; - } else if (c >= 10) { - tens: - d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d); - } - c += '0'; - } - *p++ = (char)c; - setsbufP(sb, p); - } - lj_buf_putb(sb, '"'); -} - -/* Scan format and generate format specifier. */ -static const char *string_fmt_scan(lua_State *L, char *spec, const char *fmt) -{ - const char *p = fmt; - while (*p && strchr(STRING_FMT_FLAGS, *p) != NULL) p++; /* Skip flags. */ - if ((size_t)(p - fmt) >= sizeof(STRING_FMT_FLAGS)) - lj_err_caller(L, LJ_ERR_STRFMTR); - if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for width. */ - if (lj_char_isdigit((uint8_t)*p)) p++; - if (*p == '.') { - p++; - if (lj_char_isdigit((uint8_t)*p)) p++; /* Skip max. 2 digits for prec. */ - if (lj_char_isdigit((uint8_t)*p)) p++; - } - if (lj_char_isdigit((uint8_t)*p)) - lj_err_caller(L, LJ_ERR_STRFMTW); - *spec++ = '%'; - strncpy(spec, fmt, (size_t)(p - fmt + 1)); - spec += p - fmt + 1; - *spec = '\0'; - return p; -} - -/* Patch LUA_INTRFRMLEN into integer format specifier. */ -static void string_fmt_intfmt(char *spec) -{ - char c; - do { - c = *spec++; - } while (*spec); - *--spec = (LUA_INTFRMLEN)[0]; - if ((LUA_INTFRMLEN)[1]) *++spec = (LUA_INTFRMLEN)[1]; - *++spec = c; - *++spec = '\0'; -} - -/* Derive sprintf argument for integer format. Ugly. 
*/ -static LUA_INTFRM_T string_fmt_intarg(lua_State *L, int arg) -{ - if (sizeof(LUA_INTFRM_T) == 4) { - return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); - } else { - cTValue *o; - lj_lib_checknumber(L, arg); - o = L->base+arg-1; - if (tvisint(o)) - return (LUA_INTFRM_T)intV(o); - else - return (LUA_INTFRM_T)numV(o); - } -} - -/* Derive sprintf argument for unsigned integer format. Ugly. */ -static unsigned LUA_INTFRM_T string_fmt_uintarg(lua_State *L, int arg) -{ - if (sizeof(LUA_INTFRM_T) == 4) { - return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); - } else { - cTValue *o; - lj_lib_checknumber(L, arg); - o = L->base+arg-1; - if (tvisint(o)) - return (unsigned LUA_INTFRM_T)intV(o); - else if ((int32_t)o->u32.hi < 0) - return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o); - else - return (unsigned LUA_INTFRM_T)numV(o); - } -} - /* Emulate tostring() inline. */ -static GCstr *string_fmt_tostring(lua_State *L, int arg) +static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) { TValue *o = L->base+arg-1; cTValue *mo; lua_assert(o < L->top); /* Caller already checks for existence. */ if (LJ_LIKELY(tvisstr(o))) return strV(o); - if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { copyTV(L, L->top++, mo); copyTV(L, L->top++, o); lua_call(L, 1, 1); - L->top--; - if (tvisstr(L->top)) - return strV(L->top); - o = L->base+arg-1; - copyTV(L, o, L->top); + copyTV(L, L->base+arg-1, --L->top); + return NULL; /* Buffer may be overwritten, retry. 
*/ } if (tvisnumber(o)) { return lj_str_fromnumber(L, o); @@ -775,84 +664,85 @@ static GCstr *string_fmt_tostring(lua_State *L, int arg) } else if (tvistrue(o)) { return lj_str_newlit(L, "true"); } else { - if (tvisfunc(o) && isffunc(funcV(o))) - lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); - else - lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); - L->top--; - return strV(L->top); + char buf[8+2+2+16], *p = buf; + if (tvisfunc(o) && isffunc(funcV(o))) { + p = lj_buf_wmem(p, "function: builtin#", 18); + p = lj_str_bufint(p, funcV(o)->c.ffid); + } else { + p = lj_buf_wmem(p, lj_typename(o), strlen(lj_typename(o))); + *p++ = ':'; *p++ = ' '; + p = lj_str_bufptr(p, lua_topointer(L, arg)); + } + return lj_str_new(L, buf, (size_t)(p - buf)); } } LJLIB_CF(string_format) { - int arg = 1, top = (int)(L->top - L->base); - GCstr *sfmt = lj_lib_checkstr(L, arg); - const char *fmt = strdata(sfmt); - const char *efmt = fmt + sfmt->len; - SBuf *sb = lj_buf_tmp_(L); - while (fmt < efmt) { - if (*fmt != L_ESC || *++fmt == L_ESC) { - lj_buf_putb(sb, *fmt++); + int arg, top = (int)(L->top - L->base); + GCstr *sfmt; + SBuf *sb; + FormatState fs; + SFormat sf; + int retry = 0; +again: + arg = 1; + sb = lj_buf_tmp_(L); + sfmt = lj_lib_checkstr(L, arg); + lj_strfmt_init(&fs, strdata(sfmt), sfmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { + if (sf == STRFMT_LIT) { + lj_buf_putmem(sb, fs.str, fs.len); + } else if (sf == STRFMT_ERR) { + lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); } else { - char buf[STRING_FMT_MAXBUF]; - char spec[STRING_FMT_MAXSPEC]; - MSize len = 0; if (++arg > top) luaL_argerror(L, arg, lj_obj_typename[0]); - fmt = string_fmt_scan(L, spec, fmt); - switch (*fmt++) { - case 'c': - len = (MSize)sprintf(buf, spec, lj_lib_checkint(L, arg)); - break; - case 'd': case 'i': - string_fmt_intfmt(spec); - len = (MSize)sprintf(buf, spec, string_fmt_intarg(L, arg)); - break; - case 'o': case 'u': case 'x': 
case 'X': - string_fmt_intfmt(spec); - len = (MSize)sprintf(buf, spec, string_fmt_uintarg(L, arg)); - break; - case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { - TValue tv; - tv.n = lj_lib_checknum(L, arg); - if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { - /* Canonicalize output of non-finite values. */ - char nbuf[LJ_STR_NUMBUF]; - char *p = lj_str_bufnum(nbuf, &tv); - if (fmt[-1] < 'a') { *(p-3) -= 0x20; *(p-2) -= 0x20; *(p-1) -= 0x20; } - *p = '\0'; - for (p = spec; *p < 'A' && *p != '.'; p++) ; - *p++ = 's'; *p = '\0'; - len = (MSize)sprintf(buf, spec, nbuf); - break; + switch (STRFMT_TYPE(sf)) { + case STRFMT_INT: + if (tvisint(L->base+arg-1)) { + int32_t k = intV(L->base+arg-1); + if (sf == STRFMT_INT) + lj_buf_putint(sb, k); /* Shortcut for plain %d. */ + else + lj_strfmt_putxint(sb, sf, k); + } else { + lj_strfmt_putnum_int(sb, sf, lj_lib_checknum(L, arg)); } - len = (MSize)sprintf(buf, spec, (double)tv.n); + break; + case STRFMT_UINT: + if (tvisint(L->base+arg-1)) + lj_strfmt_putxint(sb, sf, intV(L->base+arg-1)); + else + lj_strfmt_putnum_uint(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_NUM: + lj_strfmt_putnum(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_STR: { + GCstr *str = string_fmt_tostring(L, arg, retry); + if (str == NULL) + retry = 1; + else if ((sf & STRFMT_T_QUOTED)) + lj_strfmt_putquoted(sb, str); + else + lj_strfmt_putstr(sb, sf, str); break; } - case 'q': - string_fmt_quoted(sb, lj_lib_checkstr(L, arg)); - continue; - case 'p': + case STRFMT_CHAR: + lj_strfmt_putchar(sb, sf, lj_lib_checkint(L, arg)); + break; + case STRFMT_PTR: /* No formatting. */ setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF), lua_topointer(L, arg))); - continue; - case 's': { - GCstr *str = string_fmt_tostring(L, arg); - if (!strchr(spec, '.') && str->len >= 100) { /* Format overflow? */ - lj_buf_putmem(sb, strdata(str), str->len); /* Use orig string. 
*/ - continue; - } - len = (MSize)sprintf(buf, spec, strdata(str)); break; - } default: - lj_err_callerv(L, LJ_ERR_STRFMTO, fmt[-1] ? fmt[-1] : ' '); + lua_assert(0); break; } - lj_buf_putmem(sb, buf, len); } } + if (retry++ == 1) goto again; setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); return 1; diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index fd46acd4..e62dc237 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -96,9 +96,7 @@ ERRDEF(STRPATX, "pattern too complex") ERRDEF(STRCAPI, "invalid capture index") ERRDEF(STRCAPN, "too many captures") ERRDEF(STRCAPU, "unfinished capture") -ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) -ERRDEF(STRFMTR, "invalid format (repeated flags)") -ERRDEF(STRFMTW, "invalid format (width or precision too long)") +ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT @@ -117,7 +115,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) /* Lexer/parser errors. */ ERRDEF(XMODE, "attempt to load chunk with wrong mode") ERRDEF(XNEAR, "%s near " LUA_QS) -ERRDEF(XELEM, "lexical element too long") ERRDEF(XLINES, "chunk has too many lines") ERRDEF(XLEVELS, "chunk has too many syntax levels") ERRDEF(XNUMBER, "malformed number") diff --git a/src/lj_str.h b/src/lj_str.h index dd9b3d94..6e08764e 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -25,7 +25,7 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) /* Type conversions. 
*/
-LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *buf, int32_t k);
+LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
 LJ_FUNC char * LJ_FASTCALL lj_str_bufnum(char *p, cTValue *o);
 LJ_FUNC char * LJ_FASTCALL lj_str_bufptr(char *p, const void *v);
 LJ_FUNC const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp);
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..9aaf08e2
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,295 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <stdio.h>
+
+#define lj_strfmt_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+#include "lj_char.h"
+#include "lj_strfmt.h"
+
+/* -- Format parser ------------------------------------------------------- */
+/* Conversion char minus 'A' -> format type/subtype. Zero marks invalid chars. */
+static const uint8_t strfmt_map[('x'-'A')+1] = {
+  STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
+  0,0,0,0,0,0,
+  STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
+  0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
+};
+/* Parse next element: literal (fs->str, fs->len), conversion, error or EOF. */
+SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
+{
+  const uint8_t *p = fs->p, *e = fs->e;
+  fs->str = (const char *)p;
+  for (; p < e; p++) {
+    if (*p == '%') {  /* Escape char? */
+      if (p[1] == '%') {  /* '%%'? */
+        fs->p = ++p+1;  /* Literal keeps the first '%', resume after the second. */
+        goto retlit;
+      } else {
+        SFormat sf = 0;
+        uint32_t c;
+        if (p != (const uint8_t *)fs->str)
+          break;  /* Return the literal in front of the conversion first. */
+        for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
+          /* Parse flags. */
+          if (*p == '-') sf |= STRFMT_F_LEFT;
+          else if (*p == '+') sf |= STRFMT_F_PLUS;
+          else if (*p == '0') sf |= STRFMT_F_ZERO;
+          else if (*p == ' ') sf |= STRFMT_F_SPACE;
+          else if (*p == '#') sf |= STRFMT_F_ALT;
+          else break;
+        }
+        if ((uint32_t)*p - '0' < 10) {  /* Parse width. */
+          uint32_t width = (uint32_t)*p++ - '0';
+          if ((uint32_t)*p - '0' < 10)
+            width = (uint32_t)*p++ - '0' + width*10;
+          sf |= (width << STRFMT_SH_WIDTH);
+        }
+        if (*p == '.') {  /* Parse precision. */
+          uint32_t prec = 0;
+          p++;
+          if ((uint32_t)*p - '0' < 10) {
+            prec = (uint32_t)*p++ - '0';
+            if ((uint32_t)*p - '0' < 10)
+              prec = (uint32_t)*p++ - '0' + prec*10;
+          }
+          sf |= ((prec+1) << STRFMT_SH_PREC);  /* Stored +1: 0 = no precision. */
+        }
+        /* Parse conversion. */
+        c = (uint32_t)*p - 'A';
+        if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
+          uint32_t sx = strfmt_map[c];
+          if (sx) {
+            fs->p = p+1;  /* Bit 0x20 of c is clear only for 'A'..'Z'. */
+            return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
+          }
+        }
+        /* Return error location. */
+        if (*p >= 32) p++;  /* Include the offending char, unless it's < 32. */
+        fs->len = (MSize)(p - (const uint8_t *)fs->str);
+        fs->p = fs->e;  /* Nothing more is parsed after an error. */
+        return STRFMT_ERR;
+      }
+    }
+  }
+  fs->p = p;
+retlit:
+  fs->len = (MSize)(p - (const uint8_t *)fs->str);
+  return fs->len ? STRFMT_LIT : STRFMT_EOF;
+}
+
+/* -- Format conversions -------------------------------------------------- */
+
+/* Add char to buffer, padded with spaces up to the format width. */
+SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c)
+{
+  MSize width = STRFMT_WIDTH(sf);
+  char *p = lj_buf_more(sb, width > 1 ? width : 1);
+  if ((sf & STRFMT_F_LEFT)) *p++ = (char)c;
+  while (width-- > 1) *p++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) *p++ = (char)c;
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add string to buffer, cut to the precision and space-padded to the width. */
+SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str)
+{
+  MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf);
+  MSize width = STRFMT_WIDTH(sf);
+  char *p = lj_buf_more(sb, width > len ? width : len);
+  if ((sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+  while (width-- > len) *p++ = ' ';
+  if (!(sf & STRFMT_F_LEFT)) p = lj_buf_wmem(p, strdata(str), len);
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add quoted string to buffer (no formatting).
*/
+SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+{
+  const char *s = strdata(str);
+  MSize len = str->len;
+  lj_buf_putb(sb, '"');
+  while (len--) {
+    uint32_t c = (uint32_t)(uint8_t)*s++;
+    char *p = lj_buf_more(sb, 4);  /* Worst case: backslash plus 3 digits. */
+    if (c == '"' || c == '\\' || c == '\n') {
+      *p++ = '\\';
+    } else if (lj_char_iscntrl(c)) {  /* This can only be 0-31 or 127. */
+      uint32_t d;
+      *p++ = '\\';
+      if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {  /* Emit all 3 digits? */
+        *p++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+        goto tens;
+      } else if (c >= 10) {
+      tens:
+        d = (c * 205) >> 11; c -= d * 10; *p++ = (char)('0'+d);  /* d = c/10. */
+      }
+      c += '0';
+    }
+    *p++ = (char)c;
+    setsbufP(sb, p);
+  }
+  lj_buf_putb(sb, '"');
+  return sb;
+}
+
+/* Add formatted signed/unsigned integer to buffer. */
+SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k)
+{
+  char buf[1+22], *q = buf + sizeof(buf), *p;
+#ifdef LUA_USE_ASSERT
+  char *ps;
+#endif
+  MSize prefix = 0, len, prec, pprec, width, need;
+
+  /* Figure out signed prefixes. Prefix = (prefix length << 8) + last char. */
+  if (STRFMT_TYPE(sf) == STRFMT_INT) {
+    if ((int64_t)k < 0) {
+      k = ~k+1u;  /* Negate as unsigned: -(int64_t)k overflows for INT64_MIN. */
+      prefix = 256 + '-';
+    } else if ((sf & STRFMT_F_PLUS)) {
+      prefix = 256 + '+';
+    } else if ((sf & STRFMT_F_SPACE)) {
+      prefix = 256 + ' ';
+    }
+  }
+
+  /* Convert number and store to fixed-size buffer in reverse order. */
+  prec = STRFMT_PREC(sf);
+  if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;  /* Precision overrides '0'. */
+  if (k == 0) {  /* Special-case zero argument. */
+    if (prec != 0 ||
+        (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
+      *--q = '0';
+  } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) {  /* Decimal. */
+    uint32_t k2;
+    while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
+    k2 = (uint32_t)k;
+    do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
+  } else if ((sf & STRFMT_T_HEX)) {  /* Hex. */
+    const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
+                                                 "0123456789abcdef";
+    do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
+    if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
+  } else {  /* Octal. */
+    do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
+    if ((sf & STRFMT_F_ALT)) *--q = '0';
+  }
+
+  /* Calculate sizes. */
+  len = (MSize)(buf + sizeof(buf) - q);
+  if ((int32_t)len >= (int32_t)prec) prec = len;
+  width = STRFMT_WIDTH(sf);
+  pprec = prec + (prefix >> 8);
+  need = width > pprec ? width : pprec;
+  p = lj_buf_more(sb, need);
+#ifdef LUA_USE_ASSERT
+  ps = p;
+#endif
+
+  /* Format number with leading/trailing whitespace and zeros. */
+  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
+    while (width-- > pprec) *p++ = ' ';
+  if (prefix) {
+    if ((char)prefix >= 'X') *p++ = '0';  /* "0x"/"0X": sign chars are < 'X'. */
+    *p++ = (char)prefix;
+  }
+  if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
+    while (width-- > pprec) *p++ = '0';
+  while (prec-- > len) *p++ = '0';
+  while (q < buf + sizeof(buf)) *p++ = *q++;  /* Add number itself. */
+  if ((sf & STRFMT_F_LEFT))
+    while (width-- > pprec) *p++ = ' ';
+
+  lua_assert(need == (MSize)(p - ps));
+  setsbufP(sb, p);
+  return sb;
+}
+
+/* Add number formatted as signed integer to buffer. */
+SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n)
+{
+  int64_t k = (int64_t)n;
+  if (checki32(k) && sf == STRFMT_INT)
+    return lj_buf_putint(sb, k);  /* Shortcut for plain %d. */
+  else
+    return lj_strfmt_putxint(sb, sf, (uint64_t)k);
+}
+
+/* Add number formatted as unsigned integer to buffer. */
+SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n)
+{
+  int64_t k;
+  if (n >= 9223372036854775808.0)  /* >= 2^63: subtract 2^64 before the cast. */
+    k = (int64_t)(n - 18446744073709551616.0);
+  else
+    k = (int64_t)n;
+  return lj_strfmt_putxint(sb, sf, (uint64_t)k);
+}
+
+/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */
+#define STRFMT_FMTNUMBUF 512
+
+/* Add formatted floating-point number to buffer.
*/
+SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n)
+{
+  TValue tv;
+  tv.n = n;
+  if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
+    /* Canonicalize output of non-finite values. */
+    MSize width = STRFMT_WIDTH(sf), len = 3;
+    int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;  /* Case mask. */
+    char *p;
+    if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) {  /* Mantissa set: NaN. */
+      ch ^= ('n' << 16) | ('a' << 8) | 'n';
+      if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    } else {
+      ch ^= ('i' << 16) | ('n' << 8) | 'f';
+      if ((tv.u32.hi & 0x80000000)) prefix = '-';
+      else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+      else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+    }
+    if (prefix) len = 4;
+    p = lj_buf_more(sb, width > len ? width : len);
+    if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+    if (prefix) *p++ = prefix;
+    *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
+    if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+    setsbufP(sb, p);
+  } else {  /* Delegate to sprintf() for now. */
+    uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf);
+    char fmt[1+5+2+3+1+1], *p = fmt;  /* '%', flags, width, ".prec", conv, NUL. */
+    *p++ = '%';
+    if ((sf & STRFMT_F_LEFT)) *p++ = '-';
+    if ((sf & STRFMT_F_PLUS)) *p++ = '+';
+    if ((sf & STRFMT_F_ZERO)) *p++ = '0';
+    if ((sf & STRFMT_F_SPACE)) *p++ = ' ';
+    if ((sf & STRFMT_F_ALT)) *p++ = '#';
+    if (width) {
+      uint8_t x = width / 10, y = width % 10;
+      if (x) *p++ = '0' + x;
+      *p++ = '0' + y;
+    }
+    if (prec != 255) {  /* 255 = no precision given (stored value 0, minus 1). */
+      uint8_t x = prec / 10, y = prec % 10;
+      *p++ = '.';
+      if (x) *p++ = '0' + x;
+      *p++ = '0' + y;
+    }
+    *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0);
+    *p = '\0';  /* The constant above packs the bytes 'a','e','f','g'. */
+    p = lj_buf_more(sb, STRFMT_FMTNUMBUF);
+    setsbufP(sb, p + sprintf(p, fmt, n));
+  }
+  return sb;
+}
+
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..b3556f1a
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,84 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STRFMT_H
+#define _LJ_STRFMT_H
+
+#include "lj_obj.h"
+
+typedef uint32_t SFormat;  /* Format indicator. */
+
+/* Format parser state. */
+typedef struct FormatState {
+  const uint8_t *p;  /* Current format string pointer. */
+  const uint8_t *e;  /* End of format string. */
+  const char *str;  /* Returned literal string. */
+  MSize len;  /* Size of literal string. */
+} FormatState;
+
+/* Format types (max. 16). */
+typedef enum FormatType {
+  STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
+  STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
+} FormatType;
+
+/* Format subtypes (bits are reused). */
+#define STRFMT_T_HEX    0x0010  /* STRFMT_UINT */
+#define STRFMT_T_OCT    0x0020  /* STRFMT_UINT */
+#define STRFMT_T_FP_A   0x0000  /* STRFMT_NUM */
+#define STRFMT_T_FP_E   0x0010  /* STRFMT_NUM */
+#define STRFMT_T_FP_F   0x0020  /* STRFMT_NUM */
+#define STRFMT_T_FP_G   0x0030  /* STRFMT_NUM */
+#define STRFMT_T_QUOTED 0x0010  /* STRFMT_STR */
+
+/* Format flags. */
+#define STRFMT_F_LEFT   0x0100
+#define STRFMT_F_PLUS   0x0200
+#define STRFMT_F_ZERO   0x0400
+#define STRFMT_F_SPACE  0x0800
+#define STRFMT_F_ALT    0x1000
+#define STRFMT_F_UPPER  0x2000
+
+/* Format indicator fields. Precision is stored +1, so 0 means 'not given'. */
+#define STRFMT_SH_WIDTH 16
+#define STRFMT_SH_PREC  24
+
+#define STRFMT_TYPE(sf)  ((FormatType)((sf) & 15))
+#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
+#define STRFMT_PREC(sf)  ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
+#define STRFMT_FP(sf)    (((sf) >> 4) & 3)
+
+/* Formats for conversion characters. */
+#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
+#define STRFMT_C (STRFMT_CHAR)
+#define STRFMT_D (STRFMT_INT)
+#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
+#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
+#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
+#define STRFMT_I STRFMT_D
+#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
+#define STRFMT_P (STRFMT_PTR)
+#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
+#define STRFMT_S (STRFMT_STR)
+#define STRFMT_U (STRFMT_UINT)
+#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
+
+static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
+{
+  fs->p = (const uint8_t *)p;
+  fs->e = (const uint8_t *)p + len;
+  lua_assert(*fs->e == 0);  /* Must be NUL-terminated (may have NULs inside). */
+}
+
+LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
+LJ_FUNC SBuf *lj_strfmt_putchar(SBuf *sb, SFormat sf, int32_t c);
+LJ_FUNC SBuf *lj_strfmt_putstr(SBuf *sb, SFormat sf, GCstr *str);
+LJ_FUNC SBuf *lj_strfmt_putquoted(SBuf *sb, GCstr *str);
+LJ_FUNC SBuf *lj_strfmt_putxint(SBuf *sb, SFormat sf, uint64_t k);
+LJ_FUNC SBuf *lj_strfmt_putnum_int(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putnum_uint(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putnum(SBuf *sb, SFormat sf, lua_Number n);
+
+#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 487609c4..7198a09f 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -45,6 +45,7 @@
 #include "lj_vmevent.c"
 #include "lj_vmmath.c"
 #include "lj_strscan.c"
+#include "lj_strfmt.c"
 #include "lj_api.c"
 #include "lj_lex.c"
 #include "lj_parse.c"