From 18f6aa97fd93df8e9964c2d22f20f16e6b71b72b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Fri, 26 Feb 2016 17:59:43 +0100 Subject: [PATCH] Use internal implementation for converting FP numbers to strings. Contributed by Peter Cawley. --- src/Makefile | 2 +- src/Makefile.dep | 4 +- src/lib_base.c | 3 +- src/lib_io.c | 3 +- src/lj_buf.c | 2 +- src/lj_ctype.c | 14 +- src/lj_meta.c | 14 +- src/lj_strfmt.c | 108 +------- src/lj_strfmt.h | 6 +- src/lj_strfmt_num.c | 591 ++++++++++++++++++++++++++++++++++++++++++++ src/ljamalg.c | 1 + 11 files changed, 627 insertions(+), 121 deletions(-) create mode 100644 src/lj_strfmt_num.c diff --git a/src/Makefile b/src/Makefile index 1df39dc1..5d66f6eb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -478,7 +478,7 @@ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_api.o lj_profile.o \ + lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ diff --git a/src/Makefile.dep b/src/Makefile.dep index 9aefb236..2c329f55 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -94,7 +94,7 @@ lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_crecord.h lj_strfmt.h lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ - lj_ccallback.h + lj_ccallback.h lj_buf.h lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h @@ -188,6 +188,8 @@ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_char.h 
lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h +lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ diff --git a/src/lib_base.c b/src/lib_base.c index ca268b1d..3c973265 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -495,11 +495,10 @@ LJLIB_CF(print) shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring); for (i = 0; i < nargs; i++) { cTValue *o = &L->base[i]; - char buf[STRFMT_MAXBUF_NUM]; const char *str; size_t size; MSize len; - if (shortcut && (str = lj_strfmt_wstrnum(buf, o, &len)) != NULL) { + if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) { size = len; } else { copyTV(L, L->top+1, o); diff --git a/src/lib_io.c b/src/lib_io.c index 2aa83474..501de568 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -232,9 +232,8 @@ static int io_file_write(lua_State *L, FILE *fp, int start) cTValue *tv; int status = 1; for (tv = L->base+start; tv < L->top; tv++) { - char buf[STRFMT_MAXBUF_NUM]; MSize len; - const char *p = lj_strfmt_wstrnum(buf, tv, &len); + const char *p = lj_strfmt_wstrnum(L, tv, &len); if (!p) lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); status = status && (fwrite(p, 1, len, fp) == len); diff --git a/src/lj_buf.c b/src/lj_buf.c index 023bb9aa..7b6c2188 100644 --- a/src/lj_buf.c +++ b/src/lj_buf.c @@ -186,7 +186,7 @@ SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) } else if (tvisint(o)) { p = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); } else if (tvisnum(o)) { - p = lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM+seplen), o); + p = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); } else { goto badtype; } diff --git a/src/lj_ctype.c 
b/src/lj_ctype.c index eda070ce..94159462 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -14,6 +14,7 @@ #include "lj_strfmt.h" #include "lj_ctype.h" #include "lj_ccallback.h" +#include "lj_buf.h" /* -- C type definitions -------------------------------------------------- */ @@ -571,19 +572,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) /* Convert complex to string with 'i' or 'I' suffix. */ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) { - char buf[2*STRFMT_MAXBUF_NUM+2+1], *p = buf; + SBuf *sb = lj_buf_tmp_(L); TValue re, im; if (size == 2*sizeof(double)) { re.n = *(double *)sp; im.n = ((double *)sp)[1]; } else { re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; } - p = lj_strfmt_wnum(p, &re); - if (!(im.u32.hi & 0x80000000u) || im.n != im.n) *p++ = '+'; - p = lj_strfmt_wnum(p, &im); - *p = *(p-1) >= 'a' ? 'I' : 'i'; - p++; - return lj_str_new(L, buf, p-buf); + lj_strfmt_putfnum(sb, STRFMT_G14, re.n); + if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); + lj_strfmt_putfnum(sb, STRFMT_G14, im.n); + lj_buf_putchar(sb, sbufP(sb)[-1] >= 'a' ? 'I' : 'i'); + return lj_buf_str(L, sb); } /* -- C type state -------------------------------------------------------- */ diff --git a/src/lj_meta.c b/src/lj_meta.c index 104ecf07..de229571 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -278,25 +278,25 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) */ TValue *e, *o = top; uint64_t tlen = tvisstr(o) ? strV(o)->len : STRFMT_MAXBUF_NUM; - char *p, *buf; + SBuf *sb; do { o--; tlen += tvisstr(o) ? 
strV(o)->len : STRFMT_MAXBUF_NUM; } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); - p = buf = lj_buf_tmp(L, (MSize)tlen); + sb = lj_buf_tmp_(L); + lj_buf_more(sb, (MSize)tlen); for (e = top, top = o; o <= e; o++) { if (tvisstr(o)) { GCstr *s = strV(o); MSize len = s->len; - p = lj_buf_wmem(p, strdata(s), len); + lj_buf_putmem(sb, strdata(s), len); } else if (tvisint(o)) { - p = lj_strfmt_wint(p, intV(o)); + lj_strfmt_putint(sb, intV(o)); } else { - lua_assert(tvisnum(o)); - p = lj_strfmt_wnum(p, o); + lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); } } - setstrV(L, top, lj_str_new(L, buf, (size_t)(p-buf))); + setstrV(L, top, lj_buf_str(L, sb)); } } while (left >= 1); if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index d54e796a..ba9e1ff8 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -18,7 +18,7 @@ /* -- Format parser ------------------------------------------------------- */ static const uint8_t strfmt_map[('x'-'A')+1] = { - STRFMT_A,0,0,0,STRFMT_E,0,STRFMT_G,0,0,0,0,0,0, + STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, 0,0,0,0,0,0, STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, @@ -89,24 +89,6 @@ retlit: /* -- Raw conversions ----------------------------------------------------- */ -/* Write number to bufer. */ -char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o) -{ - if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? 
*/ -#if __BIONIC__ - if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; } -#endif - return p + lua_number2str(p, o->n); - } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { - *p++ = 'n'; *p++ = 'a'; *p++ = 'n'; - } else if ((o->u32.hi & 0x80000000) == 0) { - *p++ = 'i'; *p++ = 'n'; *p++ = 'f'; - } else { - *p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f'; - } - return p; -} - #define WINT_R(x, sh, sc) \ { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } @@ -168,21 +150,22 @@ char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) return p; } -/* Return string or write number to buffer and return pointer to start. */ -const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp) +/* Return string or write number to tmp buffer and return pointer to start. */ +const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) { + SBuf *sb; if (tvisstr(o)) { *lenp = strV(o)->len; return strVdata(o); } else if (tvisint(o)) { - *lenp = (MSize)(lj_strfmt_wint(buf, intV(o)) - buf); - return buf; + sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { - *lenp = (MSize)(lj_strfmt_wnum(buf, o) - buf); - return buf; + sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); } else { return NULL; } + *lenp = sbuflen(sb); + return sbufB(sb); } /* -- Unformatted conversions to buffer ----------------------------------- */ @@ -198,8 +181,7 @@ SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) /* Add number to buffer. */ SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) { - setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), o)); - return sb; + return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); } #endif @@ -360,63 +342,6 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } -/* Max. sprintf buffer size needed. At least #string.format("%.99f", -1e308). */ -#define STRFMT_FMTNUMBUF 512 - -/* Add formatted floating-point number to buffer. 
*/ -SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) -{ - TValue tv; - tv.n = n; - if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { - /* Canonicalize output of non-finite values. */ - MSize width = STRFMT_WIDTH(sf), len = 3; - int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; - char *p; - if (((tv.u32.hi & 0x000fffff) | tv.u32.lo) != 0) { - ch ^= ('n' << 16) | ('a' << 8) | 'n'; - if ((sf & STRFMT_F_SPACE)) prefix = ' '; - } else { - ch ^= ('i' << 16) | ('n' << 8) | 'f'; - if ((tv.u32.hi & 0x80000000)) prefix = '-'; - else if ((sf & STRFMT_F_PLUS)) prefix = '+'; - else if ((sf & STRFMT_F_SPACE)) prefix = ' '; - } - if (prefix) len = 4; - p = lj_buf_more(sb, width > len ? width : len); - if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; - if (prefix) *p++ = prefix; - *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; - if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; - setsbufP(sb, p); - } else { /* Delegate to sprintf() for now. */ - uint8_t width = (uint8_t)STRFMT_WIDTH(sf), prec = (uint8_t)STRFMT_PREC(sf); - char fmt[1+5+2+3+1+1], *p = fmt; - *p++ = '%'; - if ((sf & STRFMT_F_LEFT)) *p++ = '-'; - if ((sf & STRFMT_F_PLUS)) *p++ = '+'; - if ((sf & STRFMT_F_ZERO)) *p++ = '0'; - if ((sf & STRFMT_F_SPACE)) *p++ = ' '; - if ((sf & STRFMT_F_ALT)) *p++ = '#'; - if (width) { - uint8_t x = width / 10, y = width % 10; - if (x) *p++ = '0' + x; - *p++ = '0' + y; - } - if (prec != 255) { - uint8_t x = prec / 10, y = prec % 10; - *p++ = '.'; - if (x) *p++ = '0' + x; - *p++ = '0' + y; - } - *p++ = (0x67666561 >> (STRFMT_FP(sf)<<3)) ^ ((sf & STRFMT_F_UPPER)?0x20:0); - *p = '\0'; - p = lj_buf_more(sb, STRFMT_FMTNUMBUF); - setsbufP(sb, p + sprintf(p, fmt, n)); - } - return sb; -} - /* -- Conversions to strings ---------------------------------------------- */ /* Convert integer to string. 
*/ @@ -427,14 +352,6 @@ GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) return lj_str_new(L, buf, len); } -/* Convert number to string. */ -GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) -{ - char buf[STRFMT_MAXBUF_NUM]; - MSize len = (MSize)(lj_strfmt_wnum(buf, o) - buf); - return lj_str_new(L, buf, len); -} - /* Convert integer or number to string. */ GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) { @@ -510,12 +427,9 @@ const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) case STRFMT_UINT: lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); break; - case STRFMT_NUM: { - TValue tv; - tv.n = va_arg(argp, lua_Number); - setsbufP(sb, lj_strfmt_wnum(lj_buf_more(sb, STRFMT_MAXBUF_NUM), &tv)); + case STRFMT_NUM: + lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); break; - } case STRFMT_STR: { const char *s = va_arg(argp, char *); if (s == NULL) s = "(null)"; diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index dcfaf2e3..f229f25c 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -64,11 +64,12 @@ typedef enum FormatType { #define STRFMT_S (STRFMT_STR) #define STRFMT_U (STRFMT_UINT) #define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX) +#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC)) /* Maximum buffer sizes for conversions. */ #define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */ #define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */ -#define STRFMT_MAXBUF_NUM LUAI_MAXNUMBER2STR +#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */ #define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ /* Format parser. */ @@ -83,10 +84,9 @@ static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) /* Raw conversions. 
*/ LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); -LJ_FUNC char * LJ_FASTCALL lj_strfmt_wnum(char *p, cTValue *o); LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); -LJ_FUNC const char *lj_strfmt_wstrnum(char *buf, cTValue *o, MSize *lenp); +LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); /* Unformatted conversions to buffer. */ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c new file mode 100644 index 00000000..577795e0 --- /dev/null +++ b/src/lj_strfmt_num.c @@ -0,0 +1,591 @@ +/* +** String formatting for floating-point numbers. +** Copyright (C) 2005-2015 Mike Pall. See Copyright Notice in luajit.h +** Contributed by Peter Cawley. +*/ + +#include <stdio.h> + +#define lj_strfmt_num_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_strfmt.h" + +/* -- Precomputed tables -------------------------------------------------- */ + +/* Rescale factors to push the exponent of a number towards zero. */ +#define RESCALE_EXPONENTS(P, N) \ + P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \ + P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \ + N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \ + N(251), N(270), N(289) + +#define ONE_E_P(X) 1e+0 ## X +#define ONE_E_N(X) 1e-0 ## X +static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) }; +static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) }; +#undef ONE_E_N +#undef ONE_E_P + +/* +** For p in range -70 through 57, this table encodes pairs (m, e) such that +** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds. 
+*/ +static const int8_t four_ulp_m_e[] = { + 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19, + -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16, + 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14, + 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3, + -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7, + 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103, + -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3, + 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2, + 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4, + 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34, + 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10, + 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13, + 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15, + 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17 +}; + +/* min(2^32-1, 10^e-1) for e in range 0 through 10 */ +static uint32_t ndigits_dec_threshold[] = { + 0, 9U, 99U, 999U, 9999U, 99999U, 999999U, + 9999999U, 99999999U, 999999999U, 0xffffffffU +}; + +/* -- Helper functions ---------------------------------------------------- */ + +/* Compute the number of digits in the decimal representation of x. */ +static MSize ndigits_dec(uint32_t x) +{ + MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */ + return t + (x > ndigits_dec_threshold[t]); +} + +#define WINT_R(x, sh, sc) \ + { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } + +/* Write 9-digit unsigned integer to buffer. 
*/ +static char *lj_strfmt_wuint9(char *p, uint32_t u) +{ + uint32_t v = u / 10000, w; + u -= v * 10000; + w = v / 10000; + v -= w * 10000; + *p++ = (char)('0'+w); + WINT_R(v, 23, 1000) + WINT_R(v, 12, 100) + WINT_R(v, 10, 10) + *p++ = (char)('0'+v); + WINT_R(u, 23, 1000) + WINT_R(u, 12, 100) + WINT_R(u, 10, 10) + *p++ = (char)('0'+u); + return p; +} +#undef WINT_R + +/* -- Extended precision arithmetic --------------------------------------- */ + +/* +** The "nd" format is a fixed-precision decimal representation for numbers. It +** consists of up to 64 uint32_t values, with each uint32_t storing a value +** in the range [0, 1e9). A number in "nd" format consists of three variables: +** +** uint32_t nd[64]; +** uint32_t ndlo; +** uint32_t ndhi; +** +** The integral part of the number is stored in nd[0 ... ndhi], the value of +** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of +** the number is zero, ndlo is zero. Otherwise, the fractional part is stored +** in nd[ndlo ... 63], the value of which is taken to be +** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}. +** +** If the array part had 128 elements rather than 64, then every double would +** have an exact representation in "nd" format. With 64 elements, all integral +** doubles have an exact representation, and all non-integral doubles have +** enough digits to make both %.99e and %.99f do the right thing. +*/ + +#if LJ_64 +#define ND_MUL2K_MAX_SHIFT 29 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) +#else +#define ND_MUL2K_MAX_SHIFT 11 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) +#endif + +/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */ +static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, + uint32_t carry_in, SFormat sf) +{ + uint32_t i, ndlo = 0, start = 1; + /* Performance hacks. 
*/ + if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) { + start = ndhi - (STRFMT_PREC(sf) + 17) / 8; + } + /* Real logic. */ + while (k >= ND_MUL2K_MAX_SHIFT) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) { + nd[++ndhi] = carry_in; carry_in = 0; + if(start++ == ndlo) ++ndlo; + } + k -= ND_MUL2K_MAX_SHIFT; + } + if (k) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << k) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) nd[++ndhi] = carry_in; + } + return ndhi; +} + +/* Divide nd by 2^k (ndlo is assumed to be zero). */ +static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) +{ + uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0; + /* Performance hacks. */ + if (!ndhi) { + if (!nd[0]) { + return 0; + } else { + uint32_t s = lj_ffs(nd[0]); + if (s >= k) { nd[0] >>= k; return 0; } + nd[0] >>= s; k -= s; + } + } + if (k > 18) { + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { + stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; + } else { + int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; + int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); + stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9; + stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8; + } + } + /* Real logic. 
*/ + while (k >= 9) { + uint32_t i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> 9) + carry; + carry = (val & 0x1ff) * 1953125; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (ndlo != stop1 && ndlo != stop2) { + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + } else if (!nd[ndhi]) { + if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + else return ndlo; + } + k -= 9; + } + if (k) { + uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> k) + carry; + carry = (val & mask) * mul; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + } + return ndlo; +} + +/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */ +static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) +{ + uint32_t i, carry; + if (e >= 0) { + i = (uint32_t)e/9; + carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1); + } else { + int32_t f = (e-8)/9; + i = (uint32_t)(64 + f); + carry = m * (ndigits_dec_threshold[e - f*9] + 1); + } + for (;;) { + uint32_t val = nd[i] + carry; + if (LJ_UNLIKELY(val >= 1000000000)) { + val -= 1000000000; + nd[i] = val; + if (LJ_UNLIKELY(i == ndhi)) { + ndhi = (ndhi + 1) & 0x3f; + nd[ndhi] = 1; + break; + } + carry = 1; + i = (i + 1) & 0x3f; + } else { + nd[i] = val; + break; + } + } + return ndhi; +} + +/* Test whether two "nd" values are equal in their most significant digits. 
*/ +static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, + MSize prec) +{ + char nd9[9], ref9[9]; + if (hilen <= prec) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f; + if (prec >= 9) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f; + } + } else { + prec -= hilen - 9; + } + lua_assert(prec < 9); + lj_strfmt_wuint9(nd9, nd[ndhi]); + lj_strfmt_wuint9(ref9, *ref); + return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); +} + +/* -- Formatted conversions to buffer ------------------------------------- */ + +/* Write formatted floating-point number to either sb or p. */ +static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) +{ + MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len; + TValue t; + t.n = n; + if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) { + /* Handle non-finite values uniformly for %a, %e, %f, %g. */ + int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; + if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) { + ch ^= ('n' << 16) | ('a' << 8) | 'n'; + if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } else { + ch ^= ('i' << 16) | ('n' << 8) | 'f'; + if ((t.u32.hi & 0x80000000)) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } + len = 3 + (prefix != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + if (prefix) *p++ = prefix; + *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; + } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) { + /* %a */ + const char *hexdig = (sf & STRFMT_F_UPPER) ? 
"0123456789ABCDEFPX" + : "0123456789abcdefpx"; + int32_t e = (t.u32.hi >> 20) & 0x7ff; + char prefix = 0, eprefix = '+'; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + t.u32.hi &= 0xfffff; + if (e) { + t.u32.hi |= 0x100000; + e -= 1023; + } else if (t.u32.lo | t.u32.hi) { + /* Non-zero denormal - normalise it. */ + uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo); + e = -1022 - shift; + t.u64 <<= shift; + } + /* abs(n) == t.u64 * 2^(e - 52) */ + /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */ + if ((int32_t)prec < 0) { + /* Default precision: use smallest precision giving exact result. */ + prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4; + } else if (prec < 13) { + /* Precision is sufficiently low as to maybe require rounding. */ + t.u64 += (((uint64_t)1) << (51 - prec*4)); + } + if (e < 0) { + eprefix = '-'; + e = -e; + } + len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + *p++ = '0'; + *p++ = hexdig[17]; /* x or X */ + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + char *q = p + 1 + prec; + *p = '.'; + if (prec < 13) t.u64 >>= (52 - prec*4); + else while (prec > 13) p[prec--] = '0'; + while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; } + p = q; + } + *p++ = hexdig[16]; /* p or P */ + *p++ = eprefix; /* + or - */ + p = lj_strfmt_wint(p, e); + } else { + /* %e or %f or %g - begin by converting n to "nd" format. 
*/ + uint32_t nd[64]; + uint32_t ndhi = 0, ndlo, i; + int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0; + char prefix = 0, *q; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */ + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) { + /* %g - decrement precision if non-zero (to make it like %e). */ + prec--; + prec ^= (uint32_t)((int32_t)prec >> 31); + } + if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) { + /* Precision is sufficiently low that rescaling will probably work. */ + if ((ndebias = rescale_e[e >> 6])) { + t.n = n * rescale_n[e >> 6]; + t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */ + nd[0] = 0x100000 | (t.u32.hi & 0xfffff); + e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29); + goto load_t_lo; rescale_failed: + t.n = n; + e = (t.u32.hi >> 20) & 0x7ff; + ndebias = ndhi = 0; + } + } + nd[0] = t.u32.hi & 0xfffff; + if (e == 0) e++; else nd[0] |= 0x100000; + e -= 1043; + if (t.u32.lo) { + e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo: +#if ND_MUL2K_MAX_SHIFT >= 29 + nd[0] = (nd[0] << 3) | (t.u32.lo >> 29); + ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf); +#elif ND_MUL2K_MAX_SHIFT >= 11 + ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf); +#else +#error "ND_MUL2K_MAX_SHIFT too small" +#endif + } + if (e >= 0) { + ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf); + ndlo = 0; + } else { + ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf); + if (ndhi && !nd[ndhi]) ndhi--; + } + /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */ + if ((sf & STRFMT_T_FP_E)) { + /* %e or %g - assume %e and start by calculating nd's exponent (nde). 
*/ + char eprefix = '+'; + int32_t nde = -1; + MSize hilen; + if (ndlo && !nd[ndhi]) { + ndhi = 64; do {} while (!nd[--ndhi]); + nde -= 64 * 9; + } + hilen = ndigits_dec(nd[ndhi]); + nde += ndhi * 9 + hilen; + if (ndebias) { + /* + ** Rescaling was performed, but this introduced some error, and might + ** have pushed us across a rounding boundary. We check whether this + ** error affected the result by introducing even more error (2ulp in + ** either direction), and seeing whether a rounding boundary was + ** crossed. Having already converted the -2ulp case, we save off its + ** most significant digits, convert the +2ulp case, and compare them. + */ + int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); + const int8_t *m_e = four_ulp_m_e + eidx * 2; + lua_assert(0 <= eidx && eidx < 128); + nd[33] = nd[ndhi]; + nd[32] = nd[(ndhi - 1) & 0x3f]; + nd[31] = nd[(ndhi - 2) & 0x3f]; + nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]); + if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) { + goto rescale_failed; + } + } + if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); + nde += (hilen != ndigits_dec(nd[ndhi])); + } + nde += ndebias; + if ((sf & STRFMT_T_FP_F)) { + /* %g */ + if ((int32_t)prec >= nde && nde >= -4) { + if (nde < 0) ndhi = 0; + prec -= nde; + goto g_format_like_f; + } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) { + /* Decrease precision in order to strip trailing zeroes. 
*/ + char tail[9]; + uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f; + i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10; + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == ndhi) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } + } + if (nde < 0) { + /* Make nde non-negative. */ + eprefix = '-'; + nde = -nde; + } + len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + q = lj_strfmt_wint(p + 1, nd[ndhi]); + p[0] = p[1]; /* Put leading digit in the correct place. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + p[1] = '.'; p += 2; + prec -= (q - p); p = q; /* Account for the digits already emitted. */ + /* Then emit chunks of 9 digits (this may emit 8 digits too many). */ + for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + } + if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %e (or %#g) - emit trailing zeroes. */ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } else { + p++; + } + *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e'; + *p++ = eprefix; /* + or - */ + if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. 
*/ + p = lj_strfmt_wint(p, nde); + } else { + /* %f (or, shortly, %g in %f style) */ + if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); + } + g_format_like_f: + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { + /* Decrease precision in order to strip trailing zeroes. */ + if (ndlo) { + /* nd has a fractional part; we need to look at its digits. */ + char tail[9]; + uint32_t maxprec = (64 - ndlo) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = 64 - (prec + 8) / 9; + i = prec - ((63 - ndlo) * 9); + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == 63) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } else { + /* nd has no fractional part, so precision goes straight to zero. */ + prec = 0; + } + } + len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + /* Emit integer part. */ + p = lj_strfmt_wint(p, nd[ndhi]); + i = ndhi; + while (i) p = lj_strfmt_wuint9(p, nd[--i]); + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + *p++ = '.'; + /* Emit chunks of 9 digits (this may emit 8 digits too many). */ + while ((int32_t)prec > 0 && i != ndlo) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + prec -= 9; + } + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %f (or %#g) - emit trailing zeroes. 
*/ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } + } + } + if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + return p; +} + +/* Add formatted floating-point number to buffer. */ +SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) +{ + setsbufP(sb, lj_strfmt_wfnum(sb, sf, n, NULL)); + return sb; +} + +/* -- Conversions to strings ---------------------------------------------- */ + +/* Convert number to string. */ +GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) +{ + char buf[STRFMT_MAXBUF_NUM]; + MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); + return lj_str_new(L, buf, len); +} + diff --git a/src/ljamalg.c b/src/ljamalg.c index be0c52d7..93b81f2a 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -46,6 +46,7 @@ #include "lj_vmmath.c" #include "lj_strscan.c" #include "lj_strfmt.c" +#include "lj_strfmt_num.c" #include "lj_api.c" #include "lj_profile.c" #include "lj_lex.c"