From edd5cbadc5cdc7b5b66d5340ee97c5abe5a3892a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 1 Jun 2021 05:14:18 +0200 Subject: [PATCH] String buffers, part 2c: abstract out string.format. Sponsored by fmad.io. --- src/Makefile.dep | 3 +- src/lib_string.c | 85 ++-------------------------------- src/lj_strfmt.c | 117 +++++++++++++++++++++++++++++++++++++++++++---- src/lj_strfmt.h | 5 ++ 4 files changed, 121 insertions(+), 89 deletions(-) diff --git a/src/Makefile.dep b/src/Makefile.dep index 315bf632..0bf63391 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -197,7 +197,8 @@ lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_buf.h lj_gc.h lj_str.h lj_state.h lj_char.h lj_strfmt.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ + lj_char.h lj_strfmt.h lj_lib.h lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ diff --git a/src/lib_string.c b/src/lib_string.c index 4a3ff372..75d855d6 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -640,89 +640,14 @@ LJLIB_CF(string_gsub) /* ------------------------------------------------------------------------ */ -/* Emulate tostring() inline. */ -static GCstr *string_fmt_tostring(lua_State *L, int arg, int retry) -{ - TValue *o = L->base+arg-1; - cTValue *mo; - lj_assertL(o < L->top, "bad usage"); /* Caller already checks for existence. */ - if (LJ_LIKELY(tvisstr(o))) - return strV(o); - if (retry != 2 && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { - copyTV(L, L->top++, mo); - copyTV(L, L->top++, o); - lua_call(L, 1, 1); - copyTV(L, L->base+arg-1, --L->top); - return NULL; /* Buffer may be overwritten, retry. */ - } - return lj_strfmt_obj(L, o); -} - LJLIB_CF(string_format) LJLIB_REC(.) { - int arg, top = (int)(L->top - L->base); - GCstr *fmt; - SBuf *sb; - FormatState fs; - SFormat sf; int retry = 0; -again: - arg = 1; - sb = lj_buf_tmp_(L); - fmt = lj_lib_checkstr(L, arg); - lj_strfmt_init(&fs, strdata(fmt), fmt->len); - while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { - if (sf == STRFMT_LIT) { - lj_buf_putmem(sb, fs.str, fs.len); - } else if (sf == STRFMT_ERR) { - lj_err_callerv(L, LJ_ERR_STRFMT, strdata(lj_str_new(L, fs.str, fs.len))); - } else { - if (++arg > top) - luaL_argerror(L, arg, lj_obj_typename[0]); - switch (STRFMT_TYPE(sf)) { - case STRFMT_INT: - if (tvisint(L->base+arg-1)) { - int32_t k = intV(L->base+arg-1); - if (sf == STRFMT_INT) - lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ - else - lj_strfmt_putfxint(sb, sf, k); - } else { - lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); - } - break; - case STRFMT_UINT: - if (tvisint(L->base+arg-1)) - lj_strfmt_putfxint(sb, sf, intV(L->base+arg-1)); - else - lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); - break; - case STRFMT_NUM: - lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); - break; - case STRFMT_STR: { - GCstr *str = string_fmt_tostring(L, arg, retry); - if (str == NULL) - retry = 1; - else if ((sf & STRFMT_T_QUOTED)) - lj_strfmt_putquoted(sb, str); /* No formatting. */ - else - lj_strfmt_putfstr(sb, sf, str); - break; - } - case STRFMT_CHAR: - lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); - break; - case STRFMT_PTR: /* No formatting. */ - lj_strfmt_putptr(sb, lj_obj_ptr(G(L), L->base+arg-1)); - break; - default: - lj_assertL(0, "bad string format type"); - break; - } - } - } - if (retry++ == 1) goto again; + SBuf *sb; + do { + sb = lj_buf_tmp_(L); + retry = lj_strfmt_putarg(L, sb, 1, -retry); + } while (retry > 0); setstrV(L, L->top-1, lj_buf_str(L, sb)); lj_gc_check(L); return 1; diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index bde3ec0e..a9541d41 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -9,11 +9,14 @@ #define LUA_CORE #include "lj_obj.h" +#include "lj_err.h" #include "lj_buf.h" #include "lj_str.h" +#include "lj_meta.h" #include "lj_state.h" #include "lj_char.h" #include "lj_strfmt.h" +#include "lj_lib.h" /* -- Format parser ------------------------------------------------------- */ @@ -196,10 +199,8 @@ SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) } /* Add quoted string to buffer. */ -SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) { - const char *s = strdata(str); - MSize len = str->len; lj_buf_putb(sb, '"'); while (len--) { uint32_t c = (uint32_t)(uint8_t)*s++; @@ -225,6 +226,13 @@ SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) return sb; } +#if LJ_HASJIT +SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +{ + return strfmt_putquotedlen(sb, strdata(str), str->len); +} +#endif + /* -- Formatted conversions to buffer ------------------------------------- */ /* Add formatted char to buffer. */ @@ -240,18 +248,26 @@ SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) } /* Add formatted string to buffer. */ -SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) +static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) { - MSize len = str->len <= STRFMT_PREC(sf) ? str->len : STRFMT_PREC(sf); MSize width = STRFMT_WIDTH(sf); - char *w = lj_buf_more(sb, width > len ? width : len); - if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, strdata(str), len); + char *w; + if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); + w = lj_buf_more(sb, width > len ? width : len); + if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); while (width-- > len) *w++ = ' '; - if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, strdata(str), len); + if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); sb->w = w; return sb; } +#if LJ_HASJIT +SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) +{ + return strfmt_putfstrlen(sb, sf, strdata(str), str->len); +} +#endif + /* Add formatted signed/unsigned integer to buffer. */ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) { @@ -346,6 +362,91 @@ SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) return lj_strfmt_putfxint(sb, sf, (uint64_t)k); } +/* Format stack arguments to buffer. */ +int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) +{ + int narg = (int)(L->top - L->base); + GCstr *fmt = lj_lib_checkstr(L, arg); + FormatState fs; + SFormat sf; + lj_strfmt_init(&fs, strdata(fmt), fmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { + if (sf == STRFMT_LIT) { + lj_buf_putmem(sb, fs.str, fs.len); + } else if (sf == STRFMT_ERR) { + lj_err_callerv(L, LJ_ERR_STRFMT, + strdata(lj_str_new(L, fs.str, fs.len))); + } else { + TValue *o = &L->base[arg++]; + if (arg > narg) + lj_err_arg(L, arg, LJ_ERR_NOVAL); + switch (STRFMT_TYPE(sf)) { + case STRFMT_INT: + if (tvisint(o)) { + int32_t k = intV(o); + if (sf == STRFMT_INT) + lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ + else + lj_strfmt_putfxint(sb, sf, k); + } else { + lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); + } + break; + case STRFMT_UINT: + if (tvisint(o)) + lj_strfmt_putfxint(sb, sf, intV(o)); + else + lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_NUM: + lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_STR: { + MSize len; + const char *s; + cTValue *mo; + if (LJ_UNLIKELY(!tvisstr(o)) && retry >= 0 && + !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + /* Call __tostring metamethod once. */ + copyTV(L, L->top++, mo); + copyTV(L, L->top++, o); + lua_call(L, 1, 1); + o = &L->base[arg-1]; /* Stack may have been reallocated. */ + copyTV(L, o, --L->top); /* Replace inline for retry. */ + if (retry < 2) { /* Global buffer may have been overwritten. */ + retry = 1; + break; + } + } + if (LJ_LIKELY(tvisstr(o))) { + len = strV(o)->len; + s = strVdata(o); + } else { + GCstr *str = lj_strfmt_obj(L, o); + len = str->len; + s = strdata(str); + } + if ((sf & STRFMT_T_QUOTED)) + strfmt_putquotedlen(sb, s, len); /* No formatting. */ + else + strfmt_putfstrlen(sb, sf, s, len); + break; + } + case STRFMT_CHAR: + lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); + break; + case STRFMT_PTR: /* No formatting. */ + lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o)); + break; + default: + lj_assertL(0, "bad string format type"); + break; + } + } + } + return retry; +} + /* -- Conversions to strings ---------------------------------------------- */ /* Convert integer to string. */ diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h index fa25dcd0..cb2c7360 100644 --- a/src/lj_strfmt.h +++ b/src/lj_strfmt.h @@ -95,7 +95,9 @@ LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); #endif LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); +#if LJ_HASJIT LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); +#endif /* Formatted conversions to buffer. */ LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); @@ -103,7 +105,10 @@ LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); +#if LJ_HASJIT LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); +#endif +LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry); /* Conversions to strings. */ LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);