2009-12-08 18:46:35 +00:00
|
|
|
/*
|
|
|
|
** String handling.
|
2013-02-11 11:54:48 +00:00
|
|
|
** Copyright (C) 2005-2013 Mike Pall. See Copyright Notice in luajit.h
|
2009-12-08 18:46:35 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#define lj_str_c
|
|
|
|
#define LUA_CORE
|
|
|
|
|
|
|
|
#include "lj_obj.h"
|
|
|
|
#include "lj_gc.h"
|
|
|
|
#include "lj_err.h"
|
2013-02-27 16:11:31 +00:00
|
|
|
#include "lj_buf.h"
|
2009-12-08 18:46:35 +00:00
|
|
|
#include "lj_str.h"
|
|
|
|
#include "lj_state.h"
|
2010-11-09 11:09:54 +00:00
|
|
|
#include "lj_char.h"
|
2009-12-08 18:46:35 +00:00
|
|
|
|
|
|
|
/* -- String interning ---------------------------------------------------- */
|
|
|
|
|
|
|
|
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
|
2009-12-08 19:35:29 +00:00
|
|
|
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
|
2009-12-08 18:46:35 +00:00
|
|
|
{
|
|
|
|
MSize i, n = a->len > b->len ? b->len : a->len;
|
|
|
|
for (i = 0; i < n; i += 4) {
|
|
|
|
/* Note: innocuous access up to end of string + 3. */
|
|
|
|
uint32_t va = *(const uint32_t *)(strdata(a)+i);
|
|
|
|
uint32_t vb = *(const uint32_t *)(strdata(b)+i);
|
|
|
|
if (va != vb) {
|
2010-11-28 16:22:45 +00:00
|
|
|
#if LJ_LE
|
2009-12-08 18:46:35 +00:00
|
|
|
va = lj_bswap(va); vb = lj_bswap(vb);
|
|
|
|
#endif
|
|
|
|
i -= n;
|
|
|
|
if ((int32_t)i >= -3) {
|
|
|
|
va >>= 32+(i<<3); vb >>= 32+(i<<3);
|
|
|
|
if (va == vb) break;
|
|
|
|
}
|
2010-01-21 14:29:23 +00:00
|
|
|
return va < vb ? -1 : 1;
|
2009-12-08 18:46:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return (int32_t)(a->len - b->len);
|
|
|
|
}
|
|
|
|
|
2010-07-21 21:55:05 +00:00
|
|
|
/* Fast string data comparison. Caveat: unaligned access to 1st string! */
|
|
|
|
static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
|
|
|
|
{
|
|
|
|
MSize i = 0;
|
|
|
|
lua_assert(len > 0);
|
2013-02-21 15:22:26 +00:00
|
|
|
lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
|
2010-07-21 21:55:05 +00:00
|
|
|
do { /* Note: innocuous access up to end of string + 3. */
|
2011-05-09 16:09:29 +00:00
|
|
|
uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
|
2010-07-21 21:55:05 +00:00
|
|
|
if (v) {
|
|
|
|
i -= len;
|
2010-11-28 16:22:45 +00:00
|
|
|
#if LJ_LE
|
2010-07-21 21:55:05 +00:00
|
|
|
return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
|
|
|
|
#else
|
|
|
|
return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
i += 4;
|
|
|
|
} while (i < len);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-12-08 18:46:35 +00:00
|
|
|
/* Resize the string hash table (grow and shrink). */
|
|
|
|
void lj_str_resize(lua_State *L, MSize newmask)
|
|
|
|
{
|
|
|
|
global_State *g = G(L);
|
|
|
|
GCRef *newhash;
|
|
|
|
MSize i;
|
|
|
|
if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
|
|
|
|
return; /* No resizing during GC traversal or if already too big. */
|
|
|
|
newhash = lj_mem_newvec(L, newmask+1, GCRef);
|
|
|
|
memset(newhash, 0, (newmask+1)*sizeof(GCRef));
|
|
|
|
for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
|
|
|
|
GCobj *p = gcref(g->strhash[i]);
|
|
|
|
while (p) { /* Follow each hash chain and reinsert all strings. */
|
|
|
|
MSize h = gco2str(p)->hash & newmask;
|
|
|
|
GCobj *next = gcnext(p);
|
|
|
|
/* NOBARRIER: The string table is a GC root. */
|
|
|
|
setgcrefr(p->gch.nextgc, newhash[h]);
|
|
|
|
setgcref(newhash[h], p);
|
|
|
|
p = next;
|
|
|
|
}
|
|
|
|
}
|
2009-12-29 03:36:35 +00:00
|
|
|
lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
|
2009-12-08 18:46:35 +00:00
|
|
|
g->strmask = newmask;
|
|
|
|
g->strhash = newhash;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Intern a string and return string object. */
|
|
|
|
GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
|
|
|
|
{
|
|
|
|
global_State *g;
|
|
|
|
GCstr *s;
|
|
|
|
GCobj *o;
|
|
|
|
MSize len = (MSize)lenx;
|
2010-07-21 20:53:27 +00:00
|
|
|
MSize a, b, h = len;
|
2009-12-08 18:46:35 +00:00
|
|
|
if (lenx >= LJ_MAX_STR)
|
|
|
|
lj_err_msg(L, LJ_ERR_STROV);
|
|
|
|
g = G(L);
|
2010-07-21 20:53:27 +00:00
|
|
|
/* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
|
|
|
|
if (len >= 4) { /* Caveat: unaligned access! */
|
2011-05-09 16:09:29 +00:00
|
|
|
a = lj_getu32(str);
|
|
|
|
h ^= lj_getu32(str+len-4);
|
|
|
|
b = lj_getu32(str+(len>>1)-2);
|
2010-07-21 20:53:27 +00:00
|
|
|
h ^= b; h -= lj_rol(b, 14);
|
2011-05-09 16:09:29 +00:00
|
|
|
b += lj_getu32(str+(len>>2)-1);
|
2010-07-21 20:53:27 +00:00
|
|
|
} else if (len > 0) {
|
|
|
|
a = *(const uint8_t *)str;
|
|
|
|
h ^= *(const uint8_t *)(str+len-1);
|
|
|
|
b = *(const uint8_t *)(str+(len>>1));
|
|
|
|
h ^= b; h -= lj_rol(b, 14);
|
|
|
|
} else {
|
|
|
|
return &g->strempty;
|
|
|
|
}
|
|
|
|
a ^= h; a -= lj_rol(h, 11);
|
|
|
|
b ^= a; b -= lj_rol(a, 25);
|
|
|
|
h ^= b; h -= lj_rol(b, 16);
|
|
|
|
/* Check if the string has already been interned. */
|
2010-07-21 21:55:05 +00:00
|
|
|
o = gcref(g->strhash[h & g->strmask]);
|
2013-02-21 15:22:26 +00:00
|
|
|
if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
|
2010-07-21 21:55:05 +00:00
|
|
|
while (o != NULL) {
|
|
|
|
GCstr *sx = gco2str(o);
|
|
|
|
if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
|
|
|
|
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
|
|
|
|
if (isdead(g, o)) flipwhite(o);
|
|
|
|
return sx; /* Return existing string. */
|
|
|
|
}
|
|
|
|
o = gcnext(o);
|
|
|
|
}
|
|
|
|
} else { /* Slow path: end of string is too close to a page boundary. */
|
|
|
|
while (o != NULL) {
|
|
|
|
GCstr *sx = gco2str(o);
|
|
|
|
if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
|
|
|
|
/* Resurrect if dead. Can only happen with fixstring() (keywords). */
|
|
|
|
if (isdead(g, o)) flipwhite(o);
|
|
|
|
return sx; /* Return existing string. */
|
|
|
|
}
|
|
|
|
o = gcnext(o);
|
2009-12-08 18:46:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Nope, create a new string. */
|
|
|
|
s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
|
|
|
|
newwhite(g, s);
|
|
|
|
s->gct = ~LJ_TSTR;
|
|
|
|
s->len = len;
|
|
|
|
s->hash = h;
|
|
|
|
s->reserved = 0;
|
|
|
|
memcpy(strdatawr(s), str, len);
|
|
|
|
strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
|
|
|
|
/* Add it to string hash table. */
|
|
|
|
h &= g->strmask;
|
|
|
|
s->nextgc = g->strhash[h];
|
|
|
|
/* NOBARRIER: The string table is a GC root. */
|
|
|
|
setgcref(g->strhash[h], obj2gco(s));
|
|
|
|
if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
|
|
|
|
lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
|
|
|
|
return s; /* Return newly interned string. */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Free a string object and decrement the count of interned strings. */
void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
{
  size_t nbytes = sizestring(s);  /* Read the size before s is released. */
  g->strnum--;
  lj_mem_free(g, s, nbytes);
}
|
|
|
|
|
|
|
|
/* -- Type conversions ---------------------------------------------------- */
|
|
|
|
|
2010-05-19 22:40:51 +00:00
|
|
|
/* Print number to buffer. Canonicalizes non-finite values. */
|
2013-03-25 15:41:13 +00:00
|
|
|
char * LJ_FASTCALL lj_str_bufnum(char *p, cTValue *o)
|
2010-05-19 22:40:51 +00:00
|
|
|
{
|
|
|
|
if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
|
2012-10-14 23:31:56 +00:00
|
|
|
#if __BIONIC__
|
2013-03-25 15:41:13 +00:00
|
|
|
if (tvismzero(o)) { *p++ = '-'; *p++ = '0'; return p; }
|
2012-10-14 23:31:56 +00:00
|
|
|
#endif
|
2013-03-25 15:41:13 +00:00
|
|
|
return p + lua_number2str(p, o->n);
|
2010-05-19 22:40:51 +00:00
|
|
|
} else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
|
2013-03-25 15:41:13 +00:00
|
|
|
*p++ = 'n'; *p++ = 'a'; *p++ = 'n';
|
2010-05-19 22:40:51 +00:00
|
|
|
} else if ((o->u32.hi & 0x80000000) == 0) {
|
2013-03-25 15:41:13 +00:00
|
|
|
*p++ = 'i'; *p++ = 'n'; *p++ = 'f';
|
2010-05-19 22:40:51 +00:00
|
|
|
} else {
|
2013-03-25 15:41:13 +00:00
|
|
|
*p++ = '-'; *p++ = 'i'; *p++ = 'n'; *p++ = 'f';
|
2010-05-19 22:40:51 +00:00
|
|
|
}
|
2013-03-25 15:41:13 +00:00
|
|
|
return p;
|
2010-05-19 22:40:51 +00:00
|
|
|
}
|
|
|
|
|
2013-03-25 15:41:13 +00:00
|
|
|
/*
** Emit one decimal digit: d = x / sc, computed as a multiplication by the
** rounded-up reciprocal ceil(2^sh / sc) followed by a right shift by sh.
** Afterwards x holds the remainder x - d*sc. Writes through the local
** variable p of the enclosing function and advances it.
*/
#define STR_BUFINT_R(x, sh, sc) \
  { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }

/*
** Print integer to buffer.
** Writes the decimal representation of k (with a leading '-' for negative
** values) to p, without a terminating zero, and returns a pointer just
** past the last character written. At most 11 characters are written.
*/
char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
{
  uint32_t u = (uint32_t)k;
  /*
  ** Negate in unsigned arithmetic: -k on the signed value overflows
  ** (undefined behavior) for k == INT32_MIN.
  */
  if (k < 0) { u = ~u+1u; *p++ = '-'; }
  /* Split into groups of four digits; jump to the first non-zero digit. */
  if (u < 10000) {
    if (u < 10) goto dig1;
    if (u < 100) goto dig2;
    if (u < 1000) goto dig3;
  } else {
    uint32_t v = u / 10000; u -= v * 10000;
    if (v < 10000) {
      if (v < 10) goto dig5;
      if (v < 100) goto dig6;
      if (v < 1000) goto dig7;
    } else {
      /* More than eight digits: w holds the leading one or two digits. */
      uint32_t w = v / 10000; v -= w * 10000;
      if (w >= 10) STR_BUFINT_R(w, 10, 10)
      *p++ = (char)('0'+w);
    }
    STR_BUFINT_R(v, 23, 1000)
    dig7: STR_BUFINT_R(v, 12, 100)
    dig6: STR_BUFINT_R(v, 10, 10)
    dig5: *p++ = (char)('0'+v);
  }
  STR_BUFINT_R(u, 23, 1000)
  dig3: STR_BUFINT_R(u, 12, 100)
  dig2: STR_BUFINT_R(u, 10, 10)
  dig1: *p++ = (char)('0'+u);
  return p;
}
|
|
|
|
|
2013-03-20 21:45:52 +00:00
|
|
|
/* Print pointer to buffer. */
|
2013-03-25 15:41:13 +00:00
|
|
|
char * LJ_FASTCALL lj_str_bufptr(char *p, const void *v)
|
2013-03-20 21:45:52 +00:00
|
|
|
{
|
|
|
|
ptrdiff_t x = (ptrdiff_t)v;
|
|
|
|
MSize i, n = LJ_STR_PTRBUF;
|
|
|
|
if (x == 0) {
|
2013-03-25 15:41:13 +00:00
|
|
|
*p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
|
|
|
|
return p;
|
2013-03-20 21:45:52 +00:00
|
|
|
}
|
|
|
|
#if LJ_64
|
|
|
|
/* Shorten output for 64 bit pointers. */
|
|
|
|
n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
|
|
|
|
#endif
|
|
|
|
p[0] = '0';
|
|
|
|
p[1] = 'x';
|
|
|
|
for (i = n-1; i >= 2; i--, x >>= 4)
|
|
|
|
p[i] = "0123456789abcdef"[(x & 15)];
|
2013-03-25 15:41:13 +00:00
|
|
|
return p+n;
|
2013-03-20 21:45:52 +00:00
|
|
|
}
|
|
|
|
|
2013-03-18 16:08:37 +00:00
|
|
|
/* Print TValue to buffer (only for numbers) and return pointer to start. */
|
|
|
|
const char *lj_str_buftv(char *buf, cTValue *o, MSize *lenp)
|
|
|
|
{
|
|
|
|
if (tvisstr(o)) {
|
|
|
|
*lenp = strV(o)->len;
|
|
|
|
return strVdata(o);
|
|
|
|
} else if (tvisint(o)) {
|
2013-03-25 15:41:13 +00:00
|
|
|
*lenp = (MSize)(lj_str_bufint(buf, intV(o)) - buf);
|
|
|
|
return buf;
|
2013-03-18 16:08:37 +00:00
|
|
|
} else if (tvisnum(o)) {
|
2013-03-25 15:41:13 +00:00
|
|
|
*lenp = (MSize)(lj_str_bufnum(buf, o) - buf);
|
2013-03-18 16:08:37 +00:00
|
|
|
return buf;
|
|
|
|
} else {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-12-08 18:46:35 +00:00
|
|
|
/* Convert number to string. */
|
2009-12-08 19:35:29 +00:00
|
|
|
GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
|
2009-12-08 18:46:35 +00:00
|
|
|
{
|
2011-02-16 23:44:14 +00:00
|
|
|
char buf[LJ_STR_NUMBUF];
|
2013-03-25 15:41:13 +00:00
|
|
|
MSize len = (MSize)(lj_str_bufnum(buf, (TValue *)np) - buf);
|
2010-05-19 22:40:51 +00:00
|
|
|
return lj_str_new(L, buf, len);
|
2009-12-08 18:46:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Convert integer to string. */
|
2009-12-08 19:35:29 +00:00
|
|
|
GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
|
2009-12-08 18:46:35 +00:00
|
|
|
{
|
2013-03-18 16:08:37 +00:00
|
|
|
char buf[LJ_STR_INTBUF];
|
2013-03-25 15:41:13 +00:00
|
|
|
MSize len = (MSize)(lj_str_bufint(buf, k) - buf);
|
|
|
|
return lj_str_new(L, buf, len);
|
2009-12-08 18:46:35 +00:00
|
|
|
}
|
2011-02-16 23:44:14 +00:00
|
|
|
|
|
|
|
/* Convert a numeric TValue to a string, dispatching on integer vs. number. */
GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
{
  if (tvisint(o))
    return lj_str_fromint(L, intV(o));
  return lj_str_fromnum(L, &o->n);
}
|
2009-12-08 18:46:35 +00:00
|
|
|
|
2013-04-23 00:20:03 +00:00
|
|
|
/* Convert char value to string. */
|
|
|
|
GCstr * LJ_FASTCALL lj_str_fromchar(lua_State *L, int c)
|
|
|
|
{
|
|
|
|
char buf[1];
|
|
|
|
buf[0] = c;
|
|
|
|
return lj_str_new(L, buf, 1);
|
|
|
|
}
|
|
|
|
|
2009-12-08 18:46:35 +00:00
|
|
|
/* -- String formatting --------------------------------------------------- */
|
|
|
|
|
|
|
|
/* Push formatted message as a string object to Lua stack. va_list variant. */
|
|
|
|
const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
|
|
|
|
{
|
|
|
|
SBuf *sb = &G(L)->tmpbuf;
|
2013-02-28 12:37:56 +00:00
|
|
|
setsbufL(sb, L);
|
|
|
|
lj_buf_need(sb, (MSize)strlen(fmt));
|
2013-02-27 16:11:31 +00:00
|
|
|
lj_buf_reset(sb);
|
2009-12-08 18:46:35 +00:00
|
|
|
for (;;) {
|
|
|
|
const char *e = strchr(fmt, '%');
|
|
|
|
if (e == NULL) break;
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putmem(sb, fmt, (MSize)(e-fmt));
|
2009-12-08 18:46:35 +00:00
|
|
|
/* This function only handles %s, %c, %d, %f and %p formats. */
|
|
|
|
switch (e[1]) {
|
|
|
|
case 's': {
|
|
|
|
const char *s = va_arg(argp, char *);
|
|
|
|
if (s == NULL) s = "(null)";
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putmem(sb, s, (MSize)strlen(s));
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 'c':
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putb(sb, va_arg(argp, int));
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
2013-03-25 15:41:13 +00:00
|
|
|
case 'd':
|
|
|
|
setsbufP(sb, lj_str_bufint(lj_buf_more(sb, LJ_STR_INTBUF),
|
|
|
|
va_arg(argp, int32_t)));
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
case 'f': {
|
2010-05-19 22:40:51 +00:00
|
|
|
TValue tv;
|
2013-03-25 15:41:13 +00:00
|
|
|
tv.n = va_arg(argp, lua_Number);
|
|
|
|
setsbufP(sb, lj_str_bufnum(lj_buf_more(sb, LJ_STR_NUMBUF), &tv));
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
}
|
2013-03-25 15:41:13 +00:00
|
|
|
case 'p':
|
|
|
|
setsbufP(sb, lj_str_bufptr(lj_buf_more(sb, LJ_STR_PTRBUF),
|
|
|
|
va_arg(argp, void *)));
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
case '%':
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putb(sb, '%');
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
default:
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putb(sb, '%');
|
|
|
|
lj_buf_putb(sb, e[1]);
|
2009-12-08 18:46:35 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
fmt = e+2;
|
|
|
|
}
|
2013-02-28 12:37:56 +00:00
|
|
|
lj_buf_putmem(sb, fmt, (MSize)strlen(fmt));
|
2013-02-27 20:17:27 +00:00
|
|
|
setstrV(L, L->top, lj_buf_str(L, sb));
|
2009-12-08 18:46:35 +00:00
|
|
|
incr_top(L);
|
|
|
|
return strVdata(L->top - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Push formatted message as a string object to Lua stack. Vararg variant. */
|
|
|
|
const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
const char *msg;
|
|
|
|
va_list argp;
|
|
|
|
va_start(argp, fmt);
|
|
|
|
msg = lj_str_pushvf(L, fmt, argp);
|
|
|
|
va_end(argp);
|
|
|
|
return msg;
|
|
|
|
}
|
|
|
|
|