Partially refactor string.find().

This commit is contained in:
Mike Pall 2013-05-07 20:44:58 +02:00
parent 2be1c2658f
commit 43de451d78
3 changed files with 73 additions and 63 deletions

View File

@ -155,7 +155,6 @@ typedef struct MatchState {
} MatchState;
#define L_ESC '%'
#define SPECIALS "^$*+?.([%-"
static int check_capture(MatchState *ms, int l)
{
@ -422,30 +421,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
return s;
}
/* Find the first occurrence of the byte sequence s2 (l2 bytes) inside the
** byte sequence s1 (l1 bytes). Returns a pointer to the start of the match
** or NULL if there is none. An empty s2 matches right at s1.
*/
static const char *lmemfind(const char *s1, size_t l1,
			    const char *s2, size_t l2)
{
  const char *hit;
  size_t tail, span;
  if (l2 == 0) return s1;  /* Empty needle: found at the very start. */
  if (l2 > l1) return NULL;  /* Needle is longer than the haystack. */
  tail = l2 - 1;  /* Bytes left to compare after the first one. */
  span = l1 - tail;  /* A match cannot start beyond this many bytes. */
  while (span > 0) {
    hit = (const char *)memchr(s1, *s2, span);
    if (hit == NULL) break;  /* First byte does not occur anymore. */
    if (memcmp(hit + 1, s2 + 1, tail) == 0) return hit;
    /* Resume the scan right after the failed candidate. */
    span -= (size_t)(hit + 1 - s1);
    s1 = hit + 1;
  }
  return NULL;  /* Not found. */
}
static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
{
if (i >= ms->level) {
@ -473,60 +448,56 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
return nlevels; /* number of strings pushed */
}
/* Turn a relative string position into an absolute one for a string of
** length len: a negative pos counts back from the end. A result that is
** still negative is clamped to 0.
*/
static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
{
  ptrdiff_t abspos = pos;
  if (abspos < 0)
    abspos += (ptrdiff_t)len + 1;  /* -1 maps to len, -len maps to 1. */
  if (abspos < 0)
    return 0;
  return abspos;
}
/* Search worker: looks for the pattern (argument 2) inside the subject
** string (argument 1), starting at the optional position in argument 3
** (default 1; a negative value counts back from the end).
** If `find' is non-zero and either argument 4 is true or the pattern has
** no special characters, a fixed-string search is done and the start and
** end index of the match are returned. Otherwise the pattern is matched at
** successive positions (only at the first one if it begins with '^'); with
** `find' set the two indices precede the captures, without it only the
** captures are returned.
** Returns the number of results; a single nil means "not found".
** NOTE(review): this span appears to interleave the pre- and post-refactor
** lines of the hunk (two sets of `s'/`p' declarations, two `} else {'
** lines, two loop heads). Confirm against the real file before editing.
*/
static int str_find_aux(lua_State *L, int find)
{
size_t l1, l2;
const char *s = luaL_checklstring(L, 1, &l1);
const char *p = luaL_checklstring(L, 2, &l2);
ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
if (init < 0) {
init = 0;
} else if ((size_t)(init) > l1) {
GCstr *s = lj_lib_checkstr(L, 1);
GCstr *p = lj_lib_checkstr(L, 2);
int32_t start = lj_lib_optint(L, 3, 1);
MSize st;
if (start < 0) start += (int32_t)s->len; else start--;
if (start < 0) start = 0;
st = (MSize)start;
if (st > s->len) {
#if LJ_52
setnilV(L->top-1);
return 1;
#else
init = (ptrdiff_t)l1;
st = s->len;
#endif
}
if (find && (lua_toboolean(L, 4) || /* explicit request? */
strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
/* do a plain search */
const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
if (s2) {
lua_pushinteger(L, s2-s+1);
lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
!lj_str_haspattern(p))) { /* Search for fixed string. */
const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
if (q) {
setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
return 2;
}
} else {
} else { /* Search for pattern. */
MatchState ms;
int anchor = (*p == '^') ? (p++, 1) : 0;
const char *s1=s+init;
const char *pstr = strdata(p);
const char *sstr = strdata(s) + st;
int anchor = 0;
if (*pstr == '^') { pstr++; anchor = 1; }
ms.L = L;
ms.src_init = s;
ms.src_end = s+l1;
do {
const char *res;
ms.src_init = strdata(s);
ms.src_end = strdata(s) + s->len;
do { /* Loop through string and try to match the pattern. */
const char *q;
ms.level = ms.depth = 0;
if ((res=match(&ms, s1, p)) != NULL) {
q = match(&ms, sstr, pstr);
if (q) {
if (find) {
lua_pushinteger(L, s1-s+1); /* start */
lua_pushinteger(L, res-s); /* end */
return push_captures(&ms, NULL, 0) + 2;
setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
setintV(L->top++, (int32_t)(q-strdata(s)));
return push_captures(&ms, NULL, NULL) + 2;
} else {
return push_captures(&ms, s1, res);
return push_captures(&ms, sstr, q);
}
}
} while (s1++ < ms.src_end && !anchor);
} while (sstr++ < ms.src_end && !anchor);
}
lua_pushnil(L); /* not found */
setnilV(L->top-1); /* Not found. */
return 1;
}

View File

@ -16,7 +16,7 @@
#include "lj_state.h"
#include "lj_char.h"
/* -- String interning ---------------------------------------------------- */
/* -- String helpers ------------------------------------------------------ */
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@ -62,6 +62,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
return 0;
}
/* Locate the first occurrence of the fixed string p (plen bytes) inside
** the string s (slen bytes). Returns a pointer to the start of the match
** or NULL if there is none. An empty p matches right at s.
*/
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
{
  int first;
  MSize rest, span;
  if (plen > slen) return NULL;  /* Needle is longer than the haystack. */
  if (plen == 0) return s;  /* Empty needle: found at the very start. */
  first = *(const uint8_t *)p;
  rest = plen - 1;  /* Bytes left to compare after the first one. */
  span = slen - rest;  /* A match cannot start beyond this many bytes. */
  while (span != 0) {
    const char *hit = (const char *)memchr(s, first, span);
    if (hit == NULL) return NULL;
    if (memcmp(hit+1, p+1, rest) == 0) return hit;
    /* Resume the scan right after the failed candidate. */
    span -= (MSize)(hit+1 - s);
    s = hit+1;
  }
  return NULL;
}
/* Report whether string s contains at least one pattern matching character.
** Returns 1 if so, 0 otherwise.
*/
int lj_str_haspattern(GCstr *s)
{
  const char *cur = strdata(s);
  const char *end = cur + s->len;
  for (; cur < end; cur++) {
    int ch = *(const uint8_t *)cur;
    if (!lj_char_ispunct(ch)) continue;  /* Only punctuation can be special. */
    if (strchr("^$*+?.([%-", ch) != NULL)
      return 1;  /* Found a pattern matching char. */
  }
  return 0;  /* No pattern matching chars found. */
}
/* -- String interning ---------------------------------------------------- */
/* Resize the string hash table (grow and shrink). */
void lj_str_resize(lua_State *L, MSize newmask)
{

View File

@ -10,8 +10,13 @@
#include "lj_obj.h"
/* String interning. */
/* String helpers. */
LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
/* Find fixed string p (plen bytes) inside string s (slen bytes).
** Returns a pointer to the match or NULL if p does not occur in s.
*/
LJ_FUNC const char *lj_str_find(const char *s, const char *p,
				MSize slen, MSize plen);
LJ_FUNC int lj_str_haspattern(GCstr *s);
/* String interning. */
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);