mirror of
https://github.com/LuaJIT/LuaJIT.git
synced 2025-02-12 17:24:09 +00:00
Partially refactor string.find().
This commit is contained in:
parent
2be1c2658f
commit
43de451d78
@ -155,7 +155,6 @@ typedef struct MatchState {
|
|||||||
} MatchState;
|
} MatchState;
|
||||||
|
|
||||||
#define L_ESC '%'
|
#define L_ESC '%'
|
||||||
#define SPECIALS "^$*+?.([%-"
|
|
||||||
|
|
||||||
static int check_capture(MatchState *ms, int l)
|
static int check_capture(MatchState *ms, int l)
|
||||||
{
|
{
|
||||||
@ -422,30 +421,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
** Locate the first occurrence of the byte sequence s2 (length l2) inside
** the byte sequence s1 (length l1). Both are raw memory, so embedded
** zero bytes are fine. Returns a pointer into s1, or NULL if not found.
*/
static const char *lmemfind(const char *s1, size_t l1,
                            const char *s2, size_t l2)
{
  size_t tail;  /* Bytes of s2 following its first byte. */
  size_t span;  /* Remaining number of valid starting positions in s1. */
  const char *hit;

  if (l2 == 0)
    return s1;  /* An empty needle matches right at the start. */
  if (l2 > l1)
    return NULL;  /* Needle is longer than the haystack. */

  tail = l2 - 1;
  span = l1 - tail;  /* A match cannot start beyond this many bytes. */
  while (span > 0 && (hit = (const char *)memchr(s1, *s2, span)) != NULL) {
    /* First byte matched via memchr; compare the remainder. */
    if (memcmp(hit + 1, s2 + 1, tail) == 0)
      return hit;
    /* Resume the scan just past the failed candidate. */
    span -= (size_t)(hit + 1 - s1);
    s1 = hit + 1;
  }
  return NULL;  /* Not found. */
}
|
|
||||||
|
|
||||||
static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
|
static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
|
||||||
{
|
{
|
||||||
if (i >= ms->level) {
|
if (i >= ms->level) {
|
||||||
@ -473,60 +448,56 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
|
|||||||
return nlevels; /* number of strings pushed */
|
return nlevels; /* number of strings pushed */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
** Convert a possibly relative string position into an absolute one.
** A negative pos counts back from the end of a string of length len
** (-1 maps to len). Results that would still be negative clamp to 0.
*/
static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
{
  if (pos >= 0)
    return pos;  /* Already absolute. */
  pos += (ptrdiff_t)len + 1;
  return (pos < 0) ? 0 : pos;
}
|
|
||||||
|
|
||||||
/*
** Shared worker for searching string argument 1 for argument 2, with an
** optional 1-based start position in argument 3 (negative counts from the
** end). With find != 0 the start/end indices of the match are returned
** ahead of any captures, and a true 4th argument or a pattern-free needle
** selects a fixed-string search. With find == 0 only captures are pushed.
** Returns the number of results; a single nil means "not found".
*/
static int str_find_aux(lua_State *L, int find)
{
  GCstr *s = lj_lib_checkstr(L, 1);
  GCstr *p = lj_lib_checkstr(L, 2);
  int32_t start = lj_lib_optint(L, 3, 1);
  const char *base = strdata(s);
  MSize st;

  /* Turn the 1-based/relative start into a 0-based offset, clamped at 0. */
  if (start < 0)
    start += (int32_t)s->len;
  else
    start--;
  if (start < 0)
    start = 0;
  st = (MSize)start;

  if (st > s->len) {  /* Start lies beyond the end of the string. */
#if LJ_52
    setnilV(L->top-1);
    return 1;
#else
    st = s->len;
#endif
  }

  if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
               !lj_str_haspattern(p))) {  /* Search for fixed string. */
    const char *hit = lj_str_find(base+st, strdata(p), s->len-st, p->len);
    if (hit) {
      /* Overwrite the two topmost stack slots with the 1-based range. */
      setintV(L->top-2, (int32_t)(hit-base) + 1);
      setintV(L->top-1, (int32_t)(hit-base) + (int32_t)p->len);
      return 2;
    }
  } else {  /* Search for pattern. */
    MatchState ms;
    const char *pat = strdata(p);
    const char *cur = base + st;
    int anchor = 0;

    if (*pat == '^') {  /* Anchored pattern: only try the start offset. */
      pat++;
      anchor = 1;
    }
    ms.L = L;
    ms.src_init = base;
    ms.src_end = base + s->len;
    do {  /* Try to match the pattern at each successive position. */
      const char *stop;
      ms.level = ms.depth = 0;
      stop = match(&ms, cur, pat);
      if (stop) {
        if (!find)
          return push_captures(&ms, cur, stop);
        setintV(L->top++, (int32_t)(cur-(base-1)));  /* 1-based start. */
        setintV(L->top++, (int32_t)(stop-base));  /* Inclusive end. */
        return push_captures(&ms, NULL, NULL) + 2;
      }
    } while (cur++ < ms.src_end && !anchor);
  }

  setnilV(L->top-1);  /* Not found. */
  return 1;
}
|
||||||
|
|
||||||
|
36
src/lj_str.c
36
src/lj_str.c
@ -16,7 +16,7 @@
|
|||||||
#include "lj_state.h"
|
#include "lj_state.h"
|
||||||
#include "lj_char.h"
|
#include "lj_char.h"
|
||||||
|
|
||||||
/* -- String interning ---------------------------------------------------- */
|
/* -- String helpers ------------------------------------------------------ */
|
||||||
|
|
||||||
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
|
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
|
||||||
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
|
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
|
||||||
@ -62,6 +62,40 @@ static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Find fixed string p inside string s. */
|
||||||
|
const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
|
||||||
|
{
|
||||||
|
if (plen <= slen) {
|
||||||
|
if (plen == 0) {
|
||||||
|
return s;
|
||||||
|
} else {
|
||||||
|
int c = *(const uint8_t *)p++;
|
||||||
|
plen--; slen -= plen;
|
||||||
|
while (slen) {
|
||||||
|
const char *q = (const char *)memchr(s, c, slen);
|
||||||
|
if (!q) break;
|
||||||
|
if (memcmp(q+1, p, plen) == 0) return q;
|
||||||
|
q++; slen -= (MSize)(q-s); s = q;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check whether a string has a pattern matching character. */
|
||||||
|
int lj_str_haspattern(GCstr *s)
|
||||||
|
{
|
||||||
|
const char *p = strdata(s), *q = p + s->len;
|
||||||
|
while (p < q) {
|
||||||
|
int c = *(const uint8_t *)p++;
|
||||||
|
if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
|
||||||
|
return 1; /* Found a pattern matching char. */
|
||||||
|
}
|
||||||
|
return 0; /* No pattern matching chars found. */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* -- String interning ---------------------------------------------------- */
|
||||||
|
|
||||||
/* Resize the string hash table (grow and shrink). */
|
/* Resize the string hash table (grow and shrink). */
|
||||||
void lj_str_resize(lua_State *L, MSize newmask)
|
void lj_str_resize(lua_State *L, MSize newmask)
|
||||||
{
|
{
|
||||||
|
@ -10,8 +10,13 @@
|
|||||||
|
|
||||||
#include "lj_obj.h"
|
#include "lj_obj.h"
|
||||||
|
|
||||||
/* String interning. */
|
/* String helpers. */
|
||||||
LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
|
LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
|
||||||
|
LJ_FUNC const char *lj_str_find(const char *s, const char *f,
|
||||||
|
MSize slen, MSize flen);
|
||||||
|
LJ_FUNC int lj_str_haspattern(GCstr *s);
|
||||||
|
|
||||||
|
/* String interning. */
|
||||||
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
|
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
|
||||||
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
|
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
|
||||||
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
|
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
|
||||||
|
Loading…
Reference in New Issue
Block a user