Miscellaneous cleanup of regular-expression compiler.
Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack.

Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs.

Add a couple of missed error checks.

Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches.

Back-patch to all supported branches.
This commit is contained in:
parent
b94c2b6a69
commit
4083a52f41
@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* copyins - copy in arcs of a state to another state
|
* copyins - copy in arcs of a state to another state
|
||||||
*
|
|
||||||
* Either all arcs, or only non-empty ones as determined by all value.
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
copyins(struct nfa * nfa,
|
copyins(struct nfa * nfa,
|
||||||
struct state * oldState,
|
struct state * oldState,
|
||||||
struct state * newState,
|
struct state * newState)
|
||||||
int all)
|
|
||||||
{
|
{
|
||||||
assert(oldState != newState);
|
assert(oldState != newState);
|
||||||
|
|
||||||
@ -840,7 +837,6 @@ copyins(struct nfa * nfa,
|
|||||||
struct arc *a;
|
struct arc *a;
|
||||||
|
|
||||||
for (a = oldState->ins; a != NULL; a = a->inchain)
|
for (a = oldState->ins; a != NULL; a = a->inchain)
|
||||||
if (all || a->type != EMPTY)
|
|
||||||
cparc(nfa, a, a->from, newState);
|
cparc(nfa, a, a->from, newState);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
|
|||||||
{
|
{
|
||||||
struct arc *a = oa;
|
struct arc *a = oa;
|
||||||
|
|
||||||
if (!all && a->type == EMPTY)
|
|
||||||
{
|
|
||||||
oa = oa->inchain;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (sortins_cmp(&oa, &na))
|
switch (sortins_cmp(&oa, &na))
|
||||||
{
|
{
|
||||||
case -1:
|
case -1:
|
||||||
@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
|
|||||||
/* newState does not have anything matching oa */
|
/* newState does not have anything matching oa */
|
||||||
struct arc *a = oa;
|
struct arc *a = oa;
|
||||||
|
|
||||||
if (!all && a->type == EMPTY)
|
|
||||||
{
|
|
||||||
oa = oa->inchain;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
oa = oa->inchain;
|
oa = oa->inchain;
|
||||||
createarc(nfa, a->type, a->co, a->from, newState);
|
createarc(nfa, a->type, a->co, a->from, newState);
|
||||||
}
|
}
|
||||||
@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* copyouts - copy out arcs of a state to another state
|
* copyouts - copy out arcs of a state to another state
|
||||||
*
|
|
||||||
* Either all arcs, or only non-empty ones as determined by all value.
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
copyouts(struct nfa * nfa,
|
copyouts(struct nfa * nfa,
|
||||||
struct state * oldState,
|
struct state * oldState,
|
||||||
struct state * newState,
|
struct state * newState)
|
||||||
int all)
|
|
||||||
{
|
{
|
||||||
assert(oldState != newState);
|
assert(oldState != newState);
|
||||||
|
|
||||||
@ -1124,7 +1105,6 @@ copyouts(struct nfa * nfa,
|
|||||||
struct arc *a;
|
struct arc *a;
|
||||||
|
|
||||||
for (a = oldState->outs; a != NULL; a = a->outchain)
|
for (a = oldState->outs; a != NULL; a = a->outchain)
|
||||||
if (all || a->type != EMPTY)
|
|
||||||
cparc(nfa, a, newState, a->to);
|
cparc(nfa, a, newState, a->to);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
|
|||||||
{
|
{
|
||||||
struct arc *a = oa;
|
struct arc *a = oa;
|
||||||
|
|
||||||
if (!all && a->type == EMPTY)
|
|
||||||
{
|
|
||||||
oa = oa->outchain;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (sortouts_cmp(&oa, &na))
|
switch (sortouts_cmp(&oa, &na))
|
||||||
{
|
{
|
||||||
case -1:
|
case -1:
|
||||||
@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
|
|||||||
/* newState does not have anything matching oa */
|
/* newState does not have anything matching oa */
|
||||||
struct arc *a = oa;
|
struct arc *a = oa;
|
||||||
|
|
||||||
if (!all && a->type == EMPTY)
|
|
||||||
{
|
|
||||||
oa = oa->outchain;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
oa = oa->outchain;
|
oa = oa->outchain;
|
||||||
createarc(nfa, a->type, a->co, newState, a->to);
|
createarc(nfa, a->type, a->co, newState, a->to);
|
||||||
}
|
}
|
||||||
@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
|
|||||||
fprintf(f, "\nfinal cleanup:\n");
|
fprintf(f, "\nfinal cleanup:\n");
|
||||||
#endif
|
#endif
|
||||||
cleanup(nfa); /* final tidying */
|
cleanup(nfa); /* final tidying */
|
||||||
|
#ifdef REG_DEBUG
|
||||||
|
if (verbose)
|
||||||
|
dumpnfa(nfa, f);
|
||||||
|
#endif
|
||||||
return analyze(nfa); /* and analysis */
|
return analyze(nfa); /* and analysis */
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
|
|||||||
s = newstate(nfa);
|
s = newstate(nfa);
|
||||||
if (NISERR())
|
if (NISERR())
|
||||||
return 0;
|
return 0;
|
||||||
copyins(nfa, from, s, 1); /* duplicate inarcs */
|
copyins(nfa, from, s); /* duplicate inarcs */
|
||||||
cparc(nfa, con, s, to); /* move constraint arc */
|
cparc(nfa, con, s, to); /* move constraint arc */
|
||||||
freearc(nfa, con);
|
freearc(nfa, con);
|
||||||
if (NISERR())
|
if (NISERR())
|
||||||
@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
|
|||||||
s = newstate(nfa);
|
s = newstate(nfa);
|
||||||
if (NISERR())
|
if (NISERR())
|
||||||
return 0;
|
return 0;
|
||||||
copyouts(nfa, to, s, 1); /* duplicate outarcs */
|
copyouts(nfa, to, s); /* duplicate outarcs */
|
||||||
cparc(nfa, con, from, s); /* move constraint arc */
|
cparc(nfa, con, from, s); /* move constraint arc */
|
||||||
freearc(nfa, con);
|
freearc(nfa, con);
|
||||||
if (NISERR())
|
if (NISERR())
|
||||||
@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
|
|||||||
{
|
{
|
||||||
#ifdef REG_DEBUG
|
#ifdef REG_DEBUG
|
||||||
struct state *s;
|
struct state *s;
|
||||||
|
int nstates = 0;
|
||||||
|
int narcs = 0;
|
||||||
|
|
||||||
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
|
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
|
||||||
if (nfa->bos[0] != COLORLESS)
|
if (nfa->bos[0] != COLORLESS)
|
||||||
@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
|
|||||||
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
|
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
|
||||||
fprintf(f, "\n");
|
fprintf(f, "\n");
|
||||||
for (s = nfa->states; s != NULL; s = s->next)
|
for (s = nfa->states; s != NULL; s = s->next)
|
||||||
|
{
|
||||||
dumpstate(s, f);
|
dumpstate(s, f);
|
||||||
|
nstates++;
|
||||||
|
narcs += s->nouts;
|
||||||
|
}
|
||||||
|
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
|
||||||
if (nfa->parent == NULL)
|
if (nfa->parent == NULL)
|
||||||
dumpcolors(nfa->cm, f);
|
dumpcolors(nfa->cm, f);
|
||||||
fflush(f);
|
fflush(f);
|
||||||
|
@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
|
|||||||
static void sortouts(struct nfa *, struct state *);
|
static void sortouts(struct nfa *, struct state *);
|
||||||
static int sortouts_cmp(const void *, const void *);
|
static int sortouts_cmp(const void *, const void *);
|
||||||
static void moveins(struct nfa *, struct state *, struct state *);
|
static void moveins(struct nfa *, struct state *, struct state *);
|
||||||
static void copyins(struct nfa *, struct state *, struct state *, int);
|
static void copyins(struct nfa *, struct state *, struct state *);
|
||||||
static void mergeins(struct nfa *, struct state *, struct arc **, int);
|
static void mergeins(struct nfa *, struct state *, struct arc **, int);
|
||||||
static void moveouts(struct nfa *, struct state *, struct state *);
|
static void moveouts(struct nfa *, struct state *, struct state *);
|
||||||
static void copyouts(struct nfa *, struct state *, struct state *, int);
|
static void copyouts(struct nfa *, struct state *, struct state *);
|
||||||
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
|
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
|
||||||
static void delsub(struct nfa *, struct state *, struct state *);
|
static void delsub(struct nfa *, struct state *, struct state *);
|
||||||
static void deltraverse(struct nfa *, struct state *, struct state *);
|
static void deltraverse(struct nfa *, struct state *, struct state *);
|
||||||
@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
|
|||||||
#ifdef REG_DEBUG
|
#ifdef REG_DEBUG
|
||||||
static void dumpstate(struct state *, FILE *);
|
static void dumpstate(struct state *, FILE *);
|
||||||
static void dumparcs(struct state *, FILE *);
|
static void dumparcs(struct state *, FILE *);
|
||||||
static int dumprarcs(struct arc *, struct state *, FILE *, int);
|
|
||||||
static void dumparc(struct arc *, struct state *, FILE *);
|
static void dumparc(struct arc *, struct state *, FILE *);
|
||||||
static void dumpcnfa(struct cnfa *, FILE *);
|
static void dumpcnfa(struct cnfa *, FILE *);
|
||||||
static void dumpcstate(int, struct cnfa *, FILE *);
|
static void dumpcstate(int, struct cnfa *, FILE *);
|
||||||
@ -613,7 +612,9 @@ makesearch(struct vars * v,
|
|||||||
for (s = slist; s != NULL; s = s2)
|
for (s = slist; s != NULL; s = s2)
|
||||||
{
|
{
|
||||||
s2 = newstate(nfa);
|
s2 = newstate(nfa);
|
||||||
copyouts(nfa, s, s2, 1);
|
NOERR();
|
||||||
|
copyouts(nfa, s, s2);
|
||||||
|
NOERR();
|
||||||
for (a = s->ins; a != NULL; a = b)
|
for (a = s->ins; a != NULL; a = b)
|
||||||
{
|
{
|
||||||
b = a->inchain;
|
b = a->inchain;
|
||||||
@ -1997,7 +1998,7 @@ dump(regex_t *re,
|
|||||||
dumpcolors(&g->cmap, f);
|
dumpcolors(&g->cmap, f);
|
||||||
if (!NULLCNFA(g->search))
|
if (!NULLCNFA(g->search))
|
||||||
{
|
{
|
||||||
printf("\nsearch:\n");
|
fprintf(f, "\nsearch:\n");
|
||||||
dumpcnfa(&g->search, f);
|
dumpcnfa(&g->search, f);
|
||||||
}
|
}
|
||||||
for (i = 1; i < g->nlacons; i++)
|
for (i = 1; i < g->nlacons; i++)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user