Miscellaneous cleanup of regular-expression compiler.
Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack. Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs. Add a couple of missed error checks. Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches. Back-patch to all supported branches.
This commit is contained in:
parent
b94c2b6a69
commit
4083a52f41
@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
|
||||
|
||||
/*
|
||||
* copyins - copy in arcs of a state to another state
|
||||
*
|
||||
* Either all arcs, or only non-empty ones as determined by all value.
|
||||
*/
|
||||
static void
|
||||
copyins(struct nfa * nfa,
|
||||
struct state * oldState,
|
||||
struct state * newState,
|
||||
int all)
|
||||
struct state * newState)
|
||||
{
|
||||
assert(oldState != newState);
|
||||
|
||||
@ -840,8 +837,7 @@ copyins(struct nfa * nfa,
|
||||
struct arc *a;
|
||||
|
||||
for (a = oldState->ins; a != NULL; a = a->inchain)
|
||||
if (all || a->type != EMPTY)
|
||||
cparc(nfa, a, a->from, newState);
|
||||
cparc(nfa, a, a->from, newState);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
|
||||
{
|
||||
struct arc *a = oa;
|
||||
|
||||
if (!all && a->type == EMPTY)
|
||||
{
|
||||
oa = oa->inchain;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (sortins_cmp(&oa, &na))
|
||||
{
|
||||
case -1:
|
||||
@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
|
||||
/* newState does not have anything matching oa */
|
||||
struct arc *a = oa;
|
||||
|
||||
if (!all && a->type == EMPTY)
|
||||
{
|
||||
oa = oa->inchain;
|
||||
continue;
|
||||
}
|
||||
|
||||
oa = oa->inchain;
|
||||
createarc(nfa, a->type, a->co, a->from, newState);
|
||||
}
|
||||
@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
|
||||
|
||||
/*
|
||||
* copyouts - copy out arcs of a state to another state
|
||||
*
|
||||
* Either all arcs, or only non-empty ones as determined by all value.
|
||||
*/
|
||||
static void
|
||||
copyouts(struct nfa * nfa,
|
||||
struct state * oldState,
|
||||
struct state * newState,
|
||||
int all)
|
||||
struct state * newState)
|
||||
{
|
||||
assert(oldState != newState);
|
||||
|
||||
@ -1124,8 +1105,7 @@ copyouts(struct nfa * nfa,
|
||||
struct arc *a;
|
||||
|
||||
for (a = oldState->outs; a != NULL; a = a->outchain)
|
||||
if (all || a->type != EMPTY)
|
||||
cparc(nfa, a, newState, a->to);
|
||||
cparc(nfa, a, newState, a->to);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
|
||||
{
|
||||
struct arc *a = oa;
|
||||
|
||||
if (!all && a->type == EMPTY)
|
||||
{
|
||||
oa = oa->outchain;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (sortouts_cmp(&oa, &na))
|
||||
{
|
||||
case -1:
|
||||
@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
|
||||
/* newState does not have anything matching oa */
|
||||
struct arc *a = oa;
|
||||
|
||||
if (!all && a->type == EMPTY)
|
||||
{
|
||||
oa = oa->outchain;
|
||||
continue;
|
||||
}
|
||||
|
||||
oa = oa->outchain;
|
||||
createarc(nfa, a->type, a->co, newState, a->to);
|
||||
}
|
||||
@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
|
||||
fprintf(f, "\nfinal cleanup:\n");
|
||||
#endif
|
||||
cleanup(nfa); /* final tidying */
|
||||
#ifdef REG_DEBUG
|
||||
if (verbose)
|
||||
dumpnfa(nfa, f);
|
||||
#endif
|
||||
return analyze(nfa); /* and analysis */
|
||||
}
|
||||
|
||||
@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
|
||||
s = newstate(nfa);
|
||||
if (NISERR())
|
||||
return 0;
|
||||
copyins(nfa, from, s, 1); /* duplicate inarcs */
|
||||
copyins(nfa, from, s); /* duplicate inarcs */
|
||||
cparc(nfa, con, s, to); /* move constraint arc */
|
||||
freearc(nfa, con);
|
||||
if (NISERR())
|
||||
@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
|
||||
s = newstate(nfa);
|
||||
if (NISERR())
|
||||
return 0;
|
||||
copyouts(nfa, to, s, 1); /* duplicate outarcs */
|
||||
copyouts(nfa, to, s); /* duplicate outarcs */
|
||||
cparc(nfa, con, from, s); /* move constraint arc */
|
||||
freearc(nfa, con);
|
||||
if (NISERR())
|
||||
@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
|
||||
{
|
||||
#ifdef REG_DEBUG
|
||||
struct state *s;
|
||||
int nstates = 0;
|
||||
int narcs = 0;
|
||||
|
||||
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
|
||||
if (nfa->bos[0] != COLORLESS)
|
||||
@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
|
||||
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
|
||||
fprintf(f, "\n");
|
||||
for (s = nfa->states; s != NULL; s = s->next)
|
||||
{
|
||||
dumpstate(s, f);
|
||||
nstates++;
|
||||
narcs += s->nouts;
|
||||
}
|
||||
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
|
||||
if (nfa->parent == NULL)
|
||||
dumpcolors(nfa->cm, f);
|
||||
fflush(f);
|
||||
|
@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
|
||||
static void sortouts(struct nfa *, struct state *);
|
||||
static int sortouts_cmp(const void *, const void *);
|
||||
static void moveins(struct nfa *, struct state *, struct state *);
|
||||
static void copyins(struct nfa *, struct state *, struct state *, int);
|
||||
static void copyins(struct nfa *, struct state *, struct state *);
|
||||
static void mergeins(struct nfa *, struct state *, struct arc **, int);
|
||||
static void moveouts(struct nfa *, struct state *, struct state *);
|
||||
static void copyouts(struct nfa *, struct state *, struct state *, int);
|
||||
static void copyouts(struct nfa *, struct state *, struct state *);
|
||||
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
|
||||
static void delsub(struct nfa *, struct state *, struct state *);
|
||||
static void deltraverse(struct nfa *, struct state *, struct state *);
|
||||
@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
|
||||
#ifdef REG_DEBUG
|
||||
static void dumpstate(struct state *, FILE *);
|
||||
static void dumparcs(struct state *, FILE *);
|
||||
static int dumprarcs(struct arc *, struct state *, FILE *, int);
|
||||
static void dumparc(struct arc *, struct state *, FILE *);
|
||||
static void dumpcnfa(struct cnfa *, FILE *);
|
||||
static void dumpcstate(int, struct cnfa *, FILE *);
|
||||
@ -613,7 +612,9 @@ makesearch(struct vars * v,
|
||||
for (s = slist; s != NULL; s = s2)
|
||||
{
|
||||
s2 = newstate(nfa);
|
||||
copyouts(nfa, s, s2, 1);
|
||||
NOERR();
|
||||
copyouts(nfa, s, s2);
|
||||
NOERR();
|
||||
for (a = s->ins; a != NULL; a = b)
|
||||
{
|
||||
b = a->inchain;
|
||||
@ -1997,7 +1998,7 @@ dump(regex_t *re,
|
||||
dumpcolors(&g->cmap, f);
|
||||
if (!NULLCNFA(g->search))
|
||||
{
|
||||
printf("\nsearch:\n");
|
||||
fprintf(f, "\nsearch:\n");
|
||||
dumpcnfa(&g->search, f);
|
||||
}
|
||||
for (i = 1; i < g->nlacons; i++)
|
||||
|
Loading…
x
Reference in New Issue
Block a user