Miscellaneous cleanup of regular-expression compiler.

Revert our previous addition of "all" flags to copyins() and copyouts();
they're no longer needed, and were never anything but an unsightly hack.

Improve a couple of infelicities in the REG_DEBUG code for dumping
the NFA data structure, including adding code to count the total
number of states and arcs.

Add a couple of missed error checks.

Add some more documentation in the README file, and some regression tests
illustrating cases that exceeded the state-count limit and/or took
unreasonable amounts of time before this set of patches.

Back-patch to all supported branches.
This commit is contained in:
Tom Lane 2015-10-16 15:52:12 -04:00
parent b94c2b6a69
commit 4083a52f41
2 changed files with 23 additions and 43 deletions

View File

@@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
/* /*
* copyins - copy in arcs of a state to another state * copyins - copy in arcs of a state to another state
*
* Either all arcs, or only non-empty ones as determined by all value.
*/ */
static void static void
copyins(struct nfa * nfa, copyins(struct nfa * nfa,
struct state * oldState, struct state * oldState,
struct state * newState, struct state * newState)
int all)
{ {
assert(oldState != newState); assert(oldState != newState);
@@ -840,7 +837,6 @@ copyins(struct nfa * nfa,
struct arc *a; struct arc *a;
for (a = oldState->ins; a != NULL; a = a->inchain) for (a = oldState->ins; a != NULL; a = a->inchain)
if (all || a->type != EMPTY)
cparc(nfa, a, a->from, newState); cparc(nfa, a, a->from, newState);
} }
else else
@@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
{ {
struct arc *a = oa; struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->inchain;
continue;
}
switch (sortins_cmp(&oa, &na)) switch (sortins_cmp(&oa, &na))
{ {
case -1: case -1:
@@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
/* newState does not have anything matching oa */ /* newState does not have anything matching oa */
struct arc *a = oa; struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->inchain;
continue;
}
oa = oa->inchain; oa = oa->inchain;
createarc(nfa, a->type, a->co, a->from, newState); createarc(nfa, a->type, a->co, a->from, newState);
} }
@@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
/* /*
* copyouts - copy out arcs of a state to another state * copyouts - copy out arcs of a state to another state
*
* Either all arcs, or only non-empty ones as determined by all value.
*/ */
static void static void
copyouts(struct nfa * nfa, copyouts(struct nfa * nfa,
struct state * oldState, struct state * oldState,
struct state * newState, struct state * newState)
int all)
{ {
assert(oldState != newState); assert(oldState != newState);
@@ -1124,7 +1105,6 @@ copyouts(struct nfa * nfa,
struct arc *a; struct arc *a;
for (a = oldState->outs; a != NULL; a = a->outchain) for (a = oldState->outs; a != NULL; a = a->outchain)
if (all || a->type != EMPTY)
cparc(nfa, a, newState, a->to); cparc(nfa, a, newState, a->to);
} }
else else
@@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
{ {
struct arc *a = oa; struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->outchain;
continue;
}
switch (sortouts_cmp(&oa, &na)) switch (sortouts_cmp(&oa, &na))
{ {
case -1: case -1:
@@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
/* newState does not have anything matching oa */ /* newState does not have anything matching oa */
struct arc *a = oa; struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->outchain;
continue;
}
oa = oa->outchain; oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to); createarc(nfa, a->type, a->co, newState, a->to);
} }
@@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
fprintf(f, "\nfinal cleanup:\n"); fprintf(f, "\nfinal cleanup:\n");
#endif #endif
cleanup(nfa); /* final tidying */ cleanup(nfa); /* final tidying */
#ifdef REG_DEBUG
if (verbose)
dumpnfa(nfa, f);
#endif
return analyze(nfa); /* and analysis */ return analyze(nfa); /* and analysis */
} }
@@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
s = newstate(nfa); s = newstate(nfa);
if (NISERR()) if (NISERR())
return 0; return 0;
copyins(nfa, from, s, 1); /* duplicate inarcs */ copyins(nfa, from, s); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */ cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con); freearc(nfa, con);
if (NISERR()) if (NISERR())
@@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
s = newstate(nfa); s = newstate(nfa);
if (NISERR()) if (NISERR())
return 0; return 0;
copyouts(nfa, to, s, 1); /* duplicate outarcs */ copyouts(nfa, to, s); /* duplicate outarcs */
cparc(nfa, con, from, s); /* move constraint arc */ cparc(nfa, con, from, s); /* move constraint arc */
freearc(nfa, con); freearc(nfa, con);
if (NISERR()) if (NISERR())
@@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
{ {
#ifdef REG_DEBUG #ifdef REG_DEBUG
struct state *s; struct state *s;
int nstates = 0;
int narcs = 0;
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
if (nfa->bos[0] != COLORLESS) if (nfa->bos[0] != COLORLESS)
@@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
fprintf(f, "\n"); fprintf(f, "\n");
for (s = nfa->states; s != NULL; s = s->next) for (s = nfa->states; s != NULL; s = s->next)
{
dumpstate(s, f); dumpstate(s, f);
nstates++;
narcs += s->nouts;
}
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
if (nfa->parent == NULL) if (nfa->parent == NULL)
dumpcolors(nfa->cm, f); dumpcolors(nfa->cm, f);
fflush(f); fflush(f);

View File

@@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
static void sortouts(struct nfa *, struct state *); static void sortouts(struct nfa *, struct state *);
static int sortouts_cmp(const void *, const void *); static int sortouts_cmp(const void *, const void *);
static void moveins(struct nfa *, struct state *, struct state *); static void moveins(struct nfa *, struct state *, struct state *);
static void copyins(struct nfa *, struct state *, struct state *, int); static void copyins(struct nfa *, struct state *, struct state *);
static void mergeins(struct nfa *, struct state *, struct arc **, int); static void mergeins(struct nfa *, struct state *, struct arc **, int);
static void moveouts(struct nfa *, struct state *, struct state *); static void moveouts(struct nfa *, struct state *, struct state *);
static void copyouts(struct nfa *, struct state *, struct state *, int); static void copyouts(struct nfa *, struct state *, struct state *);
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
static void delsub(struct nfa *, struct state *, struct state *); static void delsub(struct nfa *, struct state *, struct state *);
static void deltraverse(struct nfa *, struct state *, struct state *); static void deltraverse(struct nfa *, struct state *, struct state *);
@@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
#ifdef REG_DEBUG #ifdef REG_DEBUG
static void dumpstate(struct state *, FILE *); static void dumpstate(struct state *, FILE *);
static void dumparcs(struct state *, FILE *); static void dumparcs(struct state *, FILE *);
static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *); static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *); static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct cnfa *, FILE *); static void dumpcstate(int, struct cnfa *, FILE *);
@@ -613,7 +612,9 @@ makesearch(struct vars * v,
for (s = slist; s != NULL; s = s2) for (s = slist; s != NULL; s = s2)
{ {
s2 = newstate(nfa); s2 = newstate(nfa);
copyouts(nfa, s, s2, 1); NOERR();
copyouts(nfa, s, s2);
NOERR();
for (a = s->ins; a != NULL; a = b) for (a = s->ins; a != NULL; a = b)
{ {
b = a->inchain; b = a->inchain;
@@ -1997,7 +1998,7 @@ dump(regex_t *re,
dumpcolors(&g->cmap, f); dumpcolors(&g->cmap, f);
if (!NULLCNFA(g->search)) if (!NULLCNFA(g->search))
{ {
printf("\nsearch:\n"); fprintf(f, "\nsearch:\n");
dumpcnfa(&g->search, f); dumpcnfa(&g->search, f);
} }
for (i = 1; i < g->nlacons; i++) for (i = 1; i < g->nlacons; i++)