Miscellaneous cleanup of regular-expression compiler.

Revert our previous addition of "all" flags to copyins() and copyouts();
they're no longer needed, and were never anything but an unsightly hack.

Improve a couple of infelicities in the REG_DEBUG code for dumping
the NFA data structure, including adding code to count the total
number of states and arcs.

Add a couple of missed error checks.

Add some more documentation in the README file, and some regression tests
illustrating cases that exceeded the state-count limit and/or took
unreasonable amounts of time before this set of patches.

Back-patch to all supported branches.
This commit is contained in:
Tom Lane 2015-10-16 15:52:12 -04:00
parent b94c2b6a69
commit 4083a52f41
2 changed files with 23 additions and 43 deletions

View File

@ -823,14 +823,11 @@ moveins(struct nfa * nfa,
/*
* copyins - copy in arcs of a state to another state
*
* Either all arcs, or only non-empty ones as determined by all value.
*/
static void
copyins(struct nfa * nfa,
struct state * oldState,
struct state * newState,
int all)
struct state * newState)
{
assert(oldState != newState);
@ -840,7 +837,6 @@ copyins(struct nfa * nfa,
struct arc *a;
for (a = oldState->ins; a != NULL; a = a->inchain)
if (all || a->type != EMPTY)
cparc(nfa, a, a->from, newState);
}
else
@ -873,12 +869,6 @@ copyins(struct nfa * nfa,
{
struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->inchain;
continue;
}
switch (sortins_cmp(&oa, &na))
{
case -1:
@ -904,12 +894,6 @@ copyins(struct nfa * nfa,
/* newState does not have anything matching oa */
struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->inchain;
continue;
}
oa = oa->inchain;
createarc(nfa, a->type, a->co, a->from, newState);
}
@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa,
/*
* copyouts - copy out arcs of a state to another state
*
* Either all arcs, or only non-empty ones as determined by all value.
*/
static void
copyouts(struct nfa * nfa,
struct state * oldState,
struct state * newState,
int all)
struct state * newState)
{
assert(oldState != newState);
@ -1124,7 +1105,6 @@ copyouts(struct nfa * nfa,
struct arc *a;
for (a = oldState->outs; a != NULL; a = a->outchain)
if (all || a->type != EMPTY)
cparc(nfa, a, newState, a->to);
}
else
@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa,
{
struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->outchain;
continue;
}
switch (sortouts_cmp(&oa, &na))
{
case -1:
@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa,
/* newState does not have anything matching oa */
struct arc *a = oa;
if (!all && a->type == EMPTY)
{
oa = oa->outchain;
continue;
}
oa = oa->outchain;
createarc(nfa, a->type, a->co, newState, a->to);
}
@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa,
fprintf(f, "\nfinal cleanup:\n");
#endif
cleanup(nfa); /* final tidying */
#ifdef REG_DEBUG
if (verbose)
dumpnfa(nfa, f);
#endif
return analyze(nfa); /* and analysis */
}
@ -1568,7 +1540,7 @@ pull(struct nfa * nfa,
s = newstate(nfa);
if (NISERR())
return 0;
copyins(nfa, from, s, 1); /* duplicate inarcs */
copyins(nfa, from, s); /* duplicate inarcs */
cparc(nfa, con, s, to); /* move constraint arc */
freearc(nfa, con);
if (NISERR())
@ -1735,7 +1707,7 @@ push(struct nfa * nfa,
s = newstate(nfa);
if (NISERR())
return 0;
copyouts(nfa, to, s, 1); /* duplicate outarcs */
copyouts(nfa, to, s); /* duplicate outarcs */
cparc(nfa, con, from, s); /* move constraint arc */
freearc(nfa, con);
if (NISERR())
@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa,
{
#ifdef REG_DEBUG
struct state *s;
int nstates = 0;
int narcs = 0;
fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
if (nfa->bos[0] != COLORLESS)
@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa,
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
fprintf(f, "\n");
for (s = nfa->states; s != NULL; s = s->next)
{
dumpstate(s, f);
nstates++;
narcs += s->nouts;
}
fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
if (nfa->parent == NULL)
dumpcolors(nfa->cm, f);
fflush(f);

View File

@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *);
static void sortouts(struct nfa *, struct state *);
static int sortouts_cmp(const void *, const void *);
static void moveins(struct nfa *, struct state *, struct state *);
static void copyins(struct nfa *, struct state *, struct state *, int);
static void copyins(struct nfa *, struct state *, struct state *);
static void mergeins(struct nfa *, struct state *, struct arc **, int);
static void moveouts(struct nfa *, struct state *, struct state *);
static void copyouts(struct nfa *, struct state *, struct state *, int);
static void copyouts(struct nfa *, struct state *, struct state *);
static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
static void delsub(struct nfa *, struct state *, struct state *);
static void deltraverse(struct nfa *, struct state *, struct state *);
@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *);
#ifdef REG_DEBUG
static void dumpstate(struct state *, FILE *);
static void dumparcs(struct state *, FILE *);
static int dumprarcs(struct arc *, struct state *, FILE *, int);
static void dumparc(struct arc *, struct state *, FILE *);
static void dumpcnfa(struct cnfa *, FILE *);
static void dumpcstate(int, struct cnfa *, FILE *);
@ -613,7 +612,9 @@ makesearch(struct vars * v,
for (s = slist; s != NULL; s = s2)
{
s2 = newstate(nfa);
copyouts(nfa, s, s2, 1);
NOERR();
copyouts(nfa, s, s2);
NOERR();
for (a = s->ins; a != NULL; a = b)
{
b = a->inchain;
@ -1997,7 +1998,7 @@ dump(regex_t *re,
dumpcolors(&g->cmap, f);
if (!NULLCNFA(g->search))
{
printf("\nsearch:\n");
fprintf(f, "\nsearch:\n");
dumpcnfa(&g->search, f);
}
for (i = 1; i < g->nlacons; i++)