postgres/src/backend/regex/regexport.c

/*-------------------------------------------------------------------------
 *
 * regexport.c
 *	  Functions for exporting info about a regex's NFA
 *
 * In this implementation, the NFA defines a necessary but not sufficient
 * condition for a string to match the regex: that is, there can be strings
 * that match the NFA but don't match the full regex, but not vice versa.
 * Thus, for example, it is okay for the functions below to ignore lookaround
 * constraints, which merely constrain the string some more.
 *
 * Notice that these functions return info into caller-provided arrays
 * rather than doing their own malloc's.  This simplifies the APIs by
 * eliminating a class of error conditions, and in the case of colors
 * allows the caller to decide how big is too big to bother with.
 *
 *
 * Portions Copyright (c) 2013-2015, PostgreSQL Global Development Group
 * Portions Copyright (c) 1998, 1999 Henry Spencer
 *
 * IDENTIFICATION
 *	  src/backend/regex/regexport.c
 *
 *-------------------------------------------------------------------------
 */

#include "regex/regguts.h"

#include "regex/regexport.h"

static void scancolormap(struct colormap * cm, int co,
			 union tree * t, int level, chr partial,
			 pg_wchar **chars, int *chars_len);


/*
 * Get total number of NFA states.
 */
int
pg_reg_getnumstates(const regex_t *regex)
{
	struct cnfa *cnfa;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	return cnfa->nstates;
}

/*
 * Get initial state of NFA.
 */
int
pg_reg_getinitialstate(const regex_t *regex)
{
	struct cnfa *cnfa;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	return cnfa->pre;
}

/*
 * Get final state of NFA.
 */
int
pg_reg_getfinalstate(const regex_t *regex)
{
	struct cnfa *cnfa;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	return cnfa->post;
}

/*
 * Get number of outgoing NFA arcs of state number "st".
 *
 * Note: LACON arcs are ignored, both here and in pg_reg_getoutarcs().
 */
int
pg_reg_getnumoutarcs(const regex_t *regex, int st)
{
	struct cnfa *cnfa;
	struct carc *ca;
	int			count;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	if (st < 0 || st >= cnfa->nstates)
		return 0;
	count = 0;
	for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
	{
		if (ca->co < cnfa->ncolors)
			count++;
	}
	return count;
}

/*
 * Write array of outgoing NFA arcs of state number "st" into arcs[],
 * whose length arcs_len must be at least as long as indicated by
 * pg_reg_getnumoutarcs(), else not all arcs will be returned.
 */
void
pg_reg_getoutarcs(const regex_t *regex, int st,
				  regex_arc_t *arcs, int arcs_len)
{
	struct cnfa *cnfa;
	struct carc *ca;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	if (st < 0 || st >= cnfa->nstates || arcs_len <= 0)
		return;
	for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
	{
		if (ca->co < cnfa->ncolors)
		{
			arcs->co = ca->co;
			arcs->to = ca->to;
			arcs++;
			if (--arcs_len == 0)
				break;
		}
	}
}

/*
 * Get total number of colors.
 */
int
pg_reg_getnumcolors(const regex_t *regex)
{
	struct colormap *cm;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cm = &((struct guts *) regex->re_guts)->cmap;

	return cm->max + 1;
}

/*
 * Check if color is beginning of line/string.
 *
 * (We might at some point need to offer more refined handling of pseudocolors,
 * but this will do for now.)
 */
int
pg_reg_colorisbegin(const regex_t *regex, int co)
{
	struct cnfa *cnfa;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	if (co == cnfa->bos[0] || co == cnfa->bos[1])
		return true;
	else
		return false;
}

/*
 * Check if color is end of line/string.
 */
int
pg_reg_colorisend(const regex_t *regex, int co)
{
	struct cnfa *cnfa;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cnfa = &((struct guts *) regex->re_guts)->search;

	if (co == cnfa->eos[0] || co == cnfa->eos[1])
		return true;
	else
		return false;
}

/*
 * Get number of member chrs of color number "co".
 *
 * Note: we return -1 if the color number is invalid, or if it is a special
 * color (WHITE or a pseudocolor), or if the number of members is uncertain.
 * The latter case cannot arise right now but is specified to allow for future
 * improvements (see musings about run-time handling of higher character codes
 * in regex/README).  Callers should not try to extract the members if -1 is
 * returned.
 */
int
pg_reg_getnumcharacters(const regex_t *regex, int co)
{
	struct colormap *cm;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cm = &((struct guts *) regex->re_guts)->cmap;

	if (co <= 0 || co > cm->max)	/* we reject 0 which is WHITE */
		return -1;
	if (cm->cd[co].flags & PSEUDO)		/* also pseudocolors (BOS etc) */
		return -1;

	return cm->cd[co].nchrs;
}

/*
 * Write array of member chrs of color number "co" into chars[],
 * whose length chars_len must be at least as long as indicated by
 * pg_reg_getnumcharacters(), else not all chars will be returned.
 *
 * Fetching the members of WHITE or a pseudocolor is not supported.
 *
 * Caution: this is a relatively expensive operation.
 */
void
pg_reg_getcharacters(const regex_t *regex, int co,
					 pg_wchar *chars, int chars_len)
{
	struct colormap *cm;

	assert(regex != NULL && regex->re_magic == REMAGIC);
	cm = &((struct guts *) regex->re_guts)->cmap;

	if (co <= 0 || co > cm->max || chars_len <= 0)
		return;
	if (cm->cd[co].flags & PSEUDO)
		return;

	/* Recursively search the colormap tree */
	scancolormap(cm, co, cm->tree, 0, 0, &chars, &chars_len);
}

/*
 * Recursively scan the colormap tree to find chrs belonging to color "co".
 * See regex/README for info about the tree structure.
 *
 * t: tree block to scan
 * level: level (from 0) of t
 * partial: partial chr code for chrs within t
 * chars, chars_len: output area
 */
static void
scancolormap(struct colormap * cm, int co,
			 union tree * t, int level, chr partial,
			 pg_wchar **chars, int *chars_len)
{
	int			i;

	if (level < NBYTS - 1)
	{
		/* non-leaf node */
		for (i = 0; i < BYTTAB; i++)
		{
			/*
			 * We do not support search for chrs of color 0 (WHITE), so
			 * all-white subtrees need not be searched.  These can be
			 * recognized because they are represented by the fill blocks in
			 * the colormap struct.  This typically allows us to avoid
			 * scanning large regions of higher-numbered chrs.
			 */
			if (t->tptr[i] == &cm->tree[level + 1])
				continue;

			/* Recursively scan next level down */
			scancolormap(cm, co,
						 t->tptr[i], level + 1,
						 (partial | (chr) i) << BYTBITS,
						 chars, chars_len);
		}
	}
	else
	{
		/* leaf node */
		for (i = 0; i < BYTTAB; i++)
		{
			if (t->tcolor[i] == co)
			{
				if (*chars_len > 0)
				{
					**chars = partial | (chr) i;
					(*chars)++;
					(*chars_len)--;
				}
			}
		}
	}
}