diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c index 0e87ad2deba..5fe9bb161a9 100644 --- a/src/backend/regex/regc_lex.c +++ b/src/backend/regex/regc_lex.c @@ -792,13 +792,13 @@ lexescape(struct vars * v) break; case CHR('u'): c = lexdigits(v, 16, 4, 4); - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; case CHR('U'): c = lexdigits(v, 16, 8, 8); - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; @@ -816,7 +816,7 @@ lexescape(struct vars * v) case CHR('x'): NOTE(REG_UUNPORT); c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ - if (ISERR()) + if (ISERR() || c < CHR_MIN || c > CHR_MAX) FAILW(REG_EESCAPE); RETV(PLAIN, c); break; @@ -872,6 +872,9 @@ lexescape(struct vars * v) /* * lexdigits - slurp up digits and return chr value + * + * This does not account for overflow; callers should range-check the result + * if maxlen is large enough to make that possible. */ static chr /* chr value; errors signalled via ERR */ lexdigits(struct vars * v, diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c index e6f5fc45da0..7b5821173c6 100644 --- a/src/backend/regex/regc_locale.c +++ b/src/backend/regex/regc_locale.c @@ -398,8 +398,7 @@ range(struct vars * v, /* context */ int nchrs; struct cvec *cv; celt c, - lc, - uc; + cc; if (a != b && !before(a, b)) { @@ -417,24 +416,51 @@ range(struct vars * v, /* context */ /* * When case-independent, it's hard to decide when cvec ranges are usable, - * so for now at least, we won't try. We allocate enough space for two - * case variants plus a little extra for the two title case variants. + * so for now at least, we won't try. We use a range for the originally + * specified chrs and then add on any case-equivalents that are outside + * that range as individual chrs. + * + * To ensure sane behavior if someone specifies a very large range, limit + * the allocation size to 100000 chrs (arbitrary) and check for overrun + * inside the loop below. */ + nchrs = b - a + 1; + if (nchrs <= 0 || nchrs > 100000) + nchrs = 100000; - nchrs = (b - a + 1) * 2 + 4; - - cv = getcvec(v, nchrs, 0); + cv = getcvec(v, nchrs, 1); NOERRN(); + addrange(cv, a, b); for (c = a; c <= b; c++) { - addchr(cv, c); - lc = pg_wc_tolower((chr) c); - if (c != lc) - addchr(cv, lc); - uc = pg_wc_toupper((chr) c); - if (c != uc) - addchr(cv, uc); + cc = pg_wc_tolower((chr) c); + if (cc != c && + (before(cc, a) || before(b, cc))) + { + if (cv->nchrs >= cv->chrspace) + { + ERR(REG_ETOOBIG); + return NULL; + } + addchr(cv, cc); + } + cc = pg_wc_toupper((chr) c); + if (cc != c && + (before(cc, a) || before(b, cc))) + { + if (cv->nchrs >= cv->chrspace) + { + ERR(REG_ETOOBIG); + return NULL; + } + addchr(cv, cc); + } + if (CANCEL_REQUESTED(v->re)) + { + ERR(REG_CANCEL); + return NULL; + } } return cv; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index c7d43a766c2..1686008af8a 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1569,6 +1569,7 @@ dovec(struct vars * v, { ch = *p; newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); + NOERR(); } /* and the ranges */ @@ -1578,6 +1579,7 @@ dovec(struct vars * v, to = *(p + 1); if (from <= to) subrange(v, from, to, lp, rp); + NOERR(); } } diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h index dbb461a0ce7..3f1d14e1908 100644 --- a/src/include/regex/regcustom.h +++ b/src/include/regex/regcustom.h @@ -65,7 +65,8 @@ typedef int celt; /* type to hold chr, or NOCELT */ #define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ #define CHRBITS 32 /* bits in a chr; must not use sizeof */ #define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ -#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */ +#define CHR_MAX 0x7ffffffe /* CHR_MAX-CHR_MIN+1 must fit in an int, and + * CHR_MAX+1 must fit in both chr and celt */ /* functions operating on chr */ #define iscalnum(x) pg_wc_isalnum(x)