Add planner support functions for range operators <@ and @>.

These support functions will transform expressions with constant
range values into direct comparisons on the range bound values,
which are frequently better-optimizable.  The transformation is
skipped however if it would require double evaluation of a
volatile or expensive element expression.

Along the way, add the range opfamily OID to range typcache entries,
since load_rangetype_info has to compute that anyway and it seems
silly to duplicate the work later.

Kim Johan Andersson and Jian He, reviewed by Laurenz Albe

Discussion: https://postgr.es/m/94f64d1f-b8c0-b0c5-98bc-0793a34e0851@kimmet.dk
This commit is contained in:
Tom Lane 2024-01-20 13:57:54 -05:00
parent abb0b4fc03
commit 075df6b208
8 changed files with 469 additions and 11 deletions

View File

@ -30,19 +30,20 @@
*/
#include "postgres.h"
#include "access/tupmacs.h"
#include "common/hashfn.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/miscnodes.h"
#include "port/pg_bitutils.h"
#include "nodes/supportnodes.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
#include "utils/timestamp.h"
#include "varatt.h"
/* fn_extra cache entry for one of the range I/O functions */
@ -69,6 +70,12 @@ static Size datum_compute_size(Size data_length, Datum val, bool typbyval,
char typalign, int16 typlen, char typstorage);
static Pointer datum_write(Pointer ptr, Datum datum, bool typbyval,
char typalign, int16 typlen, char typstorage);
static Node *find_simplified_clause(PlannerInfo *root,
Expr *rangeExpr, Expr *elemExpr);
static Expr *build_bound_expr(Expr *elemExpr, Datum val,
bool isLowerBound, bool isInclusive,
TypeCacheEntry *typeCache,
Oid opfamily, Oid rng_collation);
/*
@ -2173,6 +2180,58 @@ make_empty_range(TypeCacheEntry *typcache)
return make_range(typcache, &lower, &upper, true, NULL);
}
/*
* Planner support function for elem_contained_by_range (<@ operator).
*/
Datum
elem_contained_by_range_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
Node *ret = NULL;
if (IsA(rawreq, SupportRequestSimplify))
{
SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
FuncExpr *fexpr = req->fcall;
Expr *leftop,
*rightop;
Assert(list_length(fexpr->args) == 2);
leftop = linitial(fexpr->args);
rightop = lsecond(fexpr->args);
ret = find_simplified_clause(req->root, rightop, leftop);
}
PG_RETURN_POINTER(ret);
}
/*
* Planner support function for range_contains_elem (@> operator).
*/
Datum
range_contains_elem_support(PG_FUNCTION_ARGS)
{
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
Node *ret = NULL;
if (IsA(rawreq, SupportRequestSimplify))
{
SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
FuncExpr *fexpr = req->fcall;
Expr *leftop,
*rightop;
Assert(list_length(fexpr->args) == 2);
leftop = linitial(fexpr->args);
rightop = lsecond(fexpr->args);
ret = find_simplified_clause(req->root, leftop, rightop);
}
PG_RETURN_POINTER(ret);
}
/*
*----------------------------------------------------------
@ -2715,3 +2774,180 @@ datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign,
return ptr;
}
/*
* Common code for the elem_contained_by_range and range_contains_elem
* support functions. The caller has extracted the function argument
* expressions, and swapped them if necessary to pass the range first.
*
* Returns a simplified replacement expression, or NULL if we can't simplify.
*/
static Node *
find_simplified_clause(PlannerInfo *root, Expr *rangeExpr, Expr *elemExpr)
{
RangeType *range;
TypeCacheEntry *rangetypcache;
RangeBound lower;
RangeBound upper;
bool empty;
/* can't do anything unless the range is a non-null constant */
if (!IsA(rangeExpr, Const) || ((Const *) rangeExpr)->constisnull)
return NULL;
range = DatumGetRangeTypeP(((Const *) rangeExpr)->constvalue);
rangetypcache = lookup_type_cache(RangeTypeGetOid(range),
TYPECACHE_RANGE_INFO);
if (rangetypcache->rngelemtype == NULL)
elog(ERROR, "type %u is not a range type", RangeTypeGetOid(range));
range_deserialize(rangetypcache, range, &lower, &upper, &empty);
if (empty)
{
/* if the range is empty, then there can be no matches */
return makeBoolConst(false, false);
}
else if (lower.infinite && upper.infinite)
{
/* the range has infinite bounds, so it matches everything */
return makeBoolConst(true, false);
}
else
{
/* at least one bound is available, we have something to work with */
TypeCacheEntry *elemTypcache = rangetypcache->rngelemtype;
Oid opfamily = rangetypcache->rng_opfamily;
Oid rng_collation = rangetypcache->rng_collation;
Expr *lowerExpr = NULL;
Expr *upperExpr = NULL;
if (!lower.infinite && !upper.infinite)
{
/*
* When both bounds are present, we have a problem: the
* "simplified" clause would need to evaluate the elemExpr twice.
* That's definitely not okay if the elemExpr is volatile, and
* it's also unattractive if the elemExpr is expensive.
*/
QualCost eval_cost;
if (contain_volatile_functions((Node *) elemExpr))
return NULL;
/*
* We define "expensive" as "contains any subplan or more than 10
* operators". Note that the subplan search has to be done
* explicitly, since cost_qual_eval() will barf on unplanned
* subselects.
*/
if (contain_subplans((Node *) elemExpr))
return NULL;
cost_qual_eval_node(&eval_cost, (Node *) elemExpr, root);
if (eval_cost.startup + eval_cost.per_tuple >
10 * cpu_operator_cost)
return NULL;
}
/* Okay, try to build boundary comparison expressions */
if (!lower.infinite)
{
lowerExpr = build_bound_expr(elemExpr,
lower.val,
true,
lower.inclusive,
elemTypcache,
opfamily,
rng_collation);
if (lowerExpr == NULL)
return NULL;
}
if (!upper.infinite)
{
/* Copy the elemExpr if we need two copies */
if (!lower.infinite)
elemExpr = copyObject(elemExpr);
upperExpr = build_bound_expr(elemExpr,
upper.val,
false,
upper.inclusive,
elemTypcache,
opfamily,
rng_collation);
if (upperExpr == NULL)
return NULL;
}
if (lowerExpr != NULL && upperExpr != NULL)
return (Node *) make_andclause(list_make2(lowerExpr, upperExpr));
else if (lowerExpr != NULL)
return (Node *) lowerExpr;
else if (upperExpr != NULL)
return (Node *) upperExpr;
else
{
Assert(false);
return NULL;
}
}
}
/*
* Helper function for find_simplified_clause().
*
* Build the expression (elemExpr Operator val), where the operator is
* the appropriate member of the given opfamily depending on
* isLowerBound and isInclusive. typeCache is the typcache entry for
* the "val" value (presently, this will be the same type as elemExpr).
* rng_collation is the collation to use in the comparison.
*
* Return NULL on failure (if, for some reason, we can't find the operator).
*/
static Expr *
build_bound_expr(Expr *elemExpr, Datum val,
bool isLowerBound, bool isInclusive,
TypeCacheEntry *typeCache,
Oid opfamily, Oid rng_collation)
{
Oid elemType = typeCache->type_id;
int16 elemTypeLen = typeCache->typlen;
bool elemByValue = typeCache->typbyval;
Oid elemCollation = typeCache->typcollation;
int16 strategy;
Oid oproid;
Expr *constExpr;
/* Identify the comparison operator to use */
if (isLowerBound)
strategy = isInclusive ? BTGreaterEqualStrategyNumber : BTGreaterStrategyNumber;
else
strategy = isInclusive ? BTLessEqualStrategyNumber : BTLessStrategyNumber;
/*
* We could use exprType(elemExpr) here, if it ever becomes possible that
* elemExpr is not the exact same type as the range elements.
*/
oproid = get_opfamily_member(opfamily, elemType, elemType, strategy);
/* We don't really expect failure here, but just in case ... */
if (!OidIsValid(oproid))
return NULL;
/* OK, convert "val" to a full-fledged Const node, and make the OpExpr */
constExpr = (Expr *) makeConst(elemType,
-1,
elemCollation,
elemTypeLen,
val,
false,
elemByValue);
return make_opclause(oproid,
BOOLOID,
false,
elemExpr,
constExpr,
InvalidOid,
rng_collation);
}

View File

@ -196,9 +196,10 @@ rangesel(PG_FUNCTION_ARGS)
else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
{
/*
* Here, the Var is the elem, not the range. For now we just punt and
* return the default estimate. In future we could disassemble the
* range constant and apply scalarineqsel ...
* Here, the Var is the elem, not the range. In typical cases
* elem_contained_by_range_support will have simplified this case, so
* that we won't get here. If we do get here we'll fall back on a
* default estimate.
*/
}
else if (((Const *) other)->consttype == vardata.vartype)

View File

@ -940,6 +940,7 @@ load_rangetype_info(TypeCacheEntry *typentry)
/* get opclass properties and look up the comparison function */
opfamilyOid = get_opclass_family(opclassOid);
opcintype = get_opclass_input_type(opclassOid);
typentry->rng_opfamily = opfamilyOid;
cmpFnOid = get_opfamily_proc(opfamilyOid, opcintype, opcintype,
BTORDER_PROC);

View File

@ -57,6 +57,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202401191
#define CATALOG_VERSION_NO 202401201
#endif

View File

@ -10503,13 +10503,15 @@
proname => 'range_overlaps', prorettype => 'bool',
proargtypes => 'anyrange anyrange', prosrc => 'range_overlaps' },
{ oid => '3858',
proname => 'range_contains_elem', prorettype => 'bool',
proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem' },
proname => 'range_contains_elem', prosupport => 'range_contains_elem_support',
prorettype => 'bool', proargtypes => 'anyrange anyelement',
prosrc => 'range_contains_elem' },
{ oid => '3859',
proname => 'range_contains', prorettype => 'bool',
proargtypes => 'anyrange anyrange', prosrc => 'range_contains' },
{ oid => '3860',
proname => 'elem_contained_by_range', prorettype => 'bool',
proname => 'elem_contained_by_range',
prosupport => 'elem_contained_by_range_support', prorettype => 'bool',
proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range' },
{ oid => '3861',
proname => 'range_contained_by', prorettype => 'bool',
@ -10532,6 +10534,12 @@
{ oid => '3867',
proname => 'range_union', prorettype => 'anyrange',
proargtypes => 'anyrange anyrange', prosrc => 'range_union' },
{ oid => '9998', descr => 'planner support for range_contains_elem',
proname => 'range_contains_elem_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'range_contains_elem_support' },
{ oid => '9999', descr => 'planner support for elem_contained_by_range',
proname => 'elem_contained_by_range_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'elem_contained_by_range_support' },
{ oid => '4057',
descr => 'the smallest range which includes both of the given ranges',
proname => 'range_merge', prorettype => 'anyrange',

View File

@ -96,6 +96,7 @@ typedef struct TypeCacheEntry
* btree comparison function.
*/
struct TypeCacheEntry *rngelemtype; /* range's element type */
Oid rng_opfamily; /* opfamily to use for range comparisons */
Oid rng_collation; /* collation for comparisons, if any */
FmgrInfo rng_cmp_proc_finfo; /* comparison function */
FmgrInfo rng_canonical_finfo; /* canonicalization function, if any */

View File

@ -1834,3 +1834,145 @@ create function table_fail(i anyelement) returns table(i anyelement, r anyrange)
as $$ select $1, '[1,10]' $$ language sql;
ERROR: cannot determine result data type
DETAIL: A result of type anyrange requires at least one input of type anyrange or anymultirange.
--
-- Test support functions
--
-- empty range
explain (verbose, costs off)
select current_date <@ daterange 'empty';
QUERY PLAN
-----------------
Result
Output: false
(2 rows)
-- unbounded range
explain (verbose, costs off)
select current_date <@ daterange(NULL, NULL);
QUERY PLAN
----------------
Result
Output: true
(2 rows)
-- only lower bound present
explain (verbose, costs off)
select current_date <@ daterange('2000-01-01', NULL, '[)');
QUERY PLAN
------------------------------------------------
Result
Output: (CURRENT_DATE >= '01-01-2000'::date)
(2 rows)
-- only upper bound present
explain (verbose, costs off)
select current_date <@ daterange(NULL, '2000-01-01', '(]');
QUERY PLAN
-----------------------------------------------
Result
Output: (CURRENT_DATE < '01-02-2000'::date)
(2 rows)
-- lower range "-Infinity" excluded
explain (verbose, costs off)
select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()');
QUERY PLAN
----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
(2 rows)
-- lower range "-Infinity" included
explain (verbose, costs off)
select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)');
QUERY PLAN
-----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE >= '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
(2 rows)
-- upper range "Infinity" excluded
explain (verbose, costs off)
select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)');
QUERY PLAN
----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE < 'infinity'::date))
(2 rows)
-- upper range "Infinity" included
explain (verbose, costs off)
select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]');
QUERY PLAN
-----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date))
(2 rows)
-- should also work if we use "@>"
explain (verbose, costs off)
select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date;
QUERY PLAN
----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE > '-infinity'::date) AND (CURRENT_DATE < '04-10-1997'::date))
(2 rows)
explain (verbose, costs off)
select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date;
QUERY PLAN
-----------------------------------------------------------------------------------------
Result
Output: ((CURRENT_DATE >= '09-25-2002'::date) AND (CURRENT_DATE <= 'infinity'::date))
(2 rows)
-- Check that volatile cases are not optimized
explain (verbose, costs off)
select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------
Result
Output: ((now() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone) AND (now() < 'Sun Jan 21 00:00:00 2024 PST'::timestamp with time zone))
(2 rows)
explain (verbose, costs off) -- unsafe!
select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Result
Output: (clock_timestamp() <@ '["Sat Jan 20 00:00:00 2024 PST","Sun Jan 21 00:00:00 2024 PST")'::tstzrange)
(2 rows)
explain (verbose, costs off)
select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL);
QUERY PLAN
-------------------------------------------------------------------------------------------
Result
Output: (clock_timestamp() >= 'Sat Jan 20 00:00:00 2024 PST'::timestamp with time zone)
(2 rows)
-- test a custom range type with a non-default operator class
create type textrange_supp as range (
subtype = text,
subtype_opclass = text_pattern_ops
);
create temp table text_support_test (t text collate "C");
insert into text_support_test values ('a'), ('c'), ('d'), ('ch');
explain (costs off)
select * from text_support_test where t <@ textrange_supp('a', 'd');
QUERY PLAN
------------------------------------------------------
Seq Scan on text_support_test
Filter: ((t ~>=~ 'a'::text) AND (t ~<~ 'd'::text))
(2 rows)
select * from text_support_test where t <@ textrange_supp('a', 'd');
t
----
a
c
ch
(3 rows)
drop table text_support_test;
drop type textrange_supp;

View File

@ -629,3 +629,72 @@ create function inoutparam_fail(inout i anyelement, out r anyrange)
--should fail
create function table_fail(i anyelement) returns table(i anyelement, r anyrange)
as $$ select $1, '[1,10]' $$ language sql;
--
-- Test support functions
--
-- empty range
explain (verbose, costs off)
select current_date <@ daterange 'empty';
-- unbounded range
explain (verbose, costs off)
select current_date <@ daterange(NULL, NULL);
-- only lower bound present
explain (verbose, costs off)
select current_date <@ daterange('2000-01-01', NULL, '[)');
-- only upper bound present
explain (verbose, costs off)
select current_date <@ daterange(NULL, '2000-01-01', '(]');
-- lower range "-Infinity" excluded
explain (verbose, costs off)
select current_date <@ daterange('-Infinity', '1997-04-10'::date, '()');
-- lower range "-Infinity" included
explain (verbose, costs off)
select current_date <@ daterange('-Infinity', '1997-04-10'::date, '[)');
-- upper range "Infinity" excluded
explain (verbose, costs off)
select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[)');
-- upper range "Infinity" included
explain (verbose, costs off)
select current_date <@ daterange('2002-09-25'::date, 'Infinity', '[]');
-- should also work if we use "@>"
explain (verbose, costs off)
select daterange('-Infinity', '1997-04-10'::date, '()') @> current_date;
explain (verbose, costs off)
select daterange('2002-09-25'::date, 'Infinity', '[]') @> current_date;
-- Check that volatile cases are not optimized
explain (verbose, costs off)
select now() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
explain (verbose, costs off) -- unsafe!
select clock_timestamp() <@ tstzrange('2024-01-20 00:00', '2024-01-21 00:00');
explain (verbose, costs off)
select clock_timestamp() <@ tstzrange('2024-01-20 00:00', NULL);
-- test a custom range type with a non-default operator class
create type textrange_supp as range (
subtype = text,
subtype_opclass = text_pattern_ops
);
create temp table text_support_test (t text collate "C");
insert into text_support_test values ('a'), ('c'), ('d'), ('ch');
explain (costs off)
select * from text_support_test where t <@ textrange_supp('a', 'd');
select * from text_support_test where t <@ textrange_supp('a', 'd');
drop table text_support_test;
drop type textrange_supp;