diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index c4fcd0076ea..771781e28ca 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -120,6 +120,7 @@ #include "optimizer/plancat.h" #include "parser/parse_clause.h" #include "parser/parsetree.h" +#include "rewrite/rewriteManip.h" #include "statistics/statistics.h" #include "storage/bufmgr.h" #include "utils/acl.h" @@ -3273,6 +3274,15 @@ add_unique_group_var(PlannerInfo *root, List *varinfos, ndistinct = get_variable_numdistinct(vardata, &isdefault); + /* + * The nullingrels bits within the var could cause the same var to be + * counted multiple times if it's marked with different nullingrels. They + * could also prevent us from matching the var to the expressions in + * extended statistics (see estimate_multivariate_ndistinct). So strip + * them out first. + */ + var = remove_nulling_relids(var, root->outer_join_rels, NULL); + foreach(lc, varinfos) { varinfo = (GroupVarInfo *) lfirst(lc); @@ -4980,6 +4990,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, { Node *basenode; Relids varnos; + Relids basevarnos; RelOptInfo *onerel; /* Make sure we don't return dangling pointers in vardata */ @@ -5021,10 +5032,11 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, * relation are considered "real" vars. */ varnos = pull_varnos(root, basenode); + basevarnos = bms_difference(varnos, root->outer_join_rels); onerel = NULL; - switch (bms_membership(varnos)) + switch (bms_membership(basevarnos)) { case BMS_EMPTY_SET: /* No Vars at all ... must be pseudo-constant clause */ @@ -5033,7 +5045,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, if (varRelid == 0 || bms_is_member(varRelid, varnos)) { onerel = find_base_rel(root, - (varRelid ? varRelid : bms_singleton_member(varnos))); + (varRelid ? varRelid : bms_singleton_member(basevarnos))); vardata->rel = onerel; node = basenode; /* strip any relabeling */ } @@ -5057,7 +5069,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, break; } - bms_free(varnos); + bms_free(basevarnos); vardata->var = node; vardata->atttype = exprType(node); @@ -5082,6 +5094,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, ListCell *slist; Oid userid; + /* + * The nullingrels bits within the expression could prevent us from + * matching it to expressional index columns or to the expressions in + * extended statistics. So strip them out first. + */ + if (bms_overlap(varnos, root->outer_join_rels)) + node = remove_nulling_relids(node, root->outer_join_rels, NULL); + /* * Determine the user ID to use for privilege checks: either * onerel->userid if it's set (e.g., in case we're accessing the table @@ -5352,6 +5372,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid, } } } + + bms_free(varnos); } /* diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 9af8d61a732..84e35981ed8 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -2517,10 +2517,11 @@ where t1.f1 = coalesce(t2.f1, 1); -> Materialize -> Seq Scan on int4_tbl t2 Filter: (f1 > 1) - -> Seq Scan on int4_tbl t3 + -> Materialize + -> Seq Scan on int4_tbl t3 -> Materialize -> Seq Scan on int4_tbl t4 -(13 rows) +(14 rows) explain (costs off) select * from int4_tbl t1 @@ -7981,3 +7982,24 @@ where exists (select 1 from j3 (13 rows) drop table j3; +-- Test that we do not account for nullingrels when looking up statistics +CREATE TABLE group_tbl (a INT, b INT); +INSERT INTO group_tbl SELECT 1, 1; +CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; +ANALYZE group_tbl; +EXPLAIN (COSTS OFF) +SELECT 1 FROM group_tbl t1 + LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE +GROUP BY s.c1, s.c2; + QUERY PLAN +-------------------------------------------- + Group + Group Key: t2.a, (COALESCE(t2.a)) + -> Sort + Sort Key: t2.a, (COALESCE(t2.a)) + -> Nested Loop Left Join + -> Seq Scan on group_tbl t1 + -> Seq Scan on group_tbl t2 +(7 rows) + +DROP TABLE group_tbl; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index 41949d41dd6..d6f646a1d50 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2928,3 +2928,16 @@ where exists (select 1 from j3 and t1.unique1 < 1; drop table j3; + +-- Test that we do not account for nullingrels when looking up statistics +CREATE TABLE group_tbl (a INT, b INT); +INSERT INTO group_tbl SELECT 1, 1; +CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; +ANALYZE group_tbl; + +EXPLAIN (COSTS OFF) +SELECT 1 FROM group_tbl t1 + LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE +GROUP BY s.c1, s.c2; + +DROP TABLE group_tbl;