1996-07-09 06:22:35 +00:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
1999-02-13 23:22:53 +00:00
|
|
|
* pathnode.h
|
2000-09-29 18:21:41 +00:00
|
|
|
* prototypes for pathnode.c, relnode.c.
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
|
|
|
*
|
2016-01-02 13:33:40 -05:00
|
|
|
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
2000-01-26 05:58:53 +00:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/optimizer/pathnode.h
|
1996-07-09 06:22:35 +00:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef PATHNODE_H
|
|
|
|
#define PATHNODE_H
|
|
|
|
|
1997-11-26 01:14:33 +00:00
|
|
|
#include "nodes/relation.h"
|
|
|
|
|
2003-02-08 20:20:55 +00:00
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
/*
|
|
|
|
* prototypes for pathnode.c
|
|
|
|
*/
|
2000-02-15 20:49:31 +00:00
|
|
|
extern int compare_path_costs(Path *path1, Path *path2,
|
2000-04-12 17:17:23 +00:00
|
|
|
CostSelector criterion);
|
2000-02-15 20:49:31 +00:00
|
|
|
extern int compare_fractional_path_costs(Path *path1, Path *path2,
|
2000-04-12 17:17:23 +00:00
|
|
|
double fraction);
|
2000-02-15 20:49:31 +00:00
|
|
|
/*
 * set_cheapest: implemented in pathnode.c.  Takes the RelOptInfo to work on
 * and returns nothing.
 * NOTE(review): presumably records which of parent_rel's paths are cheapest
 * -- confirm against pathnode.c.
 */
extern void set_cheapest(RelOptInfo *parent_rel);
|
2000-02-07 04:41:04 +00:00
|
|
|
/*
 * add_path: implemented in pathnode.c.  Takes the owning RelOptInfo and a
 * candidate Path; returns nothing.
 * NOTE(review): presumably keeps new_path for parent_rel only when no
 * existing path dominates it -- confirm against pathnode.c.
 */
extern void add_path(RelOptInfo *parent_rel, Path *new_path);
|
2012-01-27 19:26:38 -05:00
|
|
|
extern bool add_path_precheck(RelOptInfo *parent_rel,
|
|
|
|
Cost startup_cost, Cost total_cost,
|
|
|
|
List *pathkeys, Relids required_outer);
|
2016-01-20 14:29:22 -05:00
|
|
|
extern void add_partial_path(RelOptInfo *parent_rel, Path *new_path);
|
|
|
|
extern bool add_partial_path_precheck(RelOptInfo *parent_rel,
|
|
|
|
Cost total_cost, List *pathkeys);
|
1999-08-16 02:17:58 +00:00
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
extern Path *create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
|
2016-01-20 14:29:22 -05:00
|
|
|
Relids required_outer, int parallel_degree);
|
2015-05-15 14:37:10 -04:00
|
|
|
extern Path *create_samplescan_path(PlannerInfo *root, RelOptInfo *rel,
|
2015-05-23 21:35:49 -04:00
|
|
|
Relids required_outer);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern IndexPath *create_index_path(PlannerInfo *root,
|
2000-04-12 17:17:23 +00:00
|
|
|
IndexOptInfo *index,
|
2011-12-24 19:03:21 -05:00
|
|
|
List *indexclauses,
|
|
|
|
List *indexclausecols,
|
2010-12-02 20:50:48 -05:00
|
|
|
List *indexorderbys,
|
2011-12-24 19:03:21 -05:00
|
|
|
List *indexorderbycols,
|
2000-12-14 22:30:45 +00:00
|
|
|
List *pathkeys,
|
2005-04-22 21:58:32 +00:00
|
|
|
ScanDirection indexscandir,
|
2011-10-07 20:13:02 -04:00
|
|
|
bool indexonly,
|
2012-01-27 19:26:38 -05:00
|
|
|
Relids required_outer,
|
|
|
|
double loop_count);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern BitmapHeapPath *create_bitmap_heap_path(PlannerInfo *root,
|
2005-10-15 02:49:52 +00:00
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *bitmapqual,
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
Relids required_outer,
|
2012-01-27 19:26:38 -05:00
|
|
|
double loop_count);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern BitmapAndPath *create_bitmap_and_path(PlannerInfo *root,
|
2005-10-15 02:49:52 +00:00
|
|
|
RelOptInfo *rel,
|
|
|
|
List *bitmapquals);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
|
2005-10-15 02:49:52 +00:00
|
|
|
RelOptInfo *rel,
|
|
|
|
List *bitmapquals);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
|
2012-08-26 22:48:55 -04:00
|
|
|
List *tidquals, Relids required_outer);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths,
|
2016-01-20 14:29:22 -05:00
|
|
|
Relids required_outer, int parallel_degree);
|
2010-10-14 16:56:39 -04:00
|
|
|
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
|
2011-04-10 11:42:00 -04:00
|
|
|
RelOptInfo *rel,
|
|
|
|
List *subpaths,
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
List *pathkeys,
|
|
|
|
Relids required_outer);
|
2016-03-08 16:28:27 -05:00
|
|
|
extern ResultPath *create_result_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
PathTarget *target, List *resconstantqual);
|
2002-11-30 05:21:03 +00:00
|
|
|
/*
 * create_material_path: implemented in pathnode.c.  Takes a RelOptInfo and
 * an input Path and returns a MaterialPath pointer.
 * NOTE(review): presumably the result represents materializing subpath's
 * output for rel -- confirm against pathnode.c.
 */
extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
|
2005-06-05 22:32:58 +00:00
|
|
|
extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
|
2008-08-14 18:48:00 +00:00
|
|
|
Path *subpath, SpecialJoinInfo *sjinfo);
|
Add a Gather executor node.
A Gather executor node runs any number of copies of a plan in an equal
number of workers and merges all of the results into a single tuple
stream. It can also run the plan itself, if the workers are
unavailable or haven't started up yet. It is intended to work with
the Partial Seq Scan node which will be added in future commits.
It could also be used to implement parallel query of a different sort
by itself, without help from Partial Seq Scan, if the single_copy mode
is used. In that mode, a worker executes the plan, and the parallel
leader does not, merely collecting the worker's results. So, a Gather
node could be inserted into a plan to split the execution of that plan
across two processes. Nested Gather nodes aren't currently supported,
but we might want to add support for that in the future.
There's nothing in the planner to actually generate Gather nodes yet,
so it's not quite time to break out the champagne. But we're getting
close.
Amit Kapila. Some design suggestions were provided by me, and I also
reviewed the patch. Single-copy mode, documentation, and other minor
changes also by me.
2015-09-30 19:23:36 -04:00
|
|
|
extern GatherPath *create_gather_path(PlannerInfo *root,
|
2016-01-20 14:29:22 -05:00
|
|
|
RelOptInfo *rel, Path *subpath, Relids required_outer);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 15:58:22 -05:00
|
|
|
extern SubqueryScanPath *create_subqueryscan_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel, Path *subpath,
|
2012-06-10 15:20:04 -04:00
|
|
|
List *pathkeys, Relids required_outer);
|
2012-08-07 19:02:54 -04:00
|
|
|
extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
|
Support multi-argument UNNEST(), and TABLE() syntax for multiple functions.
This patch adds the ability to write TABLE( function1(), function2(), ...)
as a single FROM-clause entry. The result is the concatenation of the
first row from each function, followed by the second row from each
function, etc; with NULLs inserted if any function produces fewer rows than
others. This is believed to be a much more useful behavior than what
Postgres currently does with multiple SRFs in a SELECT list.
This syntax also provides a reasonable way to combine use of column
definition lists with WITH ORDINALITY: put the column definition list
inside TABLE(), where it's clear that it doesn't control the ordinality
column as well.
Also implement SQL-compliant multiple-argument UNNEST(), by turning
UNNEST(a,b,c) into TABLE(unnest(a), unnest(b), unnest(c)).
The SQL standard specifies TABLE() with only a single function, not
multiple functions, and it seems to require an implicit UNNEST() which is
not what this patch does. There may be something wrong with that reading
of the spec, though, because if it's right then the spec's TABLE() is just
a pointless alternative spelling of UNNEST(). After further review of
that, we might choose to adopt a different syntax for what this patch does,
but in any case this functionality seems clearly worthwhile.
Andrew Gierth, reviewed by Zoltán Böszörményi and Heikki Linnakangas, and
significantly revised by me
2013-11-21 19:37:02 -05:00
|
|
|
List *pathkeys, Relids required_outer);
|
2012-08-12 16:01:26 -04:00
|
|
|
extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
Relids required_outer);
|
2012-08-26 22:48:55 -04:00
|
|
|
extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
Relids required_outer);
|
|
|
|
extern Path *create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
Relids required_outer);
|
2012-03-05 16:15:59 -05:00
|
|
|
extern ForeignPath *create_foreignscan_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
double rows, Cost startup_cost, Cost total_cost,
|
|
|
|
List *pathkeys,
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
Relids required_outer,
|
Allow foreign and custom joins to handle EvalPlanQual rechecks.
Commit e7cb7ee14555cc9c5773e2c102efd6371f6f2005 provided basic
infrastructure for allowing a foreign data wrapper or custom scan
provider to replace a join of one or more tables with a scan.
However, this infrastructure failed to take into account the need
for possible EvalPlanQual rechecks, and ExecScanFetch would fail
an assertion (or just overwrite memory) if such a check was attempted
for a plan containing a pushed-down join. To fix, adjust the EPQ
machinery to skip some processing steps when scanrelid == 0, making
those the responsibility of scan's recheck method, which also has
the responsibility in this case of correctly populating the relevant
slot.
To allow foreign scans to gain control in the right place to make
use of this new facility, add a new, optional RecheckForeignScan
method. Also, allow a foreign scan to have a child plan, which can
be used to correctly populate the slot (or perhaps for something
else, but this is the only use currently envisioned).
KaiGai Kohei, reviewed by Robert Haas, Etsuro Fujita, and Kyotaro
Horiguchi.
2015-12-08 12:31:03 -05:00
|
|
|
Path *fdw_outerpath,
|
2012-03-05 16:15:59 -05:00
|
|
|
List *fdw_private);
|
1999-08-16 02:17:58 +00:00
|
|
|
|
2012-01-27 19:26:38 -05:00
|
|
|
/*
 * calc_nestloop_required_outer: implemented in pathnode.c.  Takes the outer
 * and inner input Paths of a join and returns a Relids value.
 * NOTE(review): presumably the returned set is the parameterization
 * (required outer rels) of a nestloop join of the two paths -- confirm.
 */
extern Relids calc_nestloop_required_outer(Path *outer_path, Path *inner_path);
|
|
|
|
/*
 * calc_non_nestloop_required_outer: implemented in pathnode.c.  Takes the
 * outer and inner input Paths of a join and returns a Relids value.
 * NOTE(review): presumably the counterpart of calc_nestloop_required_outer
 * for join methods other than nestloop -- confirm.
 */
extern Relids calc_non_nestloop_required_outer(Path *outer_path, Path *inner_path);
|
|
|
|
|
2005-06-05 22:32:58 +00:00
|
|
|
extern NestPath *create_nestloop_path(PlannerInfo *root,
|
2001-10-25 05:50:21 +00:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
JoinType jointype,
|
2012-01-27 19:26:38 -05:00
|
|
|
JoinCostWorkspace *workspace,
|
2008-08-14 18:48:00 +00:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2012-01-27 19:26:38 -05:00
|
|
|
SemiAntiJoinFactors *semifactors,
|
2001-10-25 05:50:21 +00:00
|
|
|
Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
List *restrict_clauses,
|
2012-01-27 19:26:38 -05:00
|
|
|
List *pathkeys,
|
|
|
|
Relids required_outer);
|
2000-01-09 00:26:47 +00:00
|
|
|
|
2005-06-05 22:32:58 +00:00
|
|
|
extern MergePath *create_mergejoin_path(PlannerInfo *root,
|
2001-10-25 05:50:21 +00:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
JoinType jointype,
|
2012-01-27 19:26:38 -05:00
|
|
|
JoinCostWorkspace *workspace,
|
2008-08-14 18:48:00 +00:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2001-10-25 05:50:21 +00:00
|
|
|
Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
List *restrict_clauses,
|
|
|
|
List *pathkeys,
|
2012-01-27 19:26:38 -05:00
|
|
|
Relids required_outer,
|
2001-10-25 05:50:21 +00:00
|
|
|
List *mergeclauses,
|
|
|
|
List *outersortkeys,
|
|
|
|
List *innersortkeys);
|
2000-01-09 00:26:47 +00:00
|
|
|
|
2005-06-05 22:32:58 +00:00
|
|
|
extern HashPath *create_hashjoin_path(PlannerInfo *root,
|
2001-10-25 05:50:21 +00:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
JoinType jointype,
|
2012-01-27 19:26:38 -05:00
|
|
|
JoinCostWorkspace *workspace,
|
2008-08-14 18:48:00 +00:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2012-01-27 19:26:38 -05:00
|
|
|
SemiAntiJoinFactors *semifactors,
|
2001-10-25 05:50:21 +00:00
|
|
|
Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
List *restrict_clauses,
|
2012-01-27 19:26:38 -05:00
|
|
|
Relids required_outer,
|
2001-10-25 05:50:21 +00:00
|
|
|
List *hashclauses);
|
1996-07-09 06:22:35 +00:00
|
|
|
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 15:58:22 -05:00
|
|
|
extern ProjectionPath *create_projection_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
PathTarget *target);
|
|
|
|
extern Path *apply_projection_to_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *path,
|
|
|
|
PathTarget *target);
|
|
|
|
extern SortPath *create_sort_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
List *pathkeys,
|
|
|
|
double limit_tuples);
|
|
|
|
extern GroupPath *create_group_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
PathTarget *target,
|
|
|
|
List *groupClause,
|
|
|
|
List *qual,
|
|
|
|
double numGroups);
|
|
|
|
extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
int numCols,
|
|
|
|
double numGroups);
|
|
|
|
extern AggPath *create_agg_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
PathTarget *target,
|
|
|
|
AggStrategy aggstrategy,
|
|
|
|
List *groupClause,
|
|
|
|
List *qual,
|
|
|
|
const AggClauseCosts *aggcosts,
|
|
|
|
double numGroups);
|
|
|
|
extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
PathTarget *target,
|
|
|
|
List *having_qual,
|
|
|
|
List *rollup_lists,
|
|
|
|
List *rollup_groupclauses,
|
|
|
|
const AggClauseCosts *agg_costs,
|
|
|
|
double numGroups);
|
|
|
|
extern MinMaxAggPath *create_minmaxagg_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
PathTarget *target,
|
|
|
|
List *mmaggregates,
|
|
|
|
List *quals);
|
|
|
|
extern WindowAggPath *create_windowagg_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
PathTarget *target,
|
|
|
|
List *windowFuncs,
|
|
|
|
WindowClause *winclause,
|
|
|
|
List *winpathkeys);
|
|
|
|
extern SetOpPath *create_setop_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
SetOpCmd cmd,
|
|
|
|
SetOpStrategy strategy,
|
|
|
|
List *distinctList,
|
|
|
|
AttrNumber flagColIdx,
|
|
|
|
int firstFlag,
|
|
|
|
double numGroups,
|
|
|
|
double outputRows);
|
|
|
|
extern RecursiveUnionPath *create_recursiveunion_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
Path *leftpath,
|
|
|
|
Path *rightpath,
|
|
|
|
PathTarget *target,
|
|
|
|
List *distinctList,
|
|
|
|
int wtParam,
|
|
|
|
double numGroups);
|
|
|
|
extern LockRowsPath *create_lockrows_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
Path *subpath, List *rowMarks, int epqParam);
|
|
|
|
extern ModifyTablePath *create_modifytable_path(PlannerInfo *root,
|
|
|
|
RelOptInfo *rel,
|
|
|
|
CmdType operation, bool canSetTag,
|
|
|
|
Index nominalRelation,
|
|
|
|
List *resultRelations, List *subpaths,
|
|
|
|
List *subroots,
|
|
|
|
List *withCheckOptionLists, List *returningLists,
|
|
|
|
List *rowMarks, OnConflictExpr *onconflict,
|
|
|
|
int epqParam);
|
|
|
|
extern LimitPath *create_limit_path(PlannerInfo *root, RelOptInfo *rel,
|
|
|
|
Path *subpath,
|
|
|
|
Node *limitOffset, Node *limitCount,
|
|
|
|
int64 offset_est, int64 count_est);
|
|
|
|
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
/*
 * reparameterize_path (pathnode.c)
 *	  Takes an existing Path, a set of required outer relids and a loop
 *	  count, and returns a Path pointer.
 *	  NOTE(review): presumably yields a version of 'path' parameterized by
 *	  'required_outer', with 'loop_count' feeding the cost estimate; what is
 *	  returned for path types that cannot be reparameterized is not visible
 *	  from this header -- confirm against pathnode.c.
 */
extern Path *reparameterize_path(PlannerInfo *root, Path *path,
|
|
|
|
Relids required_outer,
|
|
|
|
double loop_count);
|
|
|
|
|
1996-07-09 06:22:35 +00:00
|
|
|
/*
|
2000-02-07 04:41:04 +00:00
|
|
|
* prototypes for relnode.c
|
1996-07-09 06:22:35 +00:00
|
|
|
*/
|
2011-09-03 15:35:12 -04:00
|
|
|
/*
 * setup_simple_rel_arrays (relnode.c)
 *	  Takes only the PlannerInfo and returns nothing, so any result is
 *	  stored through 'root'.
 *	  NOTE(review): name suggests it prepares root's per-relid lookup
 *	  arrays -- confirm against relnode.c.
 */
extern void setup_simple_rel_arrays(PlannerInfo *root);
|
2006-01-31 21:39:25 +00:00
|
|
|
/*
 * build_simple_rel (relnode.c)
 *	  Returns a RelOptInfo pointer for the relation identified by the
 *	  integer 'relid', tagged with the caller-supplied 'reloptkind'.
 *	  NOTE(review): presumably constructs a new RelOptInfo and registers it
 *	  in 'root'; 'relid' looks like a range-table index -- confirm.
 */
extern RelOptInfo *build_simple_rel(PlannerInfo *root, int relid,
|
2006-10-04 00:30:14 +00:00
|
|
|
RelOptKind reloptkind);
|
2005-06-05 22:32:58 +00:00
|
|
|
/*
 * find_base_rel (relnode.c)
 *	  Looks up a RelOptInfo by integer 'relid' and returns a pointer to it.
 *	  NOTE(review): behavior when no such rel exists (error vs. NULL) is not
 *	  visible from this header -- confirm before relying on either.
 */
extern RelOptInfo *find_base_rel(PlannerInfo *root, int relid);
|
|
|
|
/*
 * find_join_rel (relnode.c)
 *	  Looks up a RelOptInfo by a Relids set (rather than a single relid) and
 *	  returns a pointer to it.
 *	  NOTE(review): presumably returns NULL when no join rel with exactly
 *	  these relids has been built yet -- confirm against relnode.c.
 */
extern RelOptInfo *find_join_rel(PlannerInfo *root, Relids relids);
|
|
|
|
/*
 * build_join_rel (relnode.c)
 *	  Returns a RelOptInfo pointer for the join identified by 'joinrelids',
 *	  given the two input rels being joined and the SpecialJoinInfo that
 *	  describes the join.
 *	  'restrictlist_ptr' is a pointer to a List pointer, i.e. the callee can
 *	  hand a List back to the caller through it.
 *	  NOTE(review): presumably returns an already-existing join rel when one
 *	  was built earlier, and fills *restrictlist_ptr with the restriction
 *	  clauses applicable to this outer/inner pair -- confirm.
 */
extern RelOptInfo *build_join_rel(PlannerInfo *root,
|
2003-08-04 00:43:34 +00:00
|
|
|
Relids joinrelids,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel,
|
2008-08-14 18:48:00 +00:00
|
|
|
SpecialJoinInfo *sjinfo,
|
2003-08-04 00:43:34 +00:00
|
|
|
List **restrictlist_ptr);
|
Still more fixes for planner's handling of LATERAL references.
More fuzz testing by Andreas Seltenreich exposed that the planner did not
cope well with chains of lateral references. If relation X references Y
laterally, and Y references Z laterally, then we will have to scan X on the
inside of a nestloop with Z, so for all intents and purposes X is laterally
dependent on Z too. The planner did not understand this and would generate
intermediate joins that could not be used. While that was usually harmless
except for wasting some planning cycles, under the right circumstances it
would lead to "failed to build any N-way joins" or "could not devise a
query plan" planner failures.
To fix that, convert the existing per-relation lateral_relids and
lateral_referencers relid sets into their transitive closures; that is,
they now show all relations on which a rel is directly or indirectly
laterally dependent. This not only fixes the chained-reference problem
but allows some of the relevant tests to be made substantially simpler
and faster, since they can be reduced to simple bitmap manipulations
instead of searches of the LateralJoinInfo list.
Also, when a PlaceHolderVar that is due to be evaluated at a join contains
lateral references, we should treat those references as indirect lateral
dependencies of each of the join's base relations. This prevents us from
trying to join any individual base relations to the lateral reference
source before the join is formed, which again cannot work.
Andreas' testing also exposed another oversight in the "dangerous
PlaceHolderVar" test added in commit 85e5e222b1dd02f1. Simply rejecting
unsafe join paths in joinpath.c is insufficient, because in some cases
we will end up rejecting *all* possible paths for a particular join, again
leading to "could not devise a query plan" failures. The restriction has
to be known also to join_is_legal and its cohort functions, so that they
will not select a join for which that will happen. I chose to move the
supporting logic into joinrels.c where the latter functions are.
Back-patch to 9.3 where LATERAL support was introduced.
2015-12-11 14:22:20 -05:00
|
|
|
/*
 * min_join_parameterization (relnode.c)
 *	  Returns a Relids set derived from the join's relids and its two input
 *	  rels.
 *	  NOTE(review): name suggests the result is the minimum set of outer
 *	  rels that any path for this join must be parameterized by (e.g. due to
 *	  lateral references) -- confirm against relnode.c.
 */
extern Relids min_join_parameterization(PlannerInfo *root,
|
|
|
|
Relids joinrelids,
|
|
|
|
RelOptInfo *outer_rel,
|
|
|
|
RelOptInfo *inner_rel);
|
Simplify query_planner's API by having it return the top-level RelOptInfo.
Formerly, query_planner returned one or possibly two Paths for the topmost
join relation, so that grouping_planner didn't see the join RelOptInfo
(at least not directly; it didn't have any hesitation about examining
cheapest_path->parent, though). However, correct selection of the Paths
involved a significant amount of coupling between query_planner and
grouping_planner, a problem which has gotten worse over time. It seems
best to give up on this API choice and instead return the topmost
RelOptInfo explicitly. Then grouping_planner can pull out the Paths it
wants from the rel's path list. In this way we can remove all knowledge
of grouping behaviors from query_planner.
The only real benefit of the old way is that in the case of an empty
FROM clause, we never made any RelOptInfos at all, just a Path. Now
we have to gin up a dummy RelOptInfo to represent the empty FROM clause.
That's not a very big deal though.
While at it, simplify query_planner's API a bit more by having the caller
set up root->tuple_fraction and root->limit_tuples, rather than passing
those values as separate parameters. Since query_planner no longer does
anything with either value, requiring it to fill the PlannerInfo fields
seemed pretty arbitrary.
This patch just rearranges code; it doesn't (intentionally) change any
behaviors. Followup patches will do more interesting things.
2013-08-05 15:00:57 -04:00
|
|
|
/*
 * build_empty_join_rel (relnode.c)
 *	  Takes only the PlannerInfo and returns a RelOptInfo pointer.
 *	  NOTE(review): presumably builds the dummy join RelOptInfo that
 *	  represents an empty FROM clause -- confirm against relnode.c.
 */
extern RelOptInfo *build_empty_join_rel(PlannerInfo *root);
|
Make the upper part of the planner work by generating and comparing Paths.
I've been saying we needed to do this for more than five years, and here it
finally is. This patch removes the ever-growing tangle of spaghetti logic
that grouping_planner() used to use to try to identify the best plan for
post-scan/join query steps. Now, there is (nearly) independent
consideration of each execution step, and entirely separate construction of
Paths to represent each of the possible ways to do that step. We choose
the best Path or set of Paths using the same add_path() logic that's been
used inside query_planner() for years.
In addition, this patch removes the old restriction that subquery_planner()
could return only a single Plan. It now returns a RelOptInfo containing a
set of Paths, just as query_planner() does, and the parent query level can
use each of those Paths as the basis of a SubqueryScanPath at its level.
This allows finding some optimizations that we missed before, wherein a
subquery was capable of returning presorted data and thereby avoiding a
sort in the parent level, making the overall cost cheaper even though
delivering sorted output was not the cheapest plan for the subquery in
isolation. (A couple of regression test outputs change in consequence of
that. However, there is very little change in visible planner behavior
overall, because the point of this patch is not to get immediate planning
benefits but to create the infrastructure for future improvements.)
There is a great deal left to do here. This patch unblocks a lot of
planner work that was basically impractical in the old code structure,
such as allowing FDWs to implement remote aggregation, or rewriting
plan_set_operations() to allow consideration of multiple implementation
orders for set operations. (The latter will likely require a full
rewrite of plan_set_operations(); what I've done here is only to fix it
to return Paths not Plans.) I have also left unfinished some localized
refactoring in createplan.c and planner.c, because it was not necessary
to get this patch to a working state.
Thanks to Robert Haas, David Rowley, and Amit Kapila for review.
2016-03-07 15:58:22 -05:00
|
|
|
/*
 * fetch_upper_rel (relnode.c)
 *	  Returns a RelOptInfo pointer selected by an UpperRelationKind together
 *	  with a Relids set.
 *	  NOTE(review): "fetch" suggests find-or-create semantics for the
 *	  post-scan/join ("upper") planning steps -- confirm against relnode.c.
 */
extern RelOptInfo *fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind,
|
|
|
|
Relids relids);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
/*
 * find_childrel_appendrelinfo (relnode.c)
 *	  Given a RelOptInfo, returns a pointer to an AppendRelInfo.
 *	  NOTE(review): presumably 'rel' must be an append-relation child and the
 *	  result describes its link to its immediate parent only; behavior for a
 *	  non-child rel is not visible from this header -- confirm.
 */
extern AppendRelInfo *find_childrel_appendrelinfo(PlannerInfo *root,
|
2012-06-10 15:20:04 -04:00
|
|
|
RelOptInfo *rel);
|
Fix some more problems with nested append relations.
As of commit a87c72915 (which later got backpatched as far as 9.1),
we're explicitly supporting the notion that append relations can be
nested; this can occur when UNION ALL constructs are nested, or when
a UNION ALL contains a table with inheritance children.
Bug #11457 from Nelson Page, as well as an earlier report from Elvis
Pranskevichus, showed that there were still nasty bugs associated with such
cases: in particular the EquivalenceClass mechanism could try to generate
"join" clauses connecting an appendrel child to some grandparent appendrel,
which would result in assertion failures or bogus plans.
Upon investigation I concluded that all current callers of
find_childrel_appendrelinfo() need to be fixed to explicitly consider
multiple levels of parent appendrels. The most complex fix was in
processing of "broken" EquivalenceClasses, which are ECs for which we have
been unable to generate all the derived equality clauses we would like to
because of missing cross-type equality operators in the underlying btree
operator family. That code path is more or less entirely untested by
the regression tests to date, because no standard opfamilies have such
holes in them. So I wrote a new regression test script to try to exercise
it a bit, which turned out to be quite a worthwhile activity as it exposed
existing bugs in all supported branches.
The present patch is essentially the same as far back as 9.2, which is
where parameterized paths were introduced. In 9.0 and 9.1, we only need
to back-patch a small fragment of commit 5b7b5518d, which fixes failure to
propagate out the original WHERE clauses when a broken EC contains constant
members. (The regression test case results show that these older branches
are noticeably stupider than 9.2+ in terms of the quality of the plans
generated; but we don't really care about plan quality in such cases,
only that the plan not be outright wrong. A more invasive fix in the
older branches would not be a good idea anyway from a plan-stability
standpoint.)
2014-10-01 19:30:24 -04:00
|
|
|
/*
 * find_childrel_top_parent (relnode.c)
 *	  Given a RelOptInfo, returns a pointer to another RelOptInfo.
 *	  NOTE(review): name suggests it walks up through nested append
 *	  relations to the topmost parent of 'rel' -- confirm.
 */
extern RelOptInfo *find_childrel_top_parent(PlannerInfo *root, RelOptInfo *rel);
|
|
|
|
/*
 * find_childrel_parents (relnode.c)
 *	  Given a RelOptInfo, returns a Relids set.
 *	  NOTE(review): name suggests the set holds the relids of every
 *	  appendrel ancestor of 'rel' (parent, grandparent, ...) -- confirm.
 */
extern Relids find_childrel_parents(PlannerInfo *root, RelOptInfo *rel);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
/*
 * get_baserel_parampathinfo (relnode.c)
 *	  Returns a ParamPathInfo pointer for 'baserel' under the
 *	  parameterization given by 'required_outer'.
 *	  NOTE(review): presumably find-or-create, so that all paths of one rel
 *	  with the same parameterization share one cached rowcount estimate;
 *	  the result for an empty 'required_outer' is not visible here --
 *	  confirm against relnode.c.
 */
extern ParamPathInfo *get_baserel_parampathinfo(PlannerInfo *root,
|
2012-06-10 15:20:04 -04:00
|
|
|
RelOptInfo *baserel,
|
|
|
|
Relids required_outer);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
/*
 * get_joinrel_parampathinfo (relnode.c)
 *	  Returns a ParamPathInfo pointer for 'joinrel' under the
 *	  parameterization 'required_outer', given the outer and inner input
 *	  paths and the SpecialJoinInfo describing the join.
 *	  'restrict_clauses' is a pointer to a List pointer, so the callee can
 *	  read and replace the caller's clause list through it.
 *	  NOTE(review): presumably appends to *restrict_clauses the join clauses
 *	  that become enforceable at this join because of the parameterization
 *	  -- confirm against relnode.c.
 */
extern ParamPathInfo *get_joinrel_parampathinfo(PlannerInfo *root,
|
2012-06-10 15:20:04 -04:00
|
|
|
RelOptInfo *joinrel,
|
|
|
|
Path *outer_path,
|
|
|
|
Path *inner_path,
|
|
|
|
SpecialJoinInfo *sjinfo,
|
|
|
|
Relids required_outer,
|
|
|
|
List **restrict_clauses);
|
Revise parameterized-path mechanism to fix assorted issues.
This patch adjusts the treatment of parameterized paths so that all paths
with the same parameterization (same set of required outer rels) for the
same relation will have the same rowcount estimate. We cache the rowcount
estimates to ensure that property, and hopefully save a few cycles too.
Doing this makes it practical for add_path_precheck to operate without
a rowcount estimate: it need only assume that paths with different
parameterizations never dominate each other, which is close enough to
true anyway for coarse filtering, because normally a more-parameterized
path should yield fewer rows thanks to having more join clauses to apply.
In add_path, we do the full nine yards of comparing rowcount estimates
along with everything else, so that we can discard parameterized paths that
don't actually have an advantage. This fixes some issues I'd found with
add_path rejecting parameterized paths on the grounds that they were more
expensive than not-parameterized ones, even though they yielded many fewer
rows and hence would be cheaper once subsequent joining was considered.
To make the same-rowcounts assumption valid, we have to require that any
parameterized path enforce *all* join clauses that could be obtained from
the particular set of outer rels, even if not all of them are useful for
indexing. This is required at both base scans and joins. It's a good
thing anyway since the net impact is that join quals are checked at the
lowest practical level in the join tree. Hence, discard the original
rather ad-hoc mechanism for choosing parameterization joinquals, and build
a better one that has a more principled rule for when clauses can be moved.
The original rule was actually buggy anyway for lack of knowledge about
which relations are part of an outer join's outer side; getting this right
requires adding an outer_relids field to RestrictInfo.
2012-04-19 15:52:46 -04:00
|
|
|
/*
 * get_appendrel_parampathinfo (relnode.c)
 *	  Returns a ParamPathInfo pointer for 'appendrel' under the
 *	  parameterization 'required_outer'.  Unlike the baserel and joinrel
 *	  variants declared in this header, it takes no PlannerInfo argument.
 *	  NOTE(review): which fields of the result are meaningful for an append
 *	  relation is not visible here -- confirm against relnode.c.
 */
extern ParamPathInfo *get_appendrel_parampathinfo(RelOptInfo *appendrel,
|
2012-06-10 15:20:04 -04:00
|
|
|
Relids required_outer);
|
2001-10-28 06:26:15 +00:00
|
|
|
|
2001-11-05 17:46:40 +00:00
|
|
|
#endif /* PATHNODE_H */
|