1997-11-25 22:07:18 +00:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* parse_agg.h
|
2008-12-28 18:54:01 +00:00
|
|
|
* handle aggregates and window functions in parser
|
1997-11-25 22:07:18 +00:00
|
|
|
*
|
2017-01-03 13:48:53 -05:00
|
|
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
2000-01-26 05:58:53 +00:00
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
1997-11-25 22:07:18 +00:00
|
|
|
*
|
2010-09-20 22:08:53 +02:00
|
|
|
* src/include/parser/parse_agg.h
|
1997-11-25 22:07:18 +00:00
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#ifndef PARSE_AGG_H
|
|
|
|
#define PARSE_AGG_H
|
|
|
|
|
1999-07-15 23:04:24 +00:00
|
|
|
#include "parser/parse_node.h"
|
1997-11-25 22:07:18 +00:00
|
|
|
|
2009-12-15 17:57:48 +00:00
|
|
|
extern void transformAggregateCall(ParseState *pstate, Aggref *agg,
|
2010-03-17 16:52:38 +00:00
|
|
|
List *args, List *aggorder,
|
2010-02-26 02:01:40 +00:00
|
|
|
bool agg_distinct);
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
|
|
|
|
extern Node *transformGroupingFunc(ParseState *pstate, GroupingFunc *g);
|
|
|
|
|
2008-12-28 18:54:01 +00:00
|
|
|
extern void transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc,
|
2009-06-11 14:49:15 +00:00
|
|
|
WindowDef *windef);
|
2003-06-06 15:04:03 +00:00
|
|
|
|
2003-01-17 03:25:04 +00:00
|
|
|
extern void parseCheckAggregates(ParseState *pstate, Query *qry);
|
2001-10-28 06:26:15 +00:00
|
|
|
|
Support GROUPING SETS, CUBE and ROLLUP.
This SQL standard functionality allows to aggregate data by different
GROUP BY clauses at once. Each grouping set returns rows with columns
grouped by in other sets set to NULL.
This could previously be achieved by doing each grouping as a separate
query, conjoined by UNION ALLs. Besides being considerably more concise,
grouping sets will in many cases be faster, requiring only one scan over
the underlying data.
The current implementation of grouping sets only supports using sorting
for input. Individual sets that share a sort order are computed in one
pass. If there are sets that don't share a sort order, additional sort &
aggregation steps are performed. These additional passes are sourced by
the previous sort step; thus avoiding repeated scans of the source data.
The code is structured in a way that adding support for purely using
hash aggregation or a mix of hashing and sorting is possible. Sorting
was chosen to be supported first, as it is the most generic method of
implementation.
Instead of, as in an earlier versions of the patch, representing the
chain of sort and aggregation steps as full blown planner and executor
nodes, all but the first sort are performed inside the aggregation node
itself. This avoids the need to do some unusual gymnastics to handle
having to return aggregated and non-aggregated tuples from underlying
nodes, as well as having to shut down underlying nodes early to limit
memory usage. The optimizer still builds Sort/Agg node to describe each
phase, but they're not part of the plan tree, but instead additional
data for the aggregation node. They're a convenient and preexisting way
to describe aggregation and sorting. The first (and possibly only) sort
step is still performed as a separate execution step. That retains
similarity with existing group by plans, makes rescans fairly simple,
avoids very deep plans (leading to slow explains) and easily allows to
avoid the sorting step if the underlying data is sorted by other means.
A somewhat ugly side of this patch is having to deal with a grammar
ambiguity between the new CUBE keyword and the cube extension/functions
named cube (and rollup). To avoid breaking existing deployments of the
cube extension it has not been renamed, neither has cube been made a
reserved keyword. Instead precedence hacking is used to make GROUP BY
cube(..) refer to the CUBE grouping sets feature, and not the function
cube(). To actually group by a function cube(), unlikely as that might
be, the function name has to be quoted.
Needs a catversion bump because stored rules may change.
Author: Andrew Gierth and Atri Sharma, with contributions from Andres Freund
Reviewed-By: Andres Freund, Noah Misch, Tom Lane, Svenne Krap, Tomas
Vondra, Erik Rijkers, Marti Raudsepp, Pavel Stehule
Discussion: CAOeZVidmVRe2jU6aMk_5qkxnB7dfmPROzM7Ur8JPW5j8Y5X-Lw@mail.gmail.com
2015-05-16 03:40:59 +02:00
|
|
|
extern List *expand_grouping_sets(List *groupingSets, int limit);
|
|
|
|
|
Support ordered-set (WITHIN GROUP) aggregates.
This patch introduces generic support for ordered-set and hypothetical-set
aggregate functions, as well as implementations of the instances defined in
SQL:2008 (percentile_cont(), percentile_disc(), rank(), dense_rank(),
percent_rank(), cume_dist()). We also added mode() though it is not in the
spec, as well as versions of percentile_cont() and percentile_disc() that
can compute multiple percentile values in one pass over the data.
Unlike the original submission, this patch puts full control of the sorting
process in the hands of the aggregate's support functions. To allow the
support functions to find out how they're supposed to sort, a new API
function AggGetAggref() is added to nodeAgg.c. This allows retrieval of
the aggregate call's Aggref node, which may have other uses beyond the
immediate need. There is also support for ordered-set aggregates to
install cleanup callback functions, so that they can be sure that
infrastructure such as tuplesort objects gets cleaned up.
In passing, make some fixes in the recently-added support for variadic
aggregates, and make some editorial adjustments in the recent FILTER
additions for aggregates. Also, simplify use of IsBinaryCoercible() by
allowing it to succeed whenever the target type is ANY or ANYELEMENT.
It was inconsistent that it dealt with other polymorphic target types
but not these.
Atri Sharma and Andrew Gierth; reviewed by Pavel Stehule and Vik Fearing,
and rather heavily editorialized upon by Tom Lane
2013-12-23 16:11:35 -05:00
|
|
|
extern int get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes);
|
|
|
|
|
|
|
|
extern Oid resolve_aggregate_transtype(Oid aggfuncid,
|
|
|
|
Oid aggtranstype,
|
|
|
|
Oid *inputTypes,
|
|
|
|
int numArguments);
|
|
|
|
|
2015-08-04 17:53:10 +03:00
|
|
|
extern void build_aggregate_transfn_expr(Oid *agg_input_types,
|
2016-06-09 18:02:36 -04:00
|
|
|
int agg_num_inputs,
|
|
|
|
int agg_num_direct_inputs,
|
|
|
|
bool agg_variadic,
|
|
|
|
Oid agg_state_type,
|
|
|
|
Oid agg_input_collation,
|
|
|
|
Oid transfn_oid,
|
|
|
|
Oid invtransfn_oid,
|
|
|
|
Expr **transfnexpr,
|
|
|
|
Expr **invtransfnexpr);
|
2015-08-04 17:53:10 +03:00
|
|
|
|
2016-01-20 13:46:50 -05:00
|
|
|
extern void build_aggregate_combinefn_expr(Oid agg_state_type,
|
2016-06-09 18:02:36 -04:00
|
|
|
Oid agg_input_collation,
|
|
|
|
Oid combinefn_oid,
|
|
|
|
Expr **combinefnexpr);
|
2016-01-20 13:46:50 -05:00
|
|
|
|
Fix type-safety problem with parallel aggregate serial/deserialization.
The original specification for this called for the deserialization function
to have signature "deserialize(serialtype) returns transtype", which is a
security violation if transtype is INTERNAL (which it always would be in
practice) and serialtype is not (which ditto). The patch blithely overrode
the opr_sanity check for that, which was sloppy-enough work in itself,
but the indisputable reason this cannot be allowed to stand is that CREATE
FUNCTION will reject such a signature and thus it'd be impossible for
extensions to create parallelizable aggregates.
The minimum fix to make the signature type-safe is to add a second, dummy
argument of type INTERNAL. But to lock it down a bit more and make misuse
of INTERNAL-accepting functions less likely, let's get rid of the ability
to specify a "serialtype" for an aggregate and just say that the only
useful serialtype is BYTEA --- which, in practice, is the only interesting
value anyway, due to the usefulness of the send/recv infrastructure for
this purpose. That means we only have to allow "serialize(internal)
returns bytea" and "deserialize(bytea, internal) returns internal" as
the signatures for these support functions.
In passing fix bogus signature of int4_avg_combine, which I found thanks
to adding an opr_sanity check on combinefunc signatures.
catversion bump due to removing pg_aggregate.aggserialtype and adjusting
signatures of assorted built-in functions.
David Rowley and Tom Lane
Discussion: <27247.1466185504@sss.pgh.pa.us>
2016-06-22 16:52:41 -04:00
|
|
|
extern void build_aggregate_serialfn_expr(Oid serialfn_oid,
|
2016-06-09 18:02:36 -04:00
|
|
|
Expr **serialfnexpr);
|
2016-03-29 15:04:05 -04:00
|
|
|
|
Fix type-safety problem with parallel aggregate serial/deserialization.
The original specification for this called for the deserialization function
to have signature "deserialize(serialtype) returns transtype", which is a
security violation if transtype is INTERNAL (which it always would be in
practice) and serialtype is not (which ditto). The patch blithely overrode
the opr_sanity check for that, which was sloppy-enough work in itself,
but the indisputable reason this cannot be allowed to stand is that CREATE
FUNCTION will reject such a signature and thus it'd be impossible for
extensions to create parallelizable aggregates.
The minimum fix to make the signature type-safe is to add a second, dummy
argument of type INTERNAL. But to lock it down a bit more and make misuse
of INTERNAL-accepting functions less likely, let's get rid of the ability
to specify a "serialtype" for an aggregate and just say that the only
useful serialtype is BYTEA --- which, in practice, is the only interesting
value anyway, due to the usefulness of the send/recv infrastructure for
this purpose. That means we only have to allow "serialize(internal)
returns bytea" and "deserialize(bytea, internal) returns internal" as
the signatures for these support functions.
In passing fix bogus signature of int4_avg_combine, which I found thanks
to adding an opr_sanity check on combinefunc signatures.
catversion bump due to removing pg_aggregate.aggserialtype and adjusting
signatures of assorted built-in functions.
David Rowley and Tom Lane
Discussion: <27247.1466185504@sss.pgh.pa.us>
2016-06-22 16:52:41 -04:00
|
|
|
extern void build_aggregate_deserialfn_expr(Oid deserialfn_oid,
|
|
|
|
Expr **deserialfnexpr);
|
|
|
|
|
2015-08-04 17:53:10 +03:00
|
|
|
extern void build_aggregate_finalfn_expr(Oid *agg_input_types,
|
2016-06-09 18:02:36 -04:00
|
|
|
int num_finalfn_inputs,
|
|
|
|
Oid agg_state_type,
|
|
|
|
Oid agg_result_type,
|
|
|
|
Oid agg_input_collation,
|
|
|
|
Oid finalfn_oid,
|
|
|
|
Expr **finalfnexpr);
|
2003-07-01 19:10:53 +00:00
|
|
|
|
2001-11-05 17:46:40 +00:00
|
|
|
#endif /* PARSE_AGG_H */
|