Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations than individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
*
|
|
|
|
* statscmds.c
|
|
|
|
* Commands for creating and altering extended statistics
|
|
|
|
*
|
|
|
|
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
|
|
|
|
* Portions Copyright (c) 1994, Regents of the University of California
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* IDENTIFICATION
|
|
|
|
* src/backend/commands/statscmds.c
|
|
|
|
*
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
*/
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
#include "access/relscan.h"
|
|
|
|
#include "catalog/dependency.h"
|
|
|
|
#include "catalog/indexing.h"
|
|
|
|
#include "catalog/namespace.h"
|
|
|
|
#include "catalog/pg_namespace.h"
|
|
|
|
#include "catalog/pg_statistic_ext.h"
|
|
|
|
#include "commands/defrem.h"
|
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "statistics/statistics.h"
|
|
|
|
#include "utils/builtins.h"
|
|
|
|
#include "utils/inval.h"
|
|
|
|
#include "utils/memutils.h"
|
|
|
|
#include "utils/rel.h"
|
|
|
|
#include "utils/syscache.h"
|
|
|
|
#include "utils/typcache.h"
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * qsort comparator used for sorting the attnums in CreateStatistics.
 *
 * Both arguments point at int16 values.  Returns a negative, zero or
 * positive result according to whether *a is numerically less than, equal
 * to or greater than *b.
 *
 * The values are compared numerically rather than with memcmp(): a bytewise
 * comparison follows storage order, which does not match numeric order on
 * little-endian machines (nor for negative values), so the array would not
 * come out in ascending attnum order.
 */
static int
compare_int16(const void *a, const void *b)
{
	int			av = *(const int16 *) a;
	int			bv = *(const int16 *) b;

	/* written as two comparisons so the result can never overflow */
	return (av > bv) - (av < bv);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* CREATE STATISTICS
|
|
|
|
*/
|
|
|
|
ObjectAddress
|
|
|
|
CreateStatistics(CreateStatsStmt *stmt)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
ListCell *l;
|
|
|
|
int16 attnums[STATS_MAX_DIMENSIONS];
|
|
|
|
int numcols = 0;
|
|
|
|
ObjectAddress address = InvalidObjectAddress;
|
|
|
|
char *namestr;
|
|
|
|
NameData staname;
|
|
|
|
Oid statoid;
|
|
|
|
Oid namespaceId;
|
|
|
|
HeapTuple htup;
|
|
|
|
Datum values[Natts_pg_statistic_ext];
|
|
|
|
bool nulls[Natts_pg_statistic_ext];
|
|
|
|
int2vector *stakeys;
|
|
|
|
Relation statrel;
|
|
|
|
Relation rel;
|
|
|
|
Oid relid;
|
|
|
|
ObjectAddress parentobject,
|
|
|
|
childobject;
|
2017-04-05 18:00:42 -04:00
|
|
|
Datum types[2]; /* one for each possible type of statistics */
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
int ntypes;
|
|
|
|
ArrayType *staenabled;
|
|
|
|
bool build_ndistinct;
|
2017-04-05 18:00:42 -04:00
|
|
|
bool build_dependencies;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
bool requested_type = false;
|
|
|
|
|
|
|
|
Assert(IsA(stmt, CreateStatsStmt));
|
|
|
|
|
|
|
|
/* resolve the pieces of the name (namespace etc.) */
|
|
|
|
namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
|
|
|
|
namestrcpy(&staname, namestr);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If if_not_exists was given and the statistics already exists, bail out.
|
|
|
|
*/
|
|
|
|
if (SearchSysCacheExists2(STATEXTNAMENSP,
|
|
|
|
PointerGetDatum(&staname),
|
|
|
|
ObjectIdGetDatum(namespaceId)))
|
|
|
|
{
|
|
|
|
if (stmt->if_not_exists)
|
|
|
|
{
|
|
|
|
ereport(NOTICE,
|
|
|
|
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
|
|
|
errmsg("statistics \"%s\" already exist, skipping",
|
|
|
|
namestr)));
|
|
|
|
return InvalidObjectAddress;
|
|
|
|
}
|
|
|
|
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_DUPLICATE_OBJECT),
|
|
|
|
errmsg("statistics \"%s\" already exist", namestr)));
|
|
|
|
}
|
|
|
|
|
2017-04-05 18:22:32 -04:00
|
|
|
/*
|
|
|
|
* CREATE STATISTICS will influence future execution plans but does
|
|
|
|
* not interfere with currently executing plans so it is safe to
|
|
|
|
* take only ShareUpdateExclusiveLock on relation, conflicting with
|
|
|
|
* ANALYZE and other DDL that sets statistical information.
|
|
|
|
*/
|
2017-04-17 17:55:17 -03:00
|
|
|
rel = relation_openrv(stmt->relation, ShareUpdateExclusiveLock);
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
relid = RelationGetRelid(rel);
|
|
|
|
|
|
|
|
if (rel->rd_rel->relkind != RELKIND_RELATION &&
|
2017-04-17 17:55:17 -03:00
|
|
|
rel->rd_rel->relkind != RELKIND_MATVIEW &&
|
|
|
|
rel->rd_rel->relkind != RELKIND_FOREIGN_TABLE &&
|
|
|
|
rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
|
2017-04-17 17:55:17 -03:00
|
|
|
errmsg("relation \"%s\" is not a table, foreign table, or materialized view",
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
RelationGetRelationName(rel))));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transform column names to array of attnums. While at it, enforce some
|
|
|
|
* constraints.
|
|
|
|
*/
|
|
|
|
foreach(l, stmt->keys)
|
|
|
|
{
|
|
|
|
char *attname = strVal(lfirst(l));
|
|
|
|
HeapTuple atttuple;
|
|
|
|
Form_pg_attribute attForm;
|
|
|
|
TypeCacheEntry *type;
|
|
|
|
|
|
|
|
atttuple = SearchSysCacheAttName(relid, attname);
|
|
|
|
if (!HeapTupleIsValid(atttuple))
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("column \"%s\" referenced in statistics does not exist",
|
|
|
|
attname)));
|
|
|
|
attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
|
|
|
|
|
|
|
|
/* Disallow use of system attributes in extended stats */
|
|
|
|
if (attForm->attnum < 0)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("statistic creation on system columns is not supported")));
|
|
|
|
|
|
|
|
/* Disallow data types without a less-than operator */
|
|
|
|
type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
|
|
|
|
if (type->lt_opr == InvalidOid)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
|
|
errmsg("only scalar types can be used in extended statistics")));
|
|
|
|
|
|
|
|
/* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
|
|
|
|
if (numcols >= STATS_MAX_DIMENSIONS)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_TOO_MANY_COLUMNS),
|
|
|
|
errmsg("cannot have more than %d keys in statistics",
|
|
|
|
STATS_MAX_DIMENSIONS)));
|
|
|
|
|
|
|
|
attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
|
|
|
|
ReleaseSysCache(atttuple);
|
|
|
|
numcols++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that at least two columns were specified in the statement. The
|
|
|
|
* upper bound was already checked in the loop above.
|
|
|
|
*/
|
|
|
|
if (numcols < 2)
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_TOO_MANY_COLUMNS),
|
|
|
|
errmsg("statistics require at least 2 columns")));
|
|
|
|
|
|
|
|
/*
|
2017-04-05 18:00:42 -04:00
|
|
|
* Sort the attnums, which makes detecting duplicities somewhat easier, and
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
* it does not hurt (it does not affect the efficiency, unlike for
|
|
|
|
* indexes, for example).
|
|
|
|
*/
|
|
|
|
qsort(attnums, numcols, sizeof(int16), compare_int16);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Look for duplicities in the list of columns. The attnums are sorted so
|
|
|
|
* just check consecutive elements.
|
|
|
|
*/
|
|
|
|
for (i = 1; i < numcols; i++)
|
|
|
|
if (attnums[i] == attnums[i - 1])
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
|
|
errmsg("duplicate column name in statistics definition")));
|
|
|
|
|
|
|
|
stakeys = buildint2vector(attnums, numcols);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Parse the statistics options. Currently only statistics types are
|
|
|
|
* recognized.
|
|
|
|
*/
|
|
|
|
build_ndistinct = false;
|
2017-04-05 18:00:42 -04:00
|
|
|
build_dependencies = false;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
foreach(l, stmt->options)
|
|
|
|
{
|
|
|
|
DefElem *opt = (DefElem *) lfirst(l);
|
|
|
|
|
|
|
|
if (strcmp(opt->defname, "ndistinct") == 0)
|
|
|
|
{
|
|
|
|
build_ndistinct = defGetBoolean(opt);
|
|
|
|
requested_type = true;
|
|
|
|
}
|
2017-04-05 18:00:42 -04:00
|
|
|
else if (strcmp(opt->defname, "dependencies") == 0)
|
|
|
|
{
|
|
|
|
build_dependencies = defGetBoolean(opt);
|
|
|
|
requested_type = true;
|
|
|
|
}
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
else
|
|
|
|
ereport(ERROR,
|
|
|
|
(errcode(ERRCODE_SYNTAX_ERROR),
|
|
|
|
errmsg("unrecognized STATISTICS option \"%s\"",
|
|
|
|
opt->defname)));
|
|
|
|
}
|
|
|
|
/* If no statistic type was specified, build them all. */
|
|
|
|
if (!requested_type)
|
2017-04-05 18:00:42 -04:00
|
|
|
{
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
build_ndistinct = true;
|
2017-04-05 18:00:42 -04:00
|
|
|
build_dependencies = true;
|
|
|
|
}
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
|
|
|
|
/* construct the char array of enabled statistic types */
|
|
|
|
ntypes = 0;
|
|
|
|
if (build_ndistinct)
|
|
|
|
types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
|
2017-04-05 18:00:42 -04:00
|
|
|
if (build_dependencies)
|
|
|
|
types[ntypes++] = CharGetDatum(STATS_EXT_DEPENDENCIES);
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
Assert(ntypes > 0);
|
|
|
|
staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Everything seems fine, so let's build the pg_statistic_ext tuple.
|
|
|
|
*/
|
|
|
|
memset(values, 0, sizeof(values));
|
|
|
|
memset(nulls, false, sizeof(nulls));
|
|
|
|
values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
|
|
|
|
values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
|
|
|
|
values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
|
|
|
|
values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
|
|
|
|
values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
|
|
|
|
values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
|
|
|
|
|
|
|
|
/* no statistics build yet */
|
|
|
|
nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
|
2017-04-05 18:00:42 -04:00
|
|
|
nulls[Anum_pg_statistic_ext_stadependencies - 1] = true;
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
|
|
|
|
/* insert it into pg_statistic_ext */
|
|
|
|
statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
|
|
|
|
htup = heap_form_tuple(statrel->rd_att, values, nulls);
|
|
|
|
CatalogTupleInsert(statrel, htup);
|
|
|
|
statoid = HeapTupleGetOid(htup);
|
|
|
|
heap_freetuple(htup);
|
2017-04-17 17:55:17 -03:00
|
|
|
relation_close(statrel, RowExclusiveLock);
|
2017-03-24 15:43:03 -03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Invalidate relcache so that others see the new statistics.
|
|
|
|
*/
|
|
|
|
CacheInvalidateRelcache(rel);
|
|
|
|
|
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE
STATISTICS), allowing collection of statistics on more complex
combinations that individual table columns. Companion commands DROP
STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are
added too. All this DDL has been designed so that more statistic types
can be added later on, such as multivariate most-common-values and
multivariate histograms between columns of a single table, leaving room
for permitting columns on multiple tables, too, as well as expressions.
This commit only adds support for collection of n-distinct coefficient
on user-specified sets of columns in a single table. This is useful to
estimate number of distinct groups in GROUP BY and DISTINCT clauses;
estimation errors there can cause over-allocation of memory in hashed
aggregates, for instance, so it's a worthwhile problem to solve. A new
special pseudo-type pg_ndistinct is used.
(num-distinct estimation was deemed sufficiently useful by itself that
this is worthwhile even if no further statistic types are added
immediately; so much so that another version of essentially the same
functionality was submitted by Kyotaro Horiguchi:
https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp
though this commit does not use that code.)
Author: Tomas Vondra. Some code rework by Álvaro.
Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes,
Ideriha Takeshi
Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz
https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
2017-03-24 14:06:10 -03:00
|
|
|
relation_close(rel, NoLock);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add a dependency on a table, so that stats get dropped on DROP TABLE.
|
|
|
|
*/
|
|
|
|
ObjectAddressSet(parentobject, RelationRelationId, relid);
|
|
|
|
ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
|
|
|
|
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Also add dependency on the schema. This is required to ensure that we
|
|
|
|
* drop the statistics on DROP SCHEMA. This is not handled automatically
|
|
|
|
* by DROP TABLE because the statistics are not an object in the table's
|
|
|
|
* schema.
|
|
|
|
*/
|
|
|
|
ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
|
|
|
|
recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
|
|
|
|
|
|
|
|
ObjectAddressSet(address, StatisticExtRelationId, statoid);
|
|
|
|
|
|
|
|
return address;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Guts of statistics deletion.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
RemoveStatisticsById(Oid statsOid)
|
|
|
|
{
|
|
|
|
Relation relation;
|
|
|
|
Oid relid;
|
|
|
|
Relation rel;
|
|
|
|
HeapTuple tup;
|
|
|
|
Form_pg_statistic_ext statext;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete the pg_statistic_ext tuple.
|
|
|
|
*/
|
|
|
|
relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
|
|
|
|
|
|
|
|
tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
|
|
|
|
|
|
|
|
if (!HeapTupleIsValid(tup)) /* should not happen */
|
|
|
|
elog(ERROR, "cache lookup failed for statistics %u", statsOid);
|
|
|
|
|
|
|
|
statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
|
|
|
|
relid = statext->starelid;
|
|
|
|
|
|
|
|
rel = heap_open(relid, AccessExclusiveLock);
|
|
|
|
|
|
|
|
simple_heap_delete(relation, &tup->t_self);
|
|
|
|
|
|
|
|
CacheInvalidateRelcache(rel);
|
|
|
|
|
|
|
|
ReleaseSysCache(tup);
|
|
|
|
|
|
|
|
heap_close(relation, RowExclusiveLock);
|
|
|
|
heap_close(rel, NoLock);
|
|
|
|
}
|