Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

997 lines
23 KiB
C
Raw Normal View History

/*
 * dbsize.c
 *		Database object size functions, and related inquiries
 *
 * Copyright (c) 2002-2022, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/dbsize.c
 *
 */
#include "postgres.h"
#include <sys/stat.h>
#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/catalog.h"
2005-08-02 14:07:27 +00:00
#include "catalog/namespace.h"
#include "catalog/pg_authid.h"
2005-08-02 14:07:27 +00:00
#include "catalog/pg_tablespace.h"
#include "commands/dbcommands.h"
#include "commands/tablespace.h"
#include "miscadmin.h"
#include "storage/fd.h"
#include "utils/acl.h"
2005-08-02 14:07:27 +00:00
#include "utils/builtins.h"
#include "utils/numeric.h"
#include "utils/rel.h"
#include "utils/relfilenodemap.h"
#include "utils/relmapper.h"
2005-08-02 14:07:27 +00:00
#include "utils/syscache.h"
Fix incorrect return value in pg_size_pretty(bigint) Due to how pg_size_pretty(bigint) was implemented, it's possible that when given a negative number of bytes that the returning value would not match the equivalent positive return value when given the equivalent positive number of bytes. This was due to two separate issues. 1. The function used bit shifting to convert the number of bytes into larger units. The rounding performed by bit shifting is not the same as dividing. For example -3 >> 1 = -2, but -3 / 2 = -1. These two operations are only equivalent with positive numbers. 2. The half_rounded() macro rounded towards positive infinity. This meant that negative numbers rounded towards zero and positive numbers rounded away from zero. Here we fix #1 by dividing the values instead of bit shifting. We fix #2 by adjusting the half_rounded macro always to round away from zero. Additionally, adjust the pg_size_pretty(numeric) function to be more explicit that it's using division rather than bit shifting. A casual observer might have believed bit shifting was used due to a static function being named numeric_shift_right. However, that function was calculating the divisor from the number of bits and performed division. Here we make that more clear. This change is just cosmetic and does not affect the return value of the numeric version of the function. Here we also add a set of regression tests both versions of pg_size_pretty() which test the values directly before and after the function switches to the next unit. This bug was introduced in 8a1fab36a. Prior to that negative values were always displayed in bytes. Author: Dean Rasheed, David Rowley Discussion: https://postgr.es/m/CAEZATCXnNW4HsmZnxhfezR5FuiGgp+mkY4AzcL5eRGO4fuadWg@mail.gmail.com Backpatch-through: 9.6, where the bug was introduced.
2021-07-09 14:04:30 +12:00
/*
 * Halve x, rounding the result away from zero (3 -> 2, -3 -> -2).
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define half_rounded(x)   (((x) < 0 ? (x) - 1 : (x) + 1) / 2)
2005-08-02 14:07:27 +00:00
/*
 * Units used in pg_size_pretty functions.  All units must be powers of 2.
 *
 * One entry describes a single display unit: its label, the threshold below
 * which a value is shown in this unit, whether the value is half-rounded
 * before display, and the unit's size expressed as a power of two.
 */
struct size_pretty_unit
{
	const char *name;			/* bytes, kB, MB, GB etc */
	uint32		limit;			/* upper limit, prior to half rounding after
								 * converting to this unit. */
	bool		round;			/* do half rounding for this unit */
	uint8		unitbits;		/* (1 << unitbits) bytes to make 1 of this
								 * unit */
};
/*
 * Table of display units in increasing size.  The list is terminated by an
 * entry whose name is NULL.
 *
 * When adding units here also update the error message in pg_size_bytes.
 */
static const struct size_pretty_unit size_pretty_units[] = {
	{.name = "bytes", .limit = 10 * 1024, .round = false, .unitbits = 0},
	{.name = "kB", .limit = 20 * 1024 - 1, .round = true, .unitbits = 10},
	{.name = "MB", .limit = 20 * 1024 - 1, .round = true, .unitbits = 20},
	{.name = "GB", .limit = 20 * 1024 - 1, .round = true, .unitbits = 30},
	{.name = "TB", .limit = 20 * 1024 - 1, .round = true, .unitbits = 40},
	{.name = "PB", .limit = 20 * 1024 - 1, .round = true, .unitbits = 50},
	{.name = NULL, .limit = 0, .round = false, .unitbits = 0}
};
2005-08-02 14:07:27 +00:00
/* Return physical size of directory contents, or 0 if dir doesn't exist */
static int64
db_dir_size(const char *path)
{
int64 dirsize = 0;
struct dirent *direntry;
DIR *dirdesc;
char filename[MAXPGPATH * 2];
2005-08-02 14:07:27 +00:00
dirdesc = AllocateDir(path);
if (!dirdesc)
return 0;
while ((direntry = ReadDir(dirdesc, path)) != NULL)
2005-08-02 14:07:27 +00:00
{
struct stat fst;
CHECK_FOR_INTERRUPTS();
2005-08-02 14:07:27 +00:00
if (strcmp(direntry->d_name, ".") == 0 ||
strcmp(direntry->d_name, "..") == 0)
continue;
snprintf(filename, sizeof(filename), "%s/%s", path, direntry->d_name);
2005-08-02 14:07:27 +00:00
if (stat(filename, &fst) < 0)
{
if (errno == ENOENT)
continue;
else
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", filename)));
}
2005-08-02 14:07:27 +00:00
dirsize += fst.st_size;
}
FreeDir(dirdesc);
return dirsize;
}
/*
 * Compute the size of a database, summed over all tablespaces.
 *
 * The current user must have CONNECT privilege on the database or have the
 * privileges of pg_read_all_stats; otherwise an ACL error is raised.
 * Shared storage in pg_global is not counted.
 */
static int64
calculate_database_size(Oid dbOid)
{
	int64		total;
	DIR		   *tblspc_dir;
	struct dirent *entry;
	char		tblspc_root[MAXPGPATH];
	char		dbpath[MAXPGPATH + 21 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
	AclResult	acl;

	/* Permission check: CONNECT on the database, or pg_read_all_stats */
	acl = pg_database_aclcheck(dbOid, GetUserId(), ACL_CONNECT);
	if (acl != ACLCHECK_OK)
	{
		if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS))
			aclcheck_error(acl, OBJECT_DATABASE, get_database_name(dbOid));
	}

	/* Start with the database's directory in the default tablespace */
	snprintf(dbpath, sizeof(dbpath), "base/%u", dbOid);
	total = db_dir_size(dbpath);

	/* Then visit the database's directory in each non-default tablespace */
	snprintf(tblspc_root, sizeof(tblspc_root), "pg_tblspc");
	tblspc_dir = AllocateDir(tblspc_root);

	for (entry = ReadDir(tblspc_dir, tblspc_root);
		 entry != NULL;
		 entry = ReadDir(tblspc_dir, tblspc_root))
	{
		CHECK_FOR_INTERRUPTS();

		if (strcmp(entry->d_name, ".") == 0 ||
			strcmp(entry->d_name, "..") == 0)
			continue;

		snprintf(dbpath, sizeof(dbpath), "pg_tblspc/%s/%s/%u",
				 entry->d_name, TABLESPACE_VERSION_DIRECTORY, dbOid);
		total += db_dir_size(dbpath);
	}

	FreeDir(tblspc_dir);

	return total;
}
/*
 * SQL-callable: size of the database identified by OID.
 * Returns SQL NULL when the computed size is zero.
 */
Datum
pg_database_size_oid(PG_FUNCTION_ARGS)
{
	int64		nbytes = calculate_database_size(PG_GETARG_OID(0));

	if (nbytes != 0)
		PG_RETURN_INT64(nbytes);

	PG_RETURN_NULL();
}
/*
 * SQL-callable: size of the database identified by name.
 * The name is resolved with get_database_oid(..., false); SQL NULL is
 * returned when the computed size is zero.
 */
Datum
pg_database_size_name(PG_FUNCTION_ARGS)
{
	Name		dbName = PG_GETARG_NAME(0);
	int64		nbytes;

	nbytes = calculate_database_size(get_database_oid(NameStr(*dbName), false));

	if (nbytes != 0)
		PG_RETURN_INT64(nbytes);

	PG_RETURN_NULL();
}
/*
 * Compute the total size of a tablespace.
 *
 * Returns -1 if the tablespace directory cannot be opened.
 *
 * The current user must have the privileges of pg_read_all_stats or CREATE
 * privilege on the tablespace; the check is skipped when the tablespace is
 * the current database's default.
 */
static int64
calculate_tablespace_size(Oid tblspcOid)
{
	char		spcdir[MAXPGPATH];
	char		entrypath[MAXPGPATH * 2];
	int64		total = 0;
	DIR		   *dir;
	struct dirent *entry;

	if (tblspcOid != MyDatabaseTableSpace &&
		!has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS))
	{
		AclResult	acl;

		acl = pg_tablespace_aclcheck(tblspcOid, GetUserId(), ACL_CREATE);
		if (acl != ACLCHECK_OK)
			aclcheck_error(acl, OBJECT_TABLESPACE,
						   get_tablespace_name(tblspcOid));
	}

	/* Map the tablespace OID to its directory, relative to the data dir */
	if (tblspcOid != DEFAULTTABLESPACE_OID &&
		tblspcOid != GLOBALTABLESPACE_OID)
		snprintf(spcdir, MAXPGPATH, "pg_tblspc/%u/%s", tblspcOid,
				 TABLESPACE_VERSION_DIRECTORY);
	else if (tblspcOid == DEFAULTTABLESPACE_OID)
		snprintf(spcdir, MAXPGPATH, "base");
	else
		snprintf(spcdir, MAXPGPATH, "global");

	dir = AllocateDir(spcdir);
	if (dir == NULL)
		return -1;

	for (;;)
	{
		struct stat st;

		entry = ReadDir(dir, spcdir);
		if (entry == NULL)
			break;

		CHECK_FOR_INTERRUPTS();

		if (strcmp(entry->d_name, ".") == 0 ||
			strcmp(entry->d_name, "..") == 0)
			continue;

		snprintf(entrypath, sizeof(entrypath), "%s/%s", spcdir, entry->d_name);

		if (stat(entrypath, &st) < 0)
		{
			if (errno != ENOENT)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m", entrypath)));

			/* entry vanished since we read it; skip */
			continue;
		}

		/* A subdirectory contributes its contents plus its own entry size */
		if (S_ISDIR(st.st_mode))
			total += db_dir_size(entrypath);

		total += st.st_size;
	}

	FreeDir(dir);

	return total;
}
/*
 * SQL-callable: size of the tablespace identified by OID.
 * Returns SQL NULL when calculate_tablespace_size reports a negative value.
 */
Datum
pg_tablespace_size_oid(PG_FUNCTION_ARGS)
{
	int64		nbytes = calculate_tablespace_size(PG_GETARG_OID(0));

	if (nbytes >= 0)
		PG_RETURN_INT64(nbytes);

	PG_RETURN_NULL();
}
/*
 * SQL-callable: size of the tablespace identified by name.
 * The name is resolved with get_tablespace_oid(..., false); SQL NULL is
 * returned when calculate_tablespace_size reports a negative value.
 */
Datum
pg_tablespace_size_name(PG_FUNCTION_ARGS)
{
	Name		tblspcName = PG_GETARG_NAME(0);
	int64		nbytes;

	nbytes = calculate_tablespace_size(get_tablespace_oid(NameStr(*tblspcName),
														  false));

	if (nbytes >= 0)
		PG_RETURN_INT64(nbytes);

	PG_RETURN_NULL();
}
/*
 * Compute the size of (one fork of) a relation.
 *
 * The fork's files are stat'ed in sequence: first the base path, then
 * "<path>.1", "<path>.2", ... until one is reported missing (ENOENT).
 *
 * Note: we can safely apply this to temp tables of other sessions, so there
 * is no check here or at the call sites for that.
 */
static int64
calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum)
{
	int64		total = 0;
	char	   *basepath = relpathbackend(*rfn, backend, forknum);
	char		segpath[MAXPGPATH];
	unsigned int segno = 0;

	for (;;)
	{
		struct stat st;

		CHECK_FOR_INTERRUPTS();

		/* the first segment has no numeric suffix */
		if (segno > 0)
			snprintf(segpath, MAXPGPATH, "%s.%u", basepath, segno);
		else
			snprintf(segpath, MAXPGPATH, "%s", basepath);

		if (stat(segpath, &st) < 0)
		{
			if (errno != ENOENT)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m", segpath)));

			/* ran off the end of the segment sequence */
			break;
		}

		total += st.st_size;
		segno++;
	}

	return total;
}
Datum
pg_relation_size(PG_FUNCTION_ARGS)
2005-08-02 14:07:27 +00:00
{
Oid relOid = PG_GETARG_OID(0);
text *forkName = PG_GETARG_TEXT_PP(1);
Relation rel;
int64 size;
2005-10-15 02:49:52 +00:00
rel = try_relation_open(relOid, AccessShareLock);
/*
* Before 9.2, we used to throw an error if the relation didn't exist, but
* that makes queries like "SELECT pg_relation_size(oid) FROM pg_class"
* less robust, because while we scan pg_class with an MVCC snapshot,
* someone else might drop the table. It's better to return NULL for
* already-dropped tables than throw an error and abort the whole query.
*/
if (rel == NULL)
PG_RETURN_NULL();
2005-10-15 02:49:52 +00:00
size = calculate_relation_size(&(rel->rd_node), rel->rd_backend,
forkname_to_number(text_to_cstring(forkName)));
2005-10-15 02:49:52 +00:00
relation_close(rel, AccessShareLock);
2005-08-02 14:07:27 +00:00
PG_RETURN_INT64(size);
2005-08-02 14:07:27 +00:00
}
/*
 * Compute the total on-disk size of a TOAST relation together with all of
 * its indexes, counting every fork of each.
 *
 * Must not be applied to non-TOAST relations.
 */
static int64
calculate_toast_table_size(Oid toastrelid)
{
	int64		total = 0;
	Relation	toast_rel = relation_open(toastrelid, AccessShareLock);
	List	   *index_oids;
	ListCell   *cell;
	ForkNumber	forkno;

	/* the toast heap itself, all forks */
	for (forkno = 0; forkno <= MAX_FORKNUM; forkno++)
		total += calculate_relation_size(&(toast_rel->rd_node),
										 toast_rel->rd_backend, forkno);

	/* every index reported for the toast relation, all forks */
	index_oids = RelationGetIndexList(toast_rel);

	foreach(cell, index_oids)
	{
		Relation	idx_rel = relation_open(lfirst_oid(cell), AccessShareLock);

		for (forkno = 0; forkno <= MAX_FORKNUM; forkno++)
			total += calculate_relation_size(&(idx_rel->rd_node),
											 idx_rel->rd_backend, forkno);

		relation_close(idx_rel, AccessShareLock);
	}

	list_free(index_oids);
	relation_close(toast_rel, AccessShareLock);

	return total;
}
2005-08-02 14:07:27 +00:00
/*
 * Compute the total on-disk size of a table: all of its own forks, plus
 * its TOAST table (if it has one).  Indexes other than the TOAST table's
 * index are not included.
 *
 * Note that this also behaves sanely if applied to an index or toast table;
 * those won't have attached toast tables, but they can have multiple forks.
 */
static int64
calculate_table_size(Relation rel)
{
	int64		total = 0;
	ForkNumber	forkno = 0;
	Oid			toastoid;

	/* the relation's own forks */
	while (forkno <= MAX_FORKNUM)
	{
		total += calculate_relation_size(&(rel->rd_node), rel->rd_backend,
										 forkno);
		forkno++;
	}

	/* the attached toast relation, when there is one */
	toastoid = rel->rd_rel->reltoastrelid;
	if (OidIsValid(toastoid))
		total += calculate_toast_table_size(toastoid);

	return total;
}
/*
 * Compute the total on-disk size of all indexes attached to the given
 * table, counting every fork of each index.
 *
 * Can be applied safely to an index, but you'll just get zero.
 */
static int64
calculate_indexes_size(Relation rel)
{
	int64		total = 0;
	List	   *index_oids;
	ListCell   *cell;

	/* nothing to add up when the relation is not flagged as having indexes */
	if (!rel->rd_rel->relhasindex)
		return total;

	index_oids = RelationGetIndexList(rel);

	foreach(cell, index_oids)
	{
		Relation	idx_rel = relation_open(lfirst_oid(cell), AccessShareLock);
		ForkNumber	forkno;

		for (forkno = 0; forkno <= MAX_FORKNUM; forkno++)
			total += calculate_relation_size(&(idx_rel->rd_node),
											 idx_rel->rd_backend,
											 forkno);

		relation_close(idx_rel, AccessShareLock);
	}

	list_free(index_oids);

	return total;
}
/*
 * SQL-callable wrapper around calculate_table_size.
 * Returns SQL NULL if the relation cannot be opened.
 */
Datum
pg_table_size(PG_FUNCTION_ARGS)
{
	Relation	rel = try_relation_open(PG_GETARG_OID(0), AccessShareLock);
	int64		nbytes;

	if (rel == NULL)
		PG_RETURN_NULL();

	nbytes = calculate_table_size(rel);
	relation_close(rel, AccessShareLock);

	PG_RETURN_INT64(nbytes);
}
/*
 * SQL-callable wrapper around calculate_indexes_size.
 * Returns SQL NULL if the relation cannot be opened.
 */
Datum
pg_indexes_size(PG_FUNCTION_ARGS)
{
	Relation	rel = try_relation_open(PG_GETARG_OID(0), AccessShareLock);
	int64		nbytes;

	if (rel == NULL)
		PG_RETURN_NULL();

	nbytes = calculate_indexes_size(rel);
	relation_close(rel, AccessShareLock);

	PG_RETURN_INT64(nbytes);
}
/*
 * Compute the on-disk size of all files for the relation: the result of
 * calculate_table_size (heap, toast and toast index, with their extra
 * forks) plus the result of calculate_indexes_size.
 */
static int64
calculate_total_relation_size(Relation rel)
{
	int64		table_bytes;
	int64		index_bytes;

	/* table first, then its indexes */
	table_bytes = calculate_table_size(rel);
	index_bytes = calculate_indexes_size(rel);

	return table_bytes + index_bytes;
}
Datum
pg_total_relation_size(PG_FUNCTION_ARGS)
2005-08-02 14:07:27 +00:00
{
Oid relOid = PG_GETARG_OID(0);
Relation rel;
int64 size;
rel = try_relation_open(relOid, AccessShareLock);
if (rel == NULL)
PG_RETURN_NULL();
size = calculate_total_relation_size(rel);
2005-08-02 14:07:27 +00:00
relation_close(rel, AccessShareLock);
PG_RETURN_INT64(size);
2005-08-02 14:07:27 +00:00
}
/*
* formatting with size units
*/
Datum
pg_size_pretty(PG_FUNCTION_ARGS)
{
int64 size = PG_GETARG_INT64(0);
char buf[64];
const struct size_pretty_unit *unit;
for (unit = size_pretty_units; unit->name != NULL; unit++)
2005-08-02 14:07:27 +00:00
{
uint8 bits;
/* use this unit if there are no more units or we're below the limit */
if (unit[1].name == NULL || Abs(size) < unit->limit)
2005-08-02 14:07:27 +00:00
{
if (unit->round)
size = half_rounded(size);
snprintf(buf, sizeof(buf), INT64_FORMAT " %s", size, unit->name);
break;
2005-08-02 14:07:27 +00:00
}
/*
* Determine the number of bits to use to build the divisor. We may
* need to use 1 bit less than the difference between this and the
* next unit if the next unit uses half rounding. Or we may need to
* shift an extra bit if this unit uses half rounding and the next one
* does not. We use division rather than shifting right by this
* number of bits to ensure positive and negative values are rounded
* in the same way.
*/
bits = (unit[1].unitbits - unit->unitbits - (unit[1].round == true)
+ (unit->round == true));
size /= ((int64) 1) << bits;
2005-08-02 14:07:27 +00:00
}
PG_RETURN_TEXT_P(cstring_to_text(buf));
2005-08-02 14:07:27 +00:00
}
/* Produce the text form of a numeric by calling numeric_out. */
static char *
numeric_to_cstring(Numeric n)
{
	return DatumGetCString(DirectFunctionCall1(numeric_out,
											   NumericGetDatum(n)));
}
/* Return the result of numeric_lt(a, b) as a C bool. */
static bool
numeric_is_less(Numeric a, Numeric b)
{
	Datum		less;

	less = DirectFunctionCall2(numeric_lt,
							   NumericGetDatum(a),
							   NumericGetDatum(b));

	return DatumGetBool(less);
}
/* Return the result of numeric_abs(n). */
static Numeric
numeric_absolute(Numeric n)
{
	return DatumGetNumeric(DirectFunctionCall1(numeric_abs,
											   NumericGetDatum(n)));
}
/*
 * Numeric counterpart of half_rounded(): move n one step away from zero
 * (add 1 when n >= 0, otherwise subtract 1), then do a truncating divide
 * by 2.
 */
static Numeric
numeric_half_rounded(Numeric n)
{
	Datum		value = NumericGetDatum(n);
	Datum		zero = NumericGetDatum(int64_to_numeric(0));
	Datum		one = NumericGetDatum(int64_to_numeric(1));
	Datum		two = NumericGetDatum(int64_to_numeric(2));
	bool		nonneg;

	nonneg = DatumGetBool(DirectFunctionCall2(numeric_ge, value, zero));

	if (nonneg)
		value = DirectFunctionCall2(numeric_add, value, one);
	else
		value = DirectFunctionCall2(numeric_sub, value, one);

	return DatumGetNumeric(DirectFunctionCall2(numeric_div_trunc, value, two));
}
static Numeric
Fix incorrect return value in pg_size_pretty(bigint) Due to how pg_size_pretty(bigint) was implemented, it's possible that when given a negative number of bytes that the returning value would not match the equivalent positive return value when given the equivalent positive number of bytes. This was due to two separate issues. 1. The function used bit shifting to convert the number of bytes into larger units. The rounding performed by bit shifting is not the same as dividing. For example -3 >> 1 = -2, but -3 / 2 = -1. These two operations are only equivalent with positive numbers. 2. The half_rounded() macro rounded towards positive infinity. This meant that negative numbers rounded towards zero and positive numbers rounded away from zero. Here we fix #1 by dividing the values instead of bit shifting. We fix #2 by adjusting the half_rounded macro always to round away from zero. Additionally, adjust the pg_size_pretty(numeric) function to be more explicit that it's using division rather than bit shifting. A casual observer might have believed bit shifting was used due to a static function being named numeric_shift_right. However, that function was calculating the divisor from the number of bits and performed division. Here we make that more clear. This change is just cosmetic and does not affect the return value of the numeric version of the function. Here we also add a set of regression tests both versions of pg_size_pretty() which test the values directly before and after the function switches to the next unit. This bug was introduced in 8a1fab36a. Prior to that negative values were always displayed in bytes. Author: Dean Rasheed, David Rowley Discussion: https://postgr.es/m/CAEZATCXnNW4HsmZnxhfezR5FuiGgp+mkY4AzcL5eRGO4fuadWg@mail.gmail.com Backpatch-through: 9.6, where the bug was introduced.
2021-07-09 14:04:30 +12:00
numeric_truncated_divide(Numeric n, int64 divisor)
{
Datum d = NumericGetDatum(n);
Datum divisor_numeric;
Datum result;
Fix incorrect return value in pg_size_pretty(bigint) Due to how pg_size_pretty(bigint) was implemented, it's possible that when given a negative number of bytes that the returning value would not match the equivalent positive return value when given the equivalent positive number of bytes. This was due to two separate issues. 1. The function used bit shifting to convert the number of bytes into larger units. The rounding performed by bit shifting is not the same as dividing. For example -3 >> 1 = -2, but -3 / 2 = -1. These two operations are only equivalent with positive numbers. 2. The half_rounded() macro rounded towards positive infinity. This meant that negative numbers rounded towards zero and positive numbers rounded away from zero. Here we fix #1 by dividing the values instead of bit shifting. We fix #2 by adjusting the half_rounded macro always to round away from zero. Additionally, adjust the pg_size_pretty(numeric) function to be more explicit that it's using division rather than bit shifting. A casual observer might have believed bit shifting was used due to a static function being named numeric_shift_right. However, that function was calculating the divisor from the number of bits and performed division. Here we make that more clear. This change is just cosmetic and does not affect the return value of the numeric version of the function. Here we also add a set of regression tests both versions of pg_size_pretty() which test the values directly before and after the function switches to the next unit. This bug was introduced in 8a1fab36a. Prior to that negative values were always displayed in bytes. Author: Dean Rasheed, David Rowley Discussion: https://postgr.es/m/CAEZATCXnNW4HsmZnxhfezR5FuiGgp+mkY4AzcL5eRGO4fuadWg@mail.gmail.com Backpatch-through: 9.6, where the bug was introduced.
2021-07-09 14:04:30 +12:00
divisor_numeric = NumericGetDatum(int64_to_numeric(divisor));
result = DirectFunctionCall2(numeric_div_trunc, d, divisor_numeric);
return DatumGetNumeric(result);
}
Datum
pg_size_pretty_numeric(PG_FUNCTION_ARGS)
{
Numeric size = PG_GETARG_NUMERIC(0);
char *result = NULL;
const struct size_pretty_unit *unit;
for (unit = size_pretty_units; unit->name != NULL; unit++)
{
unsigned int shiftby;
/* use this unit if there are no more units or we're below the limit */
if (unit[1].name == NULL ||
numeric_is_less(numeric_absolute(size),
int64_to_numeric(unit->limit)))
{
if (unit->round)
size = numeric_half_rounded(size);
result = psprintf("%s %s", numeric_to_cstring(size), unit->name);
break;
}
/*
* Determine the number of bits to use to build the divisor. We may
* need to use 1 bit less than the difference between this and the
* next unit if the next unit uses half rounding. Or we may need to
* shift an extra bit if this unit uses half rounding and the next one
* does not.
*/
shiftby = (unit[1].unitbits - unit->unitbits - (unit[1].round == true)
+ (unit->round == true));
size = numeric_truncated_divide(size, ((int64) 1) << shiftby);
}
PG_RETURN_TEXT_P(cstring_to_text(result));
}
/*
 * Convert a human-readable size to a size in bytes
 *
 * The input text is parsed as: optional leading whitespace, a number
 * (optional sign, digits, optional fraction, optional exponent), optional
 * whitespace, and an optional unit name from size_pretty_units (matched
 * case-insensitively), followed by optional trailing whitespace.  The
 * number is handed to numeric_in, multiplied by the unit's size in bytes
 * when that is greater than 1, and converted to int64 with numeric_int8.
 *
 * An error is raised if no digits are found or the unit is not recognized.
 */
Datum
pg_size_bytes(PG_FUNCTION_ARGS)
{
	text	   *arg = PG_GETARG_TEXT_PP(0);
	char	   *str,
			   *strptr,
			   *endptr;
	char		saved_char;
	Numeric		num;
	int64		result;
	bool		have_digits = false;

	/* Work on a NUL-terminated copy, which is modified in place below */
	str = text_to_cstring(arg);

	/* Skip leading whitespace */
	strptr = str;
	while (isspace((unsigned char) *strptr))
		strptr++;

	/* Check that we have a valid number and determine where it ends */
	endptr = strptr;

	/* Part (1): sign */
	if (*endptr == '-' || *endptr == '+')
		endptr++;

	/* Part (2): main digit string */
	if (isdigit((unsigned char) *endptr))
	{
		have_digits = true;
		do
			endptr++;
		while (isdigit((unsigned char) *endptr));
	}

	/* Part (3): optional decimal point and fractional digits */
	if (*endptr == '.')
	{
		endptr++;
		if (isdigit((unsigned char) *endptr))
		{
			have_digits = true;
			do
				endptr++;
			while (isdigit((unsigned char) *endptr));
		}
	}

	/* Complain if we don't have a valid number at this point */
	if (!have_digits)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid size: \"%s\"", str)));

	/* Part (4): optional exponent */
	if (*endptr == 'e' || *endptr == 'E')
	{
		long		exponent;
		char	   *cp;

		/*
		 * Note we might one day support EB units, so if what follows 'E'
		 * isn't a number, just treat it all as a unit to be parsed.
		 *
		 * strtol is used here only to find where the exponent ends; its
		 * value is discarded.
		 */
		exponent = strtol(endptr + 1, &cp, 10);
		(void) exponent;		/* Silence -Wunused-result warnings */
		/* cp advanced past endptr + 1 only if strtol consumed something */
		if (cp > endptr + 1)
			endptr = cp;
	}

	/*
	 * Parse the number, saving the next character, which may be the first
	 * character of the unit string.  The string is temporarily terminated
	 * at endptr so that numeric_in sees only the number, then restored.
	 */
	saved_char = *endptr;
	*endptr = '\0';

	num = DatumGetNumeric(DirectFunctionCall3(numeric_in,
											  CStringGetDatum(strptr),
											  ObjectIdGetDatum(InvalidOid),
											  Int32GetDatum(-1)));

	*endptr = saved_char;

	/* Skip whitespace between number and unit */
	strptr = endptr;
	while (isspace((unsigned char) *strptr))
		strptr++;

	/* Handle possible unit */
	if (*strptr != '\0')
	{
		const struct size_pretty_unit *unit;
		int64		multiplier = 0;

		/*
		 * Trim any trailing whitespace.  Scanning backwards stops at the
		 * non-space character that strptr points to, at the latest.
		 */
		endptr = str + VARSIZE_ANY_EXHDR(arg) - 1;

		while (isspace((unsigned char) *endptr))
			endptr--;

		endptr++;
		*endptr = '\0';

		for (unit = size_pretty_units; unit->name != NULL; unit++)
		{
			/* Parse the unit case-insensitively */
			if (pg_strcasecmp(strptr, unit->name) == 0)
			{
				multiplier = ((int64) 1) << unit->unitbits;
				break;
			}
		}

		/*
		 * Verify we found a valid unit in the loop above.  The message uses
		 * a fresh copy of the argument because str has been truncated by
		 * the whitespace trimming above.
		 */
		if (unit->name == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("invalid size: \"%s\"", text_to_cstring(arg)),
					 errdetail("Invalid size unit: \"%s\".", strptr),
					 errhint("Valid units are \"bytes\", \"kB\", \"MB\", \"GB\", \"TB\", and \"PB\".")));

		/* A multiplier of 1 (the "bytes" unit) needs no multiplication */
		if (multiplier > 1)
		{
			Numeric		mul_num;

			mul_num = int64_to_numeric(multiplier);

			num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
													  NumericGetDatum(mul_num),
													  NumericGetDatum(num)));
		}
	}

	/* Convert the numeric result to int64 via numeric_int8 */
	result = DatumGetInt64(DirectFunctionCall1(numeric_int8,
											   NumericGetDatum(num)));

	PG_RETURN_INT64(result);
}
/*
 * Get the filenode of a relation
 *
 * This is expected to be used in queries like
 *		SELECT pg_relation_filenode(oid) FROM pg_class;
 * That leads to a couple of choices.  We work from the pg_class row alone
 * rather than actually opening each relation, for efficiency.  We don't
 * fail if we can't find the relation --- some rows might be visible in
 * the query's MVCC snapshot even though the relations have been dropped.
 * (Note: we could avoid using the catcache, but there's little point
 * because the relation mapper also works "in the now".)  We also don't
 * fail if the relation doesn't have storage.  In all these cases it
 * seems better to quietly return NULL.
 */
Datum
pg_relation_filenode(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	Oid			filenode = InvalidOid;	/* stays invalid if no storage */
	HeapTuple	classtup;
	Form_pg_class classform;

	classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(classtup))
		PG_RETURN_NULL();
	classform = (Form_pg_class) GETSTRUCT(classtup);

	if (RELKIND_HAS_STORAGE(classform->relkind))
	{
		filenode = classform->relfilenode;

		/* A zero relfilenode means we must consult the relation mapper */
		if (!filenode)
			filenode = RelationMapOidToFilenode(relid,
												classform->relisshared);
	}

	ReleaseSysCache(classtup);

	if (OidIsValid(filenode))
		PG_RETURN_OID(filenode);

	PG_RETURN_NULL();
}
/*
 * Get the relation via (reltablespace, relfilenode)
 *
 * This is expected to be used when somebody wants to match an individual file
 * on the filesystem back to its table.  That's not trivially possible via
 * pg_class, because that doesn't contain the relfilenodes of shared and nailed
 * tables.
 *
 * We don't fail but return NULL if we cannot find a mapping.
 *
 * InvalidOid can be passed instead of the current database's default
 * tablespace.
 */
Datum
pg_filenode_relation(PG_FUNCTION_ARGS)
{
	Oid			reltablespace = PG_GETARG_OID(0);
	Oid			relfilenode = PG_GETARG_OID(1);
	Oid			relid;

	/* test needed so RelidByRelfilenode doesn't misbehave */
	if (!OidIsValid(relfilenode))
		PG_RETURN_NULL();

	relid = RelidByRelfilenode(reltablespace, relfilenode);

	if (OidIsValid(relid))
		PG_RETURN_OID(relid);

	PG_RETURN_NULL();
}
/*
 * Get the pathname (relative to $PGDATA) of a relation
 *
 * Like pg_relation_filenode, this works from the pg_class row alone and
 * returns NULL when the row cannot be found, the relation has no storage,
 * or no valid filenode can be determined.
 */
Datum
pg_relation_filepath(PG_FUNCTION_ARGS)
{
	Oid			relid = PG_GETARG_OID(0);
	HeapTuple	classtup;
	Form_pg_class classform;
	RelFileNode locator;
	BackendId	owner;
	char	   *filepath;

	classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
	if (!HeapTupleIsValid(classtup))
		PG_RETURN_NULL();
	classform = (Form_pg_class) GETSTRUCT(classtup);

	/* Start from "no storage"; filled in below when the relkind has storage */
	locator.spcNode = InvalidOid;
	locator.dbNode = InvalidOid;
	locator.relNode = InvalidOid;

	if (RELKIND_HAS_STORAGE(classform->relkind))
	{
		/* This logic should match RelationInitPhysicalAddr */
		locator.spcNode = classform->reltablespace ?
			classform->reltablespace : MyDatabaseTableSpace;

		locator.dbNode = (locator.spcNode == GLOBALTABLESPACE_OID) ?
			InvalidOid : MyDatabaseId;

		/* A zero relfilenode means we must consult the relation mapper */
		locator.relNode = classform->relfilenode ?
			classform->relfilenode :
			RelationMapOidToFilenode(relid, classform->relisshared);
	}

	if (!OidIsValid(locator.relNode))
	{
		ReleaseSysCache(classtup);
		PG_RETURN_NULL();
	}

	/* Determine owning backend. */
	switch (classform->relpersistence)
	{
		case RELPERSISTENCE_UNLOGGED:
		case RELPERSISTENCE_PERMANENT:
			owner = InvalidBackendId;
			break;

		case RELPERSISTENCE_TEMP:
			if (!isTempOrTempToastNamespace(classform->relnamespace))
			{
				/* Do it the hard way. */
				owner = GetTempNamespaceBackendId(classform->relnamespace);
				Assert(owner != InvalidBackendId);
			}
			else
				owner = BackendIdForTempRelations();
			break;

		default:
			elog(ERROR, "invalid relpersistence: %c",
				 classform->relpersistence);
			owner = InvalidBackendId;	/* placate compiler */
			break;
	}

	ReleaseSysCache(classtup);

	filepath = relpathbackend(locator, owner, MAIN_FORKNUM);

	PG_RETURN_TEXT_P(cstring_to_text(filepath));
}