Introduce bloom_filter_size for BRIN bloom opclass
Move the calculation of Bloom filter parameters (for BRIN indexes) into a separate function to make reuse easier. At the moment we only call it from one place, but that may change and it's easier to read anyway.

Reviewed-by: Heikki Linnakangas
Discussion: https://postgr.es/m/0e1f3350-c9cf-ab62-43a5-5dae314de89c%40enterprisedb.com
This commit is contained in:
parent
28d03feac3
commit
2b8b2852bb
@ -259,6 +259,48 @@ typedef struct BloomFilter
|
|||||||
char data[FLEXIBLE_ARRAY_MEMBER];
|
char data[FLEXIBLE_ARRAY_MEMBER];
|
||||||
} BloomFilter;
|
} BloomFilter;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bloom_filter_size
|
||||||
|
* Calculate Bloom filter parameters (nbits, nbytes, nhashes).
|
||||||
|
*
|
||||||
|
* Given expected number of distinct values and desired false positive rate,
|
||||||
|
* calculates the optimal parameters of the Bloom filter.
|
||||||
|
*
|
||||||
|
* The resulting parameters are returned through nbytesp (number of bytes),
|
||||||
|
* nbitsp (number of bits) and nhashesp (number of hash functions). If a
|
||||||
|
* pointer is NULL, the parameter is not returned.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
bloom_filter_size(int ndistinct, double false_positive_rate,
|
||||||
|
int *nbytesp, int *nbitsp, int *nhashesp)
|
||||||
|
{
|
||||||
|
double k;
|
||||||
|
int nbits,
|
||||||
|
nbytes;
|
||||||
|
|
||||||
|
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
|
||||||
|
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
|
||||||
|
|
||||||
|
/* round m to whole bytes */
|
||||||
|
nbytes = ((nbits + 7) / 8);
|
||||||
|
nbits = nbytes * 8;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* round(log(2.0) * m / ndistinct), but assume round() may not be
|
||||||
|
* available on Windows
|
||||||
|
*/
|
||||||
|
k = log(2.0) * nbits / ndistinct;
|
||||||
|
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
|
||||||
|
|
||||||
|
if (nbytesp)
|
||||||
|
*nbytesp = nbytes;
|
||||||
|
|
||||||
|
if (nbitsp)
|
||||||
|
*nbitsp = nbits;
|
||||||
|
|
||||||
|
if (nhashesp)
|
||||||
|
*nhashesp = (int) k;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* bloom_init
|
* bloom_init
|
||||||
@ -275,19 +317,15 @@ bloom_init(int ndistinct, double false_positive_rate)
|
|||||||
|
|
||||||
int nbits; /* size of filter / number of bits */
|
int nbits; /* size of filter / number of bits */
|
||||||
int nbytes; /* size of filter / number of bytes */
|
int nbytes; /* size of filter / number of bytes */
|
||||||
|
int nhashes; /* number of hash functions */
|
||||||
double k; /* number of hash functions */
|
|
||||||
|
|
||||||
Assert(ndistinct > 0);
|
Assert(ndistinct > 0);
|
||||||
Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
|
Assert((false_positive_rate >= BLOOM_MIN_FALSE_POSITIVE_RATE) &&
|
||||||
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE));
|
(false_positive_rate < BLOOM_MAX_FALSE_POSITIVE_RATE));
|
||||||
|
|
||||||
/* sizing bloom filter: -(n * ln(p)) / (ln(2))^2 */
|
/* calculate bloom filter size / parameters */
|
||||||
nbits = ceil(-(ndistinct * log(false_positive_rate)) / pow(log(2.0), 2));
|
bloom_filter_size(ndistinct, false_positive_rate,
|
||||||
|
&nbytes, &nbits, &nhashes);
|
||||||
/* round m to whole bytes */
|
|
||||||
nbytes = ((nbits + 7) / 8);
|
|
||||||
nbits = nbytes * 8;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reject filters that are obviously too large to store on a page.
|
* Reject filters that are obviously too large to store on a page.
|
||||||
@ -310,13 +348,6 @@ bloom_init(int ndistinct, double false_positive_rate)
|
|||||||
elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
|
elog(ERROR, "the bloom filter is too large (%d > %zu)", nbytes,
|
||||||
BloomMaxFilterSize);
|
BloomMaxFilterSize);
|
||||||
|
|
||||||
/*
|
|
||||||
* round(log(2.0) * m / ndistinct), but assume round() may not be
|
|
||||||
* available on Windows
|
|
||||||
*/
|
|
||||||
k = log(2.0) * nbits / ndistinct;
|
|
||||||
k = (k - floor(k) >= 0.5) ? ceil(k) : floor(k);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We allocate the whole filter. Most of it is going to be 0 bits, so the
|
* We allocate the whole filter. Most of it is going to be 0 bits, so the
|
||||||
* varlena is easy to compress.
|
* varlena is easy to compress.
|
||||||
@ -326,7 +357,7 @@ bloom_init(int ndistinct, double false_positive_rate)
|
|||||||
filter = (BloomFilter *) palloc0(len);
|
filter = (BloomFilter *) palloc0(len);
|
||||||
|
|
||||||
filter->flags = 0;
|
filter->flags = 0;
|
||||||
filter->nhashes = (int) k;
|
filter->nhashes = nhashes;
|
||||||
filter->nbits = nbits;
|
filter->nbits = nbits;
|
||||||
|
|
||||||
SET_VARSIZE(filter, len);
|
SET_VARSIZE(filter, len);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user