/*-------------------------------------------------------------------------
 *
 * blscan.c
 *		Bloom index scan functions.
 *
 * Copyright (c) 2016-2025, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/bloom/blscan.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/relscan.h"
#include "bloom.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
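
/*
 * A bloom index can only be scanned as a bitmap index scan (the AM does not
 * provide amgettuple).  Illustrative SQL, with hypothetical table and column
 * names:
 *
 *		CREATE EXTENSION bloom;
 *		CREATE INDEX ON tbl USING bloom (a, b);
 *		SELECT * FROM tbl WHERE a = 1 AND b = 2;	-- may use blgetbitmap
 */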

/*
 * Begin scan of bloom index.
 */
IndexScanDesc
blbeginscan(Relation r, int nkeys, int norderbys)
{
	IndexScanDesc scan;
	BloomScanOpaque so;

	scan = RelationGetIndexScan(r, nkeys, norderbys);

	so = (BloomScanOpaque) palloc(sizeof(BloomScanOpaqueData));
	initBloomState(&so->state, scan->indexRelation);
	/* search signature is computed lazily on the first blgetbitmap call */
	so->sign = NULL;

	scan->opaque = so;

	return scan;
}

/*
 * Rescan a bloom index.
 */
void
blrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
		 ScanKey orderbys, int norderbys)
{
	BloomScanOpaque so = (BloomScanOpaque) scan->opaque;

	/* Discard any previous search signature; blgetbitmap will rebuild it */
	if (so->sign)
		pfree(so->sign);
	so->sign = NULL;

	if (scankey && scan->numberOfKeys > 0)
		memcpy(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData));
}

/*
 * End scan of bloom index.
 */
void
blendscan(IndexScanDesc scan)
{
	BloomScanOpaque so = (BloomScanOpaque) scan->opaque;

	if (so->sign)
		pfree(so->sign);
	so->sign = NULL;
}

/*
 * Insert all matching tuples into a bitmap.
 */
int64
blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
	int64		ntids = 0;
	BlockNumber blkno = BLOOM_HEAD_BLKNO,
				npages;
	int			i;
	BufferAccessStrategy bas;
	BloomScanOpaque so = (BloomScanOpaque) scan->opaque;

	if (so->sign == NULL)
	{
		/* New search: we have to calculate the search signature */
		ScanKey		skey = scan->keyData;

		so->sign = palloc0(sizeof(BloomSignatureWord) * so->state.opts.bloomLength);

		for (i = 0; i < scan->numberOfKeys; i++)
		{
			/*
			 * Assume bloom-indexable operators to be strict, so nothing can
			 * be found for a NULL key.
			 */
			if (skey->sk_flags & SK_ISNULL)
			{
				pfree(so->sign);
				so->sign = NULL;
				return 0;
			}

			/* OR the next key value's bits into the search signature */
			signValue(&so->state, so->sign, skey->sk_argument,
					  skey->sk_attno - 1);

			skey++;
		}
	}

	/*
	 * We're going to read the whole index (every data page after the
	 * metapage), so use a bulk-read buffer access strategy to avoid
	 * flooding shared buffers.
	 */
	bas = GetAccessStrategy(BAS_BULKREAD);
	npages = RelationGetNumberOfBlocks(scan->indexRelation);
	pgstat_count_index_scan(scan->indexRelation);
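	/*
	 * Report one index search to the scan-level instrumentation shown by
	 * EXPLAIN ANALYZE; each blgetbitmap call makes exactly one full pass
	 * over the index.
	 */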
	if (scan->instrument)
		scan->instrument->nsearches++;

	for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
	{
		Buffer		buffer;
		Page		page;

		buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
									blkno, RBM_NORMAL, bas);

		LockBuffer(buffer, BUFFER_LOCK_SHARE);
		page = BufferGetPage(buffer);
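
		/*
		 * Skip new and deleted pages: a crash just after relation extension
		 * can leave a page uninitialized, and VACUUM marks emptied pages
		 * BLOOM_DELETED.
		 */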
		if (!PageIsNew(page) && !BloomPageIsDeleted(page))
		{
			OffsetNumber offset,
						maxOffset = BloomPageGetMaxOffset(page);

			for (offset = 1; offset <= maxOffset; offset++)
			{
				BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
				bool		res = true;

				/*
				 * Check the index signature against the scan signature: the
				 * tuple matches only if every bit set in the scan signature
				 * is also set in the stored signature.
				 */
				for (i = 0; i < so->state.opts.bloomLength; i++)
				{
					if ((itup->sign[i] & so->sign[i]) != so->sign[i])
					{
						res = false;
						break;
					}
				}

				/*
				 * Add matching tuples to the bitmap.  The signature test is
				 * lossy, so request a recheck against the heap tuple.
				 */
				if (res)
				{
					tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
					ntids++;
				}
			}
		}

		UnlockReleaseBuffer(buffer);
		CHECK_FOR_INTERRUPTS();
	}
	FreeAccessStrategy(bas);

	return ntids;
}