Improve nbtree unsatisfiable RowCompare detection.

Move nbtree's detection of RowCompare quals that are unsatisfiable due
to having a NULL in their first row element: rather than detecting these
cases at the point where _bt_first builds its insertion scan key, do so
earlier, during preprocessing proper.  This brings the RowCompare case
in line every other case involving an unsatisfiable-due-to-NULL qual.

nbtree now consistently detects such unsatisfiable quals -- even when
they happen to involve a key that isn't examined by _bt_first at all.
Affected cases thereby avoid useless full index scans that cannot
possibly return any matching rows.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>
Discussion: https://postgr.es/m/CAH2-WzmySVXst2hFrOATC-zw1Byg1XC-jYUS314=mzuqsNwk+Q@mail.gmail.com
This commit is contained in:
Peter Geoghegan 2025-01-07 10:38:30 -05:00
parent 428a99b589
commit ec986020de
6 changed files with 248 additions and 16 deletions

View File

@ -1162,23 +1162,23 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (cur->sk_flags & SK_ROW_HEADER)
{
/*
* Row comparison header: look to the first row member instead.
*
* The member scankeys are already in insertion format (ie, they
* have sk_func = 3-way-comparison function), but we have to watch
* out for nulls, which _bt_preprocess_keys didn't check. A null
* in the first row member makes the condition unmatchable, just
* like qual_ok = false.
* Row comparison header: look to the first row member instead
*/
ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
/*
* Cannot be a NULL in the first row member: _bt_preprocess_keys
* would've marked the qual as unsatisfiable, preventing us from
* ever getting this far
*/
Assert(subkey->sk_flags & SK_ROW_MEMBER);
if (subkey->sk_flags & SK_ISNULL)
{
Assert(!so->needPrimScan);
_bt_parallel_done(scan);
return false;
}
Assert(subkey->sk_attno == cur->sk_attno);
Assert(!(subkey->sk_flags & SK_ISNULL));
/*
* The member scankeys are already in insertion format (ie, they
* have sk_func = 3-way-comparison function)
*/
memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
/*

View File

@ -3371,6 +3371,13 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
if (subkey->sk_flags & SK_ISNULL)
{
/* First row member is NULL, so RowCompare is unsatisfiable */
Assert(subkey->sk_flags & SK_ROW_MEMBER);
return false;
}
for (;;)
{
Assert(subkey->sk_flags & SK_ROW_MEMBER);
@ -3982,13 +3989,14 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
if (subkey->sk_flags & SK_ISNULL)
{
/*
* Unlike the simple-scankey case, this isn't a disallowed case.
* Unlike the simple-scankey case, this isn't a disallowed case
* (except when it's the first row element that has the NULL arg).
* But it can never match. If all the earlier row comparison
* columns are required for the scan direction, we can stop the
* scan, because there can't be another tuple that will succeed.
*/
if (subkey != (ScanKey) DatumGetPointer(skey->sk_argument))
subkey--;
Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
subkey--;
if ((subkey->sk_flags & SK_BT_REQFWD) &&
ScanDirectionIsForward(dir))
*continuescan = false;

View File

@ -142,6 +142,133 @@ SELECT b.*
4500 | 2080851358
(1 row)
--
-- Add coverage of RowCompare quals whose row omits a column ("proargtypes")
-- that's after the first column, but before the final column. The scan's
-- initial positioning strategy must become >= here (it's not the > strategy,
-- since the absence of "proargtypes" makes that tighter constraint unsafe).
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) > ('abs', 0)
ORDER BY proname, proargtypes, pronamespace LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------
Limit
-> Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
Index Cond: (ROW(proname, pronamespace) > ROW('abs'::name, '0'::oid))
(3 rows)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) > ('abs', 0)
ORDER BY proname, proargtypes, pronamespace LIMIT 1;
proname | proargtypes | pronamespace
---------+-------------+--------------
abs | 20 | 11
(1 row)
--
-- Similar to the previous test case, but this time it's a backwards scan
-- using a < RowCompare. Must use the <= strategy (and not the < strategy).
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) < ('abs', 1_000_000)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;
QUERY PLAN
-------------------------------------------------------------------------------------
Limit
-> Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc
Index Cond: (ROW(proname, pronamespace) < ROW('abs'::name, '1000000'::oid))
(3 rows)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) < ('abs', 1_000_000)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;
proname | proargtypes | pronamespace
---------+-------------+--------------
abs | 1700 | 11
(1 row)
--
-- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
ORDER BY proname, proargtypes, pronamespace;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc
Index Cond: ((ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name))
(2 rows)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
ORDER BY proname, proargtypes, pronamespace;
proname | proargtypes | pronamespace
---------+-------------+--------------
(0 rows)
--
-- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL
-- that ends scan
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc
Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name))
(2 rows)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
proname | proargtypes | pronamespace
---------+-------------+--------------
(0 rows)
--
-- Add coverage for recheck of > key following array advancement on previous
-- (left sibling) page that used a high key whose attribute value corresponding
-- to the > key was -inf (due to being truncated when the high key was created).
--
-- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated
-- high key "(183, -inf)" on the first page that we'll scan. The test will only
-- provide useful coverage when the default 8K BLCKSZ is in use.
--
explain (costs off)
SELECT thousand, tenthous
FROM tenk1
WHERE thousand IN (182, 183) AND tenthous > 7550;
QUERY PLAN
---------------------------------------------------------------------------------
Index Only Scan using tenk1_thous_tenthous on tenk1
Index Cond: ((thousand = ANY ('{182,183}'::integer[])) AND (tenthous > 7550))
(2 rows)
SELECT thousand, tenthous
FROM tenk1
WHERE thousand IN (182, 183) AND tenthous > 7550;
thousand | tenthous
----------+----------
182 | 8182
182 | 9182
183 | 8183
183 | 9183
(4 rows)
--
-- Add coverage for optimization of backwards scan index descents
--

View File

@ -2428,6 +2428,19 @@ SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint
---------
(0 rows)
explain (costs off)
SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5);
QUERY PLAN
-----------------------------------------------------------------
Index Scan using tenk1_thous_tenthous on tenk1
Index Cond: (ROW(thousand, tenthous) > ROW(NULL::integer, 5))
(2 rows)
SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5);
unique1
---------
(0 rows)
--
-- Check elimination of constant-NULL subexpressions
--

View File

@ -110,6 +110,85 @@ SELECT b.*
FROM bt_f8_heap b
WHERE b.seqno = '4500'::float8;
--
-- Add coverage of RowCompare quals whose row omits a column ("proargtypes")
-- that's after the first column, but before the final column. The scan's
-- initial positioning strategy must become >= here (it's not the > strategy,
-- since the absence of "proargtypes" makes that tighter constraint unsafe).
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) > ('abs', 0)
ORDER BY proname, proargtypes, pronamespace LIMIT 1;
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) > ('abs', 0)
ORDER BY proname, proargtypes, pronamespace LIMIT 1;
--
-- Similar to the previous test case, but this time it's a backwards scan
-- using a < RowCompare. Must use the <= strategy (and not the < strategy).
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) < ('abs', 1_000_000)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE (proname, pronamespace) < ('abs', 1_000_000)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1;
--
-- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
ORDER BY proname, proargtypes, pronamespace;
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL)
ORDER BY proname, proargtypes, pronamespace;
--
-- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL
-- that ends scan
--
explain (costs off)
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
SELECT proname, proargtypes, pronamespace
FROM pg_proc
WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL)
ORDER BY proname DESC, proargtypes DESC, pronamespace DESC;
--
-- Add coverage for recheck of > key following array advancement on previous
-- (left sibling) page that used a high key whose attribute value corresponding
-- to the > key was -inf (due to being truncated when the high key was created).
--
-- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated
-- high key "(183, -inf)" on the first page that we'll scan. The test will only
-- provide useful coverage when the default 8K BLCKSZ is in use.
--
explain (costs off)
SELECT thousand, tenthous
FROM tenk1
WHERE thousand IN (182, 183) AND tenthous > 7550;
SELECT thousand, tenthous
FROM tenk1
WHERE thousand IN (182, 183) AND tenthous > 7550;
--
-- Add coverage for optimization of backwards scan index descents
--

View File

@ -931,6 +931,11 @@ SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint
SELECT unique1 FROM tenk1 WHERE unique1 IN (1, 42, 7) and unique1 < (-1)::bigint;
explain (costs off)
SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5);
SELECT unique1 FROM tenk1 WHERE (thousand, tenthous) > (NULL, 5);
--
-- Check elimination of constant-NULL subexpressions
--