postgres/src/backend/executor/nodeTidscan.c
Tom Lane 06e10abc0b Fix problems with cached tuple descriptors disappearing while still in use
by creating a reference-count mechanism, similar to what we did a long time
ago for catcache entries.  The back branches have an ugly solution involving
lots of extra copies, but this way is more efficient.  Reference counting is
only applied to tupdescs that are actually in caches --- there seems no need
to use it for tupdescs that are generated in the executor, since they'll go
away during plan shutdown by virtue of being in the per-query memory context.
Neil Conway and Tom Lane
2006-06-16 18:42:24 +00:00

547 lines
14 KiB
C

/*-------------------------------------------------------------------------
*
* nodeTidscan.c
* Routines to support direct tid scans of relations
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeTidscan.c,v 1.49 2006/06/16 18:42:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
*
* ExecTidScan scans a relation using tids
* ExecInitTidScan creates and initializes state info.
* ExecTidReScan rescans the tid relation.
* ExecEndTidScan releases all storage.
* ExecTidMarkPos marks scan position.
* ExecTidRestrPos restores scan position.
*/
#include "postgres.h"
#include "access/heapam.h"
#include "catalog/pg_type.h"
#include "executor/execdebug.h"
#include "executor/nodeTidscan.h"
#include "optimizer/clauses.h"
#include "parser/parsetree.h"
#include "utils/array.h"
#define IsCTIDVar(node) \
((node) != NULL && \
IsA((node), Var) && \
((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
((Var *) (node))->varlevelsup == 0)
static void TidListCreate(TidScanState *tidstate);
static int itemptr_comparator(const void *a, const void *b);
static TupleTableSlot *TidNext(TidScanState *node);
/*
* Compute the list of TIDs to be visited, by evaluating the expressions
* for them.
*
* (The result is actually an array, not a list.)
*/
static void
TidListCreate(TidScanState *tidstate)
{
List *evalList = tidstate->tss_tidquals;
ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
ItemPointerData *tidList;
int numAllocTids;
int numTids;
ListCell *l;
/*
* We initialize the array with enough slots for the case that all
* quals are simple OpExprs. If there's any ScalarArrayOpExprs,
* we may have to enlarge the array.
*/
numAllocTids = list_length(evalList);
tidList = (ItemPointerData *)
palloc(numAllocTids * sizeof(ItemPointerData));
numTids = 0;
foreach(l, evalList)
{
ExprState *exstate = (ExprState *) lfirst(l);
Expr *expr = exstate->expr;
ItemPointer itemptr;
bool isNull;
if (is_opclause(expr))
{
FuncExprState *fexstate = (FuncExprState *) exstate;
Node *arg1;
Node *arg2;
arg1 = get_leftop(expr);
arg2 = get_rightop(expr);
if (IsCTIDVar(arg1))
exstate = (ExprState *) lsecond(fexstate->args);
else if (IsCTIDVar(arg2))
exstate = (ExprState *) linitial(fexstate->args);
else
elog(ERROR, "could not identify CTID variable");
itemptr = (ItemPointer)
DatumGetPointer(ExecEvalExprSwitchContext(exstate,
econtext,
&isNull,
NULL));
if (!isNull && ItemPointerIsValid(itemptr))
{
if (numTids >= numAllocTids)
{
numAllocTids *= 2;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
tidList[numTids++] = *itemptr;
}
}
else if (expr && IsA(expr, ScalarArrayOpExpr))
{
ScalarArrayOpExprState *saexstate = (ScalarArrayOpExprState *) exstate;
Datum arraydatum;
ArrayType *itemarray;
Datum *ipdatums;
bool *ipnulls;
int ndatums;
int i;
exstate = (ExprState *) lsecond(saexstate->fxprstate.args);
arraydatum = ExecEvalExprSwitchContext(exstate,
econtext,
&isNull,
NULL);
if (isNull)
continue;
itemarray = DatumGetArrayTypeP(arraydatum);
deconstruct_array(itemarray,
TIDOID, SizeOfIptrData, false, 's',
&ipdatums, &ipnulls, &ndatums);
if (numTids + ndatums > numAllocTids)
{
numAllocTids = numTids + ndatums;
tidList = (ItemPointerData *)
repalloc(tidList,
numAllocTids * sizeof(ItemPointerData));
}
for (i = 0; i < ndatums; i++)
{
if (!ipnulls[i])
{
itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
if (ItemPointerIsValid(itemptr))
tidList[numTids++] = *itemptr;
}
}
pfree(ipdatums);
pfree(ipnulls);
}
else
elog(ERROR, "could not identify CTID expression");
}
/*
* Sort the array of TIDs into order, and eliminate duplicates.
* Eliminating duplicates is necessary since we want OR semantics
* across the list. Sorting makes it easier to detect duplicates,
* and as a bonus ensures that we will visit the heap in the most
* efficient way.
*/
if (numTids > 1)
{
int lastTid;
int i;
qsort((void *) tidList, numTids, sizeof(ItemPointerData),
itemptr_comparator);
lastTid = 0;
for (i = 1; i < numTids; i++)
{
if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
tidList[++lastTid] = tidList[i];
}
numTids = lastTid + 1;
}
tidstate->tss_TidList = tidList;
tidstate->tss_NumTids = numTids;
tidstate->tss_TidPtr = -1;
}
/*
* qsort comparator for ItemPointerData items
*/
static int
itemptr_comparator(const void *a, const void *b)
{
const ItemPointerData *ipa = (const ItemPointerData *) a;
const ItemPointerData *ipb = (const ItemPointerData *) b;
BlockNumber ba = ItemPointerGetBlockNumber(ipa);
BlockNumber bb = ItemPointerGetBlockNumber(ipb);
OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
if (ba < bb)
return -1;
if (ba > bb)
return 1;
if (oa < ob)
return -1;
if (oa > ob)
return 1;
return 0;
}
/* ----------------------------------------------------------------
* TidNext
*
* Retrieve a tuple from the TidScan node's currentRelation
* using the tids in the TidScanState information.
*
* ----------------------------------------------------------------
*/
static TupleTableSlot *
TidNext(TidScanState *node)
{
EState *estate;
ScanDirection direction;
Snapshot snapshot;
Relation heapRelation;
HeapTuple tuple;
TupleTableSlot *slot;
Index scanrelid;
Buffer buffer = InvalidBuffer;
ItemPointerData *tidList;
int numTids;
bool bBackward;
/*
* extract necessary information from tid scan node
*/
estate = node->ss.ps.state;
direction = estate->es_direction;
snapshot = estate->es_snapshot;
heapRelation = node->ss.ss_currentRelation;
slot = node->ss.ss_ScanTupleSlot;
scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
/*
* Check if we are evaluating PlanQual for tuple of this relation.
* Additional checking is not good, but no other way for now. We could
* introduce new nodes for this case and handle TidScan --> NewNode
* switching in Init/ReScan plan...
*/
if (estate->es_evTuple != NULL &&
estate->es_evTuple[scanrelid - 1] != NULL)
{
if (estate->es_evTupleNull[scanrelid - 1])
return ExecClearTuple(slot);
/*
* XXX shouldn't we check here to make sure tuple matches TID list? In
* runtime-key case this is not certain, is it?
*/
ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
slot, InvalidBuffer, false);
/* Flag for the next call that no more tuples */
estate->es_evTupleNull[scanrelid - 1] = true;
return slot;
}
/*
* First time through, compute the list of TIDs to be visited
*/
if (node->tss_TidList == NULL)
TidListCreate(node);
tidList = node->tss_TidList;
numTids = node->tss_NumTids;
tuple = &(node->tss_htup);
/*
* Initialize or advance scan position, depending on direction.
*/
bBackward = ScanDirectionIsBackward(direction);
if (bBackward)
{
if (node->tss_TidPtr < 0)
{
/* initialize for backward scan */
node->tss_TidPtr = numTids - 1;
}
else
node->tss_TidPtr--;
}
else
{
if (node->tss_TidPtr < 0)
{
/* initialize for forward scan */
node->tss_TidPtr = 0;
}
else
node->tss_TidPtr++;
}
while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
{
tuple->t_self = tidList[node->tss_TidPtr];
if (heap_fetch(heapRelation, snapshot, tuple, &buffer, false, NULL))
{
/*
* store the scanned tuple in the scan tuple slot of the scan
* state. Eventually we will only do this and not return a tuple.
* Note: we pass 'false' because tuples returned by amgetnext are
* pointers onto disk pages and were not created with palloc() and
* so should not be pfree()'d.
*/
ExecStoreTuple(tuple, /* tuple to store */
slot, /* slot to store in */
buffer, /* buffer associated with tuple */
false); /* don't pfree */
/*
* At this point we have an extra pin on the buffer, because
* ExecStoreTuple incremented the pin count. Drop our local pin.
*/
ReleaseBuffer(buffer);
return slot;
}
/* Bad TID or failed snapshot qual; try next */
if (bBackward)
node->tss_TidPtr--;
else
node->tss_TidPtr++;
}
/*
* if we get here it means the tid scan failed so we are at the end of the
* scan..
*/
return ExecClearTuple(slot);
}
/* ----------------------------------------------------------------
* ExecTidScan(node)
*
* Scans the relation using tids and returns
* the next qualifying tuple in the direction specified.
* It calls ExecScan() and passes it the access methods which returns
* the next tuple using the tids.
*
* Conditions:
* -- the "cursor" maintained by the AMI is positioned at the tuple
* returned previously.
*
* Initial States:
* -- the relation indicated is opened for scanning so that the
* "cursor" is positioned before the first qualifying tuple.
* -- tidPtr is -1.
* ----------------------------------------------------------------
*/
TupleTableSlot *
ExecTidScan(TidScanState *node)
{
/*
* use TidNext as access method
*/
return ExecScan(&node->ss, (ExecScanAccessMtd) TidNext);
}
/* ----------------------------------------------------------------
* ExecTidReScan(node)
* ----------------------------------------------------------------
*/
void
ExecTidReScan(TidScanState *node, ExprContext *exprCtxt)
{
EState *estate;
Index scanrelid;
estate = node->ss.ps.state;
scanrelid = ((TidScan *) node->ss.ps.plan)->scan.scanrelid;
/* If we are being passed an outer tuple, save it for runtime key calc */
if (exprCtxt != NULL)
node->ss.ps.ps_ExprContext->ecxt_outertuple =
exprCtxt->ecxt_outertuple;
/* If this is re-scanning of PlanQual ... */
if (estate->es_evTuple != NULL &&
estate->es_evTuple[scanrelid - 1] != NULL)
{
estate->es_evTupleNull[scanrelid - 1] = false;
return;
}
if (node->tss_TidList)
pfree(node->tss_TidList);
node->tss_TidList = NULL;
node->tss_NumTids = 0;
node->tss_TidPtr = -1;
}
/* ----------------------------------------------------------------
* ExecEndTidScan
*
* Releases any storage allocated through C routines.
* Returns nothing.
* ----------------------------------------------------------------
*/
void
ExecEndTidScan(TidScanState *node)
{
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->ss.ps);
/*
* clear out tuple table slots
*/
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
ExecClearTuple(node->ss.ss_ScanTupleSlot);
/*
* close the heap relation.
*/
ExecCloseScanRelation(node->ss.ss_currentRelation);
}
/* ----------------------------------------------------------------
* ExecTidMarkPos
*
* Marks scan position by marking the current tid.
* Returns nothing.
* ----------------------------------------------------------------
*/
void
ExecTidMarkPos(TidScanState *node)
{
node->tss_MarkTidPtr = node->tss_TidPtr;
}
/* ----------------------------------------------------------------
* ExecTidRestrPos
*
* Restores scan position by restoring the current tid.
* Returns nothing.
*
* XXX Assumes previously marked scan position belongs to current tid
* ----------------------------------------------------------------
*/
void
ExecTidRestrPos(TidScanState *node)
{
node->tss_TidPtr = node->tss_MarkTidPtr;
}
/* ----------------------------------------------------------------
* ExecInitTidScan
*
* Initializes the tid scan's state information, creates
* scan keys, and opens the base and tid relations.
*
* Parameters:
* node: TidNode node produced by the planner.
* estate: the execution state initialized in InitPlan.
* ----------------------------------------------------------------
*/
TidScanState *
ExecInitTidScan(TidScan *node, EState *estate, int eflags)
{
TidScanState *tidstate;
Relation currentRelation;
/*
* create state structure
*/
tidstate = makeNode(TidScanState);
tidstate->ss.ps.plan = (Plan *) node;
tidstate->ss.ps.state = estate;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &tidstate->ss.ps);
/*
* initialize child expressions
*/
tidstate->ss.ps.targetlist = (List *)
ExecInitExpr((Expr *) node->scan.plan.targetlist,
(PlanState *) tidstate);
tidstate->ss.ps.qual = (List *)
ExecInitExpr((Expr *) node->scan.plan.qual,
(PlanState *) tidstate);
tidstate->tss_tidquals = (List *)
ExecInitExpr((Expr *) node->tidquals,
(PlanState *) tidstate);
#define TIDSCAN_NSLOTS 2
/*
* tuple table initialization
*/
ExecInitResultTupleSlot(estate, &tidstate->ss.ps);
ExecInitScanTupleSlot(estate, &tidstate->ss);
/*
* mark tid list as not computed yet
*/
tidstate->tss_TidList = NULL;
tidstate->tss_NumTids = 0;
tidstate->tss_TidPtr = -1;
/*
* open the base relation and acquire appropriate lock on it.
*/
currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
tidstate->ss.ss_currentRelation = currentRelation;
tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
/*
* get the scan type from the relation descriptor.
*/
ExecAssignScanType(&tidstate->ss, RelationGetDescr(currentRelation));
/*
* Initialize result tuple type and projection info.
*/
ExecAssignResultTypeFromTL(&tidstate->ss.ps);
ExecAssignScanProjectionInfo(&tidstate->ss);
/*
* all done.
*/
return tidstate;
}
int
ExecCountSlotsTidScan(TidScan *node)
{
return ExecCountSlotsNode(outerPlan((Plan *) node)) +
ExecCountSlotsNode(innerPlan((Plan *) node)) + TIDSCAN_NSLOTS;
}