Fix a bug where GIN held too many LWLocks at once, reported by Craig Ringer
<craig@postnewspapers.com.au>. It was my mistake: I missed the limit on the number of simultaneously held locks. Now GIN doesn't hold locks continuously, but still keeps buffers pinned to prevent interference with vacuum's deletion algorithm.
This commit is contained in:
parent
cebc3c42bb
commit
dcd4075379
@ -8,7 +8,7 @@
|
|||||||
* Portions Copyright (c) 1994, Regents of the University of California
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10 2008/01/01 19:45:46 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.10.2.1 2008/04/22 17:54:19 teodor Exp $
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -27,58 +27,16 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
|||||||
/* page was deleted by concurrent vacuum */
|
/* page was deleted by concurrent vacuum */
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (*off > maxoff || *off == InvalidOffsetNumber)
|
|
||||||
res = -1;
|
|
||||||
else
|
|
||||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
|
||||||
|
|
||||||
if (res == 0)
|
|
||||||
{
|
|
||||||
/* page isn't changed */
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
else if (res > 0)
|
|
||||||
{
|
|
||||||
/*
|
/*
|
||||||
* some items was added before our position, look further to find it
|
* scan page to find equal or first greater value
|
||||||
* or first greater
|
|
||||||
*/
|
|
||||||
|
|
||||||
(*off)++;
|
|
||||||
for (; *off <= maxoff; (*off)++)
|
|
||||||
{
|
|
||||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
|
||||||
|
|
||||||
if (res == 0)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
if (res < 0)
|
|
||||||
{
|
|
||||||
(*off)--;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* some items was deleted before our position, look from begining to
|
|
||||||
* find it or first greater
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
|
for (*off = FirstOffsetNumber; *off <= maxoff; (*off)++)
|
||||||
{
|
{
|
||||||
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
res = compareItemPointers(item, (ItemPointer) GinDataPageGetItem(page, *off));
|
||||||
|
|
||||||
if (res == 0)
|
if (res <= 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
if (res < 0)
|
|
||||||
{
|
|
||||||
(*off)--;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -89,24 +47,23 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
|
|||||||
* Stop* functions unlock buffer (but don't release!)
|
* Stop* functions unlock buffer (but don't release!)
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firstCall)
|
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
|
||||||
{
|
{
|
||||||
|
GinBtreeData btreeEntry;
|
||||||
|
GinBtreeStack *stackEntry;
|
||||||
|
Page page;
|
||||||
|
bool needUnlock = TRUE;
|
||||||
|
|
||||||
if (entry->master != NULL)
|
if (entry->master != NULL)
|
||||||
{
|
{
|
||||||
entry->isFinished = entry->master->isFinished;
|
entry->isFinished = entry->master->isFinished;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (firstCall)
|
|
||||||
{
|
|
||||||
/*
|
/*
|
||||||
* at first call we should find entry, and begin scan of posting tree
|
* we should find entry, and begin scan of posting tree
|
||||||
* or just store posting list in memory
|
* or just store posting list in memory
|
||||||
*/
|
*/
|
||||||
GinBtreeData btreeEntry;
|
|
||||||
GinBtreeStack *stackEntry;
|
|
||||||
Page page;
|
|
||||||
bool needUnlock = TRUE;
|
|
||||||
|
|
||||||
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
|
prepareEntryScan(&btreeEntry, index, entry->entry, ginstate);
|
||||||
btreeEntry.searchMode = TRUE;
|
btreeEntry.searchMode = TRUE;
|
||||||
@ -136,11 +93,25 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
|
|||||||
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
|
||||||
|
|
||||||
entry->buffer = scanBeginPostingTree(gdi);
|
entry->buffer = scanBeginPostingTree(gdi);
|
||||||
|
/*
|
||||||
|
* We keep buffer pinned because we need to prevent deletion of the
|
||||||
|
* page during scan. See GIN's vacuum implementation. RefCount
|
||||||
|
* is increased to keep buffer pinned after freeGinBtreeStack() call.
|
||||||
|
*/
|
||||||
IncrBufferRefCount(entry->buffer);
|
IncrBufferRefCount(entry->buffer);
|
||||||
|
|
||||||
page = BufferGetPage(entry->buffer);
|
page = BufferGetPage(entry->buffer);
|
||||||
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
entry->predictNumberResult = gdi->stack->predictNumber * GinPageGetOpaque(page)->maxoff;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Keep page content in memory to prevent durable page locking
|
||||||
|
*/
|
||||||
|
entry->list = (ItemPointerData *) palloc( BLCKSZ );
|
||||||
|
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
||||||
|
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
||||||
|
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
|
||||||
|
|
||||||
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||||
freeGinBtreeStack(gdi->stack);
|
freeGinBtreeStack(gdi->stack);
|
||||||
pfree(gdi);
|
pfree(gdi);
|
||||||
entry->isFinished = FALSE;
|
entry->isFinished = FALSE;
|
||||||
@ -157,51 +128,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry, bool firs
|
|||||||
if (needUnlock)
|
if (needUnlock)
|
||||||
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
|
||||||
freeGinBtreeStack(stackEntry);
|
freeGinBtreeStack(stackEntry);
|
||||||
}
|
|
||||||
else if (entry->buffer != InvalidBuffer)
|
|
||||||
{
|
|
||||||
/* we should find place where we was stopped */
|
|
||||||
BlockNumber blkno;
|
|
||||||
Page page;
|
|
||||||
|
|
||||||
LockBuffer(entry->buffer, GIN_SHARE);
|
|
||||||
|
|
||||||
if (!ItemPointerIsValid(&entry->curItem))
|
|
||||||
/* start position */
|
|
||||||
return;
|
|
||||||
Assert(entry->offset != InvalidOffsetNumber);
|
|
||||||
|
|
||||||
page = BufferGetPage(entry->buffer);
|
|
||||||
|
|
||||||
/* try to find curItem in current buffer */
|
|
||||||
if (findItemInPage(page, &entry->curItem, &entry->offset))
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* walk to right */
|
|
||||||
while ((blkno = GinPageGetOpaque(page)->rightlink) != InvalidBlockNumber)
|
|
||||||
{
|
|
||||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
|
||||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
|
||||||
LockBuffer(entry->buffer, GIN_SHARE);
|
|
||||||
page = BufferGetPage(entry->buffer);
|
|
||||||
|
|
||||||
entry->offset = InvalidOffsetNumber;
|
|
||||||
if (findItemInPage(page, &entry->curItem, &entry->offset))
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* curItem and any greated items was deleted by concurrent vacuum, so
|
|
||||||
* we finished scan with currrent entry
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
stopScanEntry(GinScanEntry entry)
|
|
||||||
{
|
|
||||||
if (entry->buffer != InvalidBuffer)
|
|
||||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -209,11 +135,12 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
|||||||
{
|
{
|
||||||
uint32 i;
|
uint32 i;
|
||||||
|
|
||||||
for (i = 0; i < key->nentries; i++)
|
if (!key->firstCall)
|
||||||
startScanEntry(index, ginstate, key->scanEntry + i, key->firstCall);
|
return;
|
||||||
|
|
||||||
|
for (i = 0; i < key->nentries; i++)
|
||||||
|
startScanEntry(index, ginstate, key->scanEntry + i);
|
||||||
|
|
||||||
if (key->firstCall)
|
|
||||||
{
|
|
||||||
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
|
||||||
key->isFinished = FALSE;
|
key->isFinished = FALSE;
|
||||||
key->firstCall = FALSE;
|
key->firstCall = FALSE;
|
||||||
@ -238,16 +165,6 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
|
|||||||
key->scanEntry[i].reduceResult = TRUE;
|
key->scanEntry[i].reduceResult = TRUE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
stopScanKey(GinScanKey key)
|
|
||||||
{
|
|
||||||
uint32 i;
|
|
||||||
|
|
||||||
for (i = 0; i < key->nentries; i++)
|
|
||||||
stopScanEntry(key->scanEntry + i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -260,44 +177,82 @@ startScan(IndexScanDesc scan)
|
|||||||
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
|
startScanKey(scan->indexRelation, &so->ginstate, so->keys + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
/*
|
||||||
stopScan(IndexScanDesc scan)
|
* Gets next ItemPointer from PostingTree. Note, that we copy
|
||||||
{
|
* page into GinScanEntry->list array and unlock page, but keep it pinned
|
||||||
uint32 i;
|
* to prevent interference with vacuum
|
||||||
GinScanOpaque so = (GinScanOpaque) scan->opaque;
|
*/
|
||||||
|
|
||||||
for (i = 0; i < so->nkeys; i++)
|
|
||||||
stopScanKey(so->keys + i);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
entryGetNextItem(Relation index, GinScanEntry entry)
|
entryGetNextItem(Relation index, GinScanEntry entry)
|
||||||
{
|
{
|
||||||
Page page = BufferGetPage(entry->buffer);
|
Page page;
|
||||||
|
BlockNumber blkno;
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
entry->offset++;
|
entry->offset++;
|
||||||
if (entry->offset <= GinPageGetOpaque(page)->maxoff && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber)
|
|
||||||
|
if (entry->offset <= entry->nlist)
|
||||||
{
|
{
|
||||||
entry->curItem = *(ItemPointerData *) GinDataPageGetItem(page, entry->offset);
|
entry->curItem = entry->list[entry->offset - 1];
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
LockBuffer(entry->buffer, GIN_SHARE);
|
||||||
|
page = BufferGetPage(entry->buffer);
|
||||||
|
for(;;)
|
||||||
{
|
{
|
||||||
BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
|
/*
|
||||||
|
* It's needed to go by right link. During that we should refind
|
||||||
|
* first ItemPointer greater than the stored one
|
||||||
|
*/
|
||||||
|
|
||||||
|
blkno = GinPageGetOpaque(page)->rightlink;
|
||||||
|
|
||||||
LockBuffer(entry->buffer, GIN_UNLOCK);
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||||
if (blkno == InvalidBlockNumber)
|
if (blkno == InvalidBlockNumber)
|
||||||
{
|
{
|
||||||
ReleaseBuffer(entry->buffer);
|
ReleaseBuffer(entry->buffer);
|
||||||
|
ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
|
||||||
entry->buffer = InvalidBuffer;
|
entry->buffer = InvalidBuffer;
|
||||||
entry->isFinished = TRUE;
|
entry->isFinished = TRUE;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
entry->buffer = ReleaseAndReadBuffer(entry->buffer, index, blkno);
|
||||||
LockBuffer(entry->buffer, GIN_SHARE);
|
LockBuffer(entry->buffer, GIN_SHARE);
|
||||||
|
page = BufferGetPage(entry->buffer);
|
||||||
|
|
||||||
entry->offset = InvalidOffsetNumber;
|
entry->offset = InvalidOffsetNumber;
|
||||||
entryGetNextItem(index, entry);
|
if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Found position equal to or greater than stored
|
||||||
|
*/
|
||||||
|
entry->nlist = GinPageGetOpaque(page)->maxoff;
|
||||||
|
memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
|
||||||
|
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
|
||||||
|
|
||||||
|
LockBuffer(entry->buffer, GIN_UNLOCK);
|
||||||
|
|
||||||
|
if ( !ItemPointerIsValid(&entry->curItem) ||
|
||||||
|
compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* First pages are deleted or empty, or we found exact position,
|
||||||
|
* so break inner loop and continue outer one.
|
||||||
|
*/
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Found a position greater than entry->curItem; store it.
|
||||||
|
*/
|
||||||
|
entry->curItem = entry->list[entry->offset - 1];
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -317,7 +272,7 @@ entryGetItem(Relation index, GinScanEntry entry)
|
|||||||
entry->isFinished = entry->master->isFinished;
|
entry->isFinished = entry->master->isFinished;
|
||||||
entry->curItem = entry->master->curItem;
|
entry->curItem = entry->master->curItem;
|
||||||
}
|
}
|
||||||
else if (entry->list)
|
else if (!BufferIsValid(entry->buffer))
|
||||||
{
|
{
|
||||||
entry->offset++;
|
entry->offset++;
|
||||||
if (entry->offset <= entry->nlist)
|
if (entry->offset <= entry->nlist)
|
||||||
@ -501,8 +456,6 @@ gingetmulti(PG_FUNCTION_ARGS)
|
|||||||
break;
|
break;
|
||||||
} while (*returned_tids < max_tids);
|
} while (*returned_tids < max_tids);
|
||||||
|
|
||||||
stopScan(scan);
|
|
||||||
|
|
||||||
PG_RETURN_BOOL(*returned_tids == max_tids);
|
PG_RETURN_BOOL(*returned_tids == max_tids);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -524,7 +477,6 @@ gingettuple(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
startScan(scan);
|
startScan(scan);
|
||||||
res = scanGetItem(scan, &scan->xs_ctup.t_self);
|
res = scanGetItem(scan, &scan->xs_ctup.t_self);
|
||||||
stopScan(scan);
|
|
||||||
|
|
||||||
PG_RETURN_BOOL(res);
|
PG_RETURN_BOOL(res);
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
|
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16 2008/01/01 19:45:56 momjian Exp $
|
* $PostgreSQL: pgsql/src/include/access/gin.h,v 1.16.2.1 2008/04/22 17:54:19 teodor Exp $
|
||||||
*--------------------------------------------------------------------------
|
*--------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -356,14 +356,16 @@ typedef struct GinScanEntryData
|
|||||||
/* entry, got from extractQueryFn */
|
/* entry, got from extractQueryFn */
|
||||||
Datum entry;
|
Datum entry;
|
||||||
|
|
||||||
/* current ItemPointer to heap, its offset in buffer and buffer */
|
/* Current page in posting tree */
|
||||||
ItemPointerData curItem;
|
|
||||||
OffsetNumber offset;
|
|
||||||
Buffer buffer;
|
Buffer buffer;
|
||||||
|
|
||||||
/* in case of Posing list */
|
/* current ItemPointer to heap */
|
||||||
|
ItemPointerData curItem;
|
||||||
|
|
||||||
|
/* used for Posting list and one page in Posting tree */
|
||||||
ItemPointerData *list;
|
ItemPointerData *list;
|
||||||
uint32 nlist;
|
uint32 nlist;
|
||||||
|
OffsetNumber offset;
|
||||||
|
|
||||||
bool isFinished;
|
bool isFinished;
|
||||||
bool reduceResult;
|
bool reduceResult;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user