diff --git a/src/backend/access/heap/README.HOT b/src/backend/access/heap/README.HOT index 82984efc756..dd34c540311 100644 --- a/src/backend/access/heap/README.HOT +++ b/src/backend/access/heap/README.HOT @@ -404,6 +404,12 @@ from the index, as well as ensuring that no one can see any inconsistent rows in a broken HOT chain (the first condition is stronger than the second). Finally, we can mark the index valid for searches. +Note that we do not need to set pg_index.indcheckxmin in this code path, +because we have outwaited any transactions that would need to avoid using +the index. (indcheckxmin is only needed because non-concurrent CREATE +INDEX doesn't want to wait; its stronger lock would create too much risk of +deadlock if it did.) + Limitations and Restrictions ---------------------------- diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 93eb2d1b495..4df2e758f8b 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -995,7 +995,7 @@ BuildIndexInfo(Relation index) /* other info */ ii->ii_Unique = indexStruct->indisunique; - ii->ii_ReadyForInserts = indexStruct->indisready; + ii->ii_ReadyForInserts = IndexIsReady(indexStruct); /* initialize index-build state to default */ ii->ii_Concurrent = false; @@ -1397,8 +1397,20 @@ index_build(Relation heapRelation, * index's usability horizon. Moreover, we *must not* try to change * the index's pg_index entry while reindexing pg_index itself, and this * optimization nicely prevents that. + * + * We also need not set indcheckxmin during a concurrent index build, + * because we won't set indisvalid true until all transactions that care + * about the broken HOT chains are gone. + * + * Therefore, this code path can only be taken during non-concurrent + * CREATE INDEX. Thus the fact that heap_update will set the pg_index + * tuple's xmin doesn't matter, because that tuple was created in the + * current transaction anyway. That also means we don't need to worry + * about any concurrent readers of the tuple; no other transaction can see + * it yet. */ - if (indexInfo->ii_BrokenHotChain && !isreindex) + if (indexInfo->ii_BrokenHotChain && !isreindex && + !indexInfo->ii_Concurrent) { Oid indexId = RelationGetRelid(indexRelation); Relation pg_index; @@ -2208,6 +2220,66 @@ validate_index_heapscan(Relation heapRelation, } +/* + * index_set_state_flags - adjust pg_index state flags + * + * This is used during CREATE INDEX CONCURRENTLY to adjust the pg_index + * flags that denote the index's state. We must use an in-place update of + * the pg_index tuple, because we do not have exclusive lock on the parent + * table and so other sessions might concurrently be doing SnapshotNow scans + * of pg_index to identify the table's indexes. A transactional update would + * risk somebody not seeing the index at all. Because the update is not + * transactional and will not roll back on error, this must only be used as + * the last step in a transaction that has not made any transactional catalog + * updates! + * + * Note that heap_inplace_update does send a cache inval message for the + * tuple, so other sessions will hear about the update as soon as we commit. + */ +void +index_set_state_flags(Oid indexId, IndexStateFlagsAction action) +{ + Relation pg_index; + HeapTuple indexTuple; + Form_pg_index indexForm; + + /* Assert that current xact hasn't done any transactional updates */ + Assert(GetTopTransactionIdIfAny() == InvalidTransactionId); + + /* Open pg_index and fetch a writable copy of the index's tuple */ + pg_index = heap_open(IndexRelationId, RowExclusiveLock); + + indexTuple = SearchSysCacheCopy(INDEXRELID, + ObjectIdGetDatum(indexId), + 0, 0, 0); + if (!HeapTupleIsValid(indexTuple)) + elog(ERROR, "cache lookup failed for index %u", indexId); + indexForm = (Form_pg_index) GETSTRUCT(indexTuple); + + /* Perform the requested state change on the copy */ + switch (action) + { + case INDEX_CREATE_SET_READY: + /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */ + Assert(!indexForm->indisready); + Assert(!indexForm->indisvalid); + indexForm->indisready = true; + break; + case INDEX_CREATE_SET_VALID: + /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */ + Assert(indexForm->indisready); + Assert(!indexForm->indisvalid); + indexForm->indisvalid = true; + break; + } + + /* ... and write it back in-place */ + heap_inplace_update(pg_index, indexTuple); + + heap_close(pg_index, RowExclusiveLock); +} + + /* * IndexGetRelation: given an index's relation OID, get the OID of the * relation it is an index on. Uses the system cache. @@ -2357,6 +2429,15 @@ reindex_index(Oid indexId) indexForm->indisready = true; simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple); + + /* + * Invalidate the relcache for the table, so that after we commit + * all sessions will refresh the table's index list. This ensures + * that if anyone misses seeing the pg_index row during this + * update, they'll refresh their list before attempting any update + * on the table. + */ + CacheInvalidateRelcache(heapRelation); } heap_close(pg_index, RowExclusiveLock); diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 06a6bfc1a30..74a8c7f71a6 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -431,7 +431,7 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) * might put recently-dead tuples out-of-order in the new table, and there * is little harm in that.) */ - if (!OldIndex->rd_index->indisvalid) + if (!IndexIsValid(OldIndex->rd_index)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster on invalid index \"%s\"", @@ -473,6 +473,11 @@ check_index_is_clusterable(Relation OldHeap, Oid indexOid, bool recheck) * mark_index_clustered: mark the specified index as the one clustered on * * With indexOid == InvalidOid, will mark all indexes of rel not-clustered. + * + * Note: we do transactional updates of the pg_index rows, which are unsafe + * against concurrent SnapshotNow scans of pg_index. Therefore this is unsafe + * to execute with less than full exclusive lock on the parent table; + * otherwise concurrent executions of RelationGetIndexList could miss indexes. */ void mark_index_clustered(Relation rel, Oid indexOid) @@ -533,6 +538,9 @@ mark_index_clustered(Relation rel, Oid indexOid) } else if (thisIndexOid == indexOid) { + /* this was checked earlier, but let's be real sure */ + if (!IndexIsValid(indexForm)) + elog(ERROR, "cannot cluster on invalid index %u", indexOid); indexForm->indisclustered = true; simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 6052eed7d9e..d7ce75308c6 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -131,9 +131,6 @@ DefineIndex(RangeVar *heapRelation, LockRelId heaprelid; LOCKTAG heaplocktag; Snapshot snapshot; - Relation pg_index; - HeapTuple indexTuple; - Form_pg_index indexForm; /* * count attributes in index @@ -559,24 +556,7 @@ DefineIndex(RangeVar *heapRelation, * commit this transaction, any new transactions that open the table must * insert new entries into the index for insertions and non-HOT updates. */ - pg_index = heap_open(IndexRelationId, RowExclusiveLock); - - indexTuple = SearchSysCacheCopy(INDEXRELID, - ObjectIdGetDatum(indexRelationId), - 0, 0, 0); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", indexRelationId); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - - Assert(!indexForm->indisready); - Assert(!indexForm->indisvalid); - - indexForm->indisready = true; - - simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); - CatalogUpdateIndexes(pg_index, indexTuple); - - heap_close(pg_index, RowExclusiveLock); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY); /* * Commit this transaction to make the indisready update visible. @@ -654,24 +634,7 @@ DefineIndex(RangeVar *heapRelation, /* * Index can now be marked valid -- update its pg_index entry */ - pg_index = heap_open(IndexRelationId, RowExclusiveLock); - - indexTuple = SearchSysCacheCopy(INDEXRELID, - ObjectIdGetDatum(indexRelationId), - 0, 0, 0); - if (!HeapTupleIsValid(indexTuple)) - elog(ERROR, "cache lookup failed for index %u", indexRelationId); - indexForm = (Form_pg_index) GETSTRUCT(indexTuple); - - Assert(indexForm->indisready); - Assert(!indexForm->indisvalid); - - indexForm->indisvalid = true; - - simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); - CatalogUpdateIndexes(pg_index, indexTuple); - - heap_close(pg_index, RowExclusiveLock); + index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID); /* * The pg_index update will cause backends (including this one) to update @@ -679,7 +642,7 @@ DefineIndex(RangeVar *heapRelation, * relcache inval on the parent table to force replanning of cached plans. * Otherwise existing sessions might fail to use the new index where it * would be useful. (Note that our earlier commits did not create reasons - * to replan; relcache flush on the index itself was sufficient.) + * to replan; so relcache flush on the index itself was sufficient.) */ CacheInvalidateRelcacheByRelid(heaprelid.relId); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 090ef8010aa..a5bd0babba5 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3228,6 +3228,8 @@ ATExecDropNotNull(Relation rel, const char *colName) /* * Check that the attribute is not in a primary key + * + * Note: we'll throw error even if the pkey index is not valid. */ /* Loop over all indexes on the relation */ @@ -4226,7 +4228,7 @@ transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, /* * Get the list of index OIDs for the table from the relcache, and look up * each one in the pg_index syscache until we find one marked primary key - * (hopefully there isn't more than one such). + * (hopefully there isn't more than one such). Insist it's valid, too. */ *indexOid = InvalidOid; @@ -4242,7 +4244,7 @@ transformFkeyGetPrimaryKey(Relation pkrel, Oid *indexOid, if (!HeapTupleIsValid(indexTuple)) elog(ERROR, "cache lookup failed for index %u", indexoid); indexStruct = (Form_pg_index) GETSTRUCT(indexTuple); - if (indexStruct->indisprimary) + if (indexStruct->indisprimary && IndexIsValid(indexStruct)) { *indexOid = indexoid; break; @@ -4330,10 +4332,12 @@ transformFkeyCheckAttrs(Relation pkrel, /* * Must have the right number of columns; must be unique and not a - * partial index; forget it if there are any expressions, too + * partial index; forget it if there are any expressions, too. Invalid + * indexes are out as well. */ if (indexStruct->indnatts == numattrs && indexStruct->indisunique && + IndexIsValid(indexStruct) && heap_attisnull(indexTuple, Anum_pg_index_indpred) && heap_attisnull(indexTuple, Anum_pg_index_indexprs)) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index b2dfa870875..a0b961fc1d2 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -3672,9 +3672,16 @@ vac_cmp_vtlinks(const void *left, const void *right) /* - * Open all the indexes of the given relation, obtaining the specified kind - * of lock on each. Return an array of Relation pointers for the indexes - * into *Irel, and the number of indexes into *nindexes. + * Open all the vacuumable indexes of the given relation, obtaining the + * specified kind of lock on each. Return an array of Relation pointers for + * the indexes into *Irel, and the number of indexes into *nindexes. + * + * We consider an index vacuumable if it is marked insertable (IndexIsReady). + * If it isn't, probably a CREATE INDEX CONCURRENTLY command failed early in + * execution, and what we have is too corrupt to be processable. We will + * vacuum even if the index isn't indisvalid; this is important because in a + * unique index, uniqueness checks will be performed anyway and had better not + * hit dangling index pointers. */ void vac_open_indexes(Relation relation, LOCKMODE lockmode, @@ -3688,21 +3695,30 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, indexoidlist = RelationGetIndexList(relation); - *nindexes = list_length(indexoidlist); + /* allocate enough memory for all indexes */ + i = list_length(indexoidlist); - if (*nindexes > 0) - *Irel = (Relation *) palloc(*nindexes * sizeof(Relation)); + if (i > 0) + *Irel = (Relation *) palloc(i * sizeof(Relation)); else *Irel = NULL; + /* collect just the ready indexes */ i = 0; foreach(indexoidscan, indexoidlist) { Oid indexoid = lfirst_oid(indexoidscan); + Relation indrel; - (*Irel)[i++] = index_open(indexoid, lockmode); + indrel = index_open(indexoid, lockmode); + if (IndexIsReady(indrel->rd_index)) + (*Irel)[i++] = indrel; + else + index_close(indrel, lockmode); } + *nindexes = i; + list_free(indexoidlist); } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index b6a82566a37..cde77d724c4 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -921,6 +921,9 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo) /* * For each index, open the index relation and save pg_index info. We * acquire RowExclusiveLock, signifying we will update the index. + * + * Note: we do this even if the index is not IndexIsReady; it's not worth + * the trouble to optimize for the case where it isn't. */ i = 0; foreach(l, indexoidlist) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index cbcdc5b8f40..bed18024309 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -159,9 +159,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * Ignore invalid indexes, since they can't safely be used for * queries. Note that this is OK because the data structure we * are constructing is only used by the planner --- the executor - * still needs to insert into "invalid" indexes! + * still needs to insert into "invalid" indexes, if they're marked + * IndexIsReady. */ - if (!index->indisvalid) + if (!IndexIsValid(index)) { index_close(indexRelation, NoLock); continue; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index c28db129381..f3d7fd53c45 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1700,9 +1700,20 @@ RelationReloadIndexInfo(Relation relation) RelationGetRelid(relation)); index = (Form_pg_index) GETSTRUCT(tuple); + /* + * Basically, let's just copy all the bool fields. There are one or + * two of these that can't actually change in the current code, but + * it's not worth it to track exactly which ones they are. None of + * the array fields are allowed to change, though. + */ + relation->rd_index->indisunique = index->indisunique; + relation->rd_index->indisprimary = index->indisprimary; + relation->rd_index->indisclustered = index->indisclustered; relation->rd_index->indisvalid = index->indisvalid; relation->rd_index->indcheckxmin = index->indcheckxmin; relation->rd_index->indisready = index->indisready; + + /* Copy xmin too, as that is needed to make sense of indcheckxmin */ HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, HeapTupleHeaderGetXmin(tuple->t_data)); @@ -3077,7 +3088,8 @@ RelationGetIndexList(Relation relation) result = insert_ordered_oid(result, index->indexrelid); /* Check to see if it is a unique, non-partial btree index on OID */ - if (index->indnatts == 1 && + if (IndexIsValid(index) && + index->indnatts == 1 && index->indisunique && index->indkey.values[0] == ObjectIdAttributeNumber && index->indclass.values[0] == OID_BTREE_OPS_OID && @@ -3384,6 +3396,11 @@ RelationGetIndexAttrBitmap(Relation relation) /* * For each index, add referenced attributes to indexattrs. + * + * Note: we consider all indexes returned by RelationGetIndexList, even if + * they are not indisready or indisvalid. This is important because an + * index for which CREATE INDEX CONCURRENTLY has just started must be + * included in HOT-safety decisions (see README.HOT). */ indexattrs = NULL; foreach(l, indexoidlist) diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index a92201546b1..21123191a80 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -27,6 +27,13 @@ typedef void (*IndexBuildCallback) (Relation index, bool tupleIsAlive, void *state); +/* Action code for index_set_state_flags */ +typedef enum +{ + INDEX_CREATE_SET_READY, + INDEX_CREATE_SET_VALID +} IndexStateFlagsAction; + extern Oid index_create(Oid heapRelationId, const char *indexRelationName, @@ -70,6 +77,8 @@ extern double IndexBuildHeapScan(Relation heapRelation, extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); +extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action); + extern void reindex_index(Oid indexId); extern bool reindex_relation(Oid relid, bool toast_too); diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index e06505babb3..81bbc57c66e 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -91,4 +91,12 @@ typedef FormData_pg_index *Form_pg_index; #define INDOPTION_DESC 0x0001 /* values are in reverse order */ #define INDOPTION_NULLS_FIRST 0x0002 /* NULLs are first instead of last */ +/* + * Use of these macros is recommended over direct examination of the state + * flag columns where possible; this allows source code compatibility with + * 9.2 and up. + */ +#define IndexIsValid(indexForm) ((indexForm)->indisvalid) +#define IndexIsReady(indexForm) ((indexForm)->indisready) + #endif /* PG_INDEX_H */