diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 32c724e5ce2..f719059f0a7 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -37,7 +37,7 @@ typedef enum PREWARM_BUFFER } PrewarmType; -static char blockbuffer[BLCKSZ]; +static PGAlignedBlock blockbuffer; /* * pg_prewarm(regclass, mode text, fork text, @@ -179,7 +179,7 @@ pg_prewarm(PG_FUNCTION_ARGS) for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); - smgrread(rel->rd_smgr, forkNumber, block, blockbuffer); + smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data); ++blocks_done; } } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index bdf1f2e5889..f013b9fdeb2 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -633,7 +633,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, /* these must be static so they can be returned to caller */ static ginxlogSplitEntry data; - static char tupstore[2 * BLCKSZ]; + static PGAlignedBlock tupstore[2]; entryPreparePage(btree, lpage, off, insertData, updateblkno); @@ -642,7 +642,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, * one after another in a temporary workspace. */ maxoff = PageGetMaxOffsetNumber(lpage); - ptr = tupstore; + ptr = tupstore[0].data; for (i = FirstOffsetNumber; i <= maxoff; i++) { if (i == off) @@ -667,7 +667,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, ptr += size; totalsize += size + sizeof(ItemIdData); } - tupstoresize = ptr - tupstore; + tupstoresize = ptr - tupstore[0].data; /* * Initialize the left and right pages, and copy all the tuples back to @@ -676,7 +676,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); - ptr = tupstore; + ptr = tupstore[0].data; maxoff++; lsize = 0; @@ -715,7 +715,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf, rdata[0].next = &rdata[1]; rdata[1].buffer = InvalidBuffer; - rdata[1].data = tupstore; + rdata[1].data = tupstore[0].data; rdata[1].len = tupstoresize; rdata[1].next = NULL; diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index 7888b896ab3..a54d7c4f363 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -52,18 +52,15 @@ writeListPage(Relation index, Buffer buffer, size = 0; OffsetNumber l, off; - char *workspace; + PGAlignedBlock workspace; char *ptr; - /* workspace could be a local array; we use palloc for alignment */ - workspace = palloc(BLCKSZ); - START_CRIT_SECTION(); GinInitBuffer(buffer, GIN_LIST); off = FirstOffsetNumber; - ptr = workspace; + ptr = workspace.data; for (i = 0; i < ntuples; i++) { @@ -120,7 +117,7 @@ writeListPage(Relation index, Buffer buffer, rdata[0].next = rdata + 1; rdata[1].buffer = InvalidBuffer; - rdata[1].data = workspace; + rdata[1].data = workspace.data; rdata[1].len = size; rdata[1].next = NULL; @@ -135,8 +132,6 @@ writeListPage(Relation index, Buffer buffer, END_CRIT_SECTION(); - pfree(workspace); - return freesize; } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index 9ab2b06cc9f..38e719ecfac 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -710,7 +710,7 @@ static bool _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) { BlockNumber lastblock; - char zerobuf[BLCKSZ]; + PGAlignedBlock zerobuf; lastblock = firstblock + nblocks - 1; @@ -721,10 +721,10 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) if (lastblock < firstblock || lastblock == InvalidBlockNumber) return false; - MemSet(zerobuf, 0, sizeof(zerobuf)); + MemSet(zerobuf.data, 0, sizeof(zerobuf)); RelationOpenSmgr(rel); - smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false); + smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false); return true; } diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 99165b1e487..ed8dbae0d24 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2318,7 +2318,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, HeapTuple *heaptuples; int i; int ndone; - char *scratch = NULL; + PGAlignedBlock scratch; Page page; bool needwal; Size saveFreeSpace; @@ -2335,14 +2335,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, heaptuples[i] = heap_prepare_insert(relation, tuples[i], xid, cid, options); - /* - * Allocate some memory to use for constructing the WAL record. Using - * palloc() within a critical section is not safe, so we allocate this - * beforehand. - */ - if (needwal) - scratch = palloc(BLCKSZ); - /* * We're about to do the actual inserts -- but check for conflict first, * to minimize the possibility of having to roll back work we've just @@ -2427,7 +2419,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, uint8 info = XLOG_HEAP2_MULTI_INSERT; char *tupledata; int totaldatalen; - char *scratchptr = scratch; + char *scratchptr = scratch.data; bool init; /* @@ -2494,10 +2486,10 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, log_heap_new_cid(relation, heaptup); } totaldatalen = scratchptr - tupledata; - Assert((scratchptr - scratch) < BLCKSZ); + Assert((scratchptr - scratch.data) < BLCKSZ); rdata[0].data = (char *) xlrec; - rdata[0].len = tupledata - scratch; + rdata[0].len = tupledata - scratch.data; rdata[0].buffer = InvalidBuffer; rdata[0].next = &rdata[1]; diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 6301a773aaf..35b76967571 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -610,10 +610,9 @@ static void vm_extend(Relation rel, BlockNumber vm_nblocks) { BlockNumber vm_nblocks_now; - Page pg; + PGAlignedBlock pg; - pg = (Page) palloc(BLCKSZ); - PageInit(pg, BLCKSZ, 0); + PageInit((Page) pg.data, BLCKSZ, 0); /* * We use the relation extension lock to lock out other backends trying to @@ -644,10 +643,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) /* Now extend the file */ while (vm_nblocks_now < vm_nblocks) { - PageSetChecksumInplace(pg, vm_nblocks_now); + PageSetChecksumInplace((Page) pg.data, vm_nblocks_now); smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now, - (char *) pg, false); + pg.data, false); vm_nblocks_now++; } @@ -664,6 +663,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now; UnlockRelationForExtension(rel, ExclusiveLock); - - pfree(pg); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 621dcb451fd..5e3b6caecf5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3173,8 +3173,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; - char zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF]; - char *zbuffer; + PGAlignedXLogBlock zbuffer; XLogSegNo installed_segno; int max_advance; int fd; @@ -3228,16 +3227,12 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) * fsync below) that all the indirect blocks are down on disk. Therefore, * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the * log file. - * - * Note: ensure the buffer is reasonably well-aligned; this may save a few - * cycles transferring data to the kernel. */ - zbuffer = (char *) MAXALIGN(zbuffer_raw); - memset(zbuffer, 0, XLOG_BLCKSZ); + memset(zbuffer.data, 0, XLOG_BLCKSZ); for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { errno = 0; - if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) { int save_errno = errno; @@ -3328,7 +3323,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno) { char path[MAXPGPATH]; char tmppath[MAXPGPATH]; - char buffer[XLOG_BLCKSZ]; + PGAlignedXLogBlock buffer; int srcfd; int fd; int nbytes; @@ -3364,7 +3359,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno) for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(buffer)) { errno = 0; - if ((int) read(srcfd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) + if ((int) read(srcfd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer)) { if (errno != 0) ereport(ERROR, @@ -3375,7 +3370,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno) (errmsg("not enough data in file \"%s\"", path))); } errno = 0; - if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer)) + if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer)) { int save_errno = errno; @@ -9200,7 +9195,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) */ if (XLogCheckBuffer(rdata, false, &lsn, &bkpb)) { - char copied_buffer[BLCKSZ]; + PGAlignedBlock copied_buffer; char *origdata = (char *) BufferGetBlock(buffer); /* @@ -9212,8 +9207,8 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) * and hole_offset to 0; so the following code is safe for either * case. */ - memcpy(copied_buffer, origdata, bkpb.hole_offset); - memcpy(copied_buffer + bkpb.hole_offset, + memcpy(copied_buffer.data, origdata, bkpb.hole_offset); + memcpy(copied_buffer.data + bkpb.hole_offset, origdata + bkpb.hole_offset + bkpb.hole_length, BLCKSZ - bkpb.hole_offset - bkpb.hole_length); @@ -9228,7 +9223,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std) /* * Save copy of the buffer. */ - rdata[1].data = copied_buffer; + rdata[1].data = copied_buffer.data; rdata[1].len = BLCKSZ - bkpb.hole_length; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f2b6a3fb6aa..584995e6f5c 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -9486,21 +9486,14 @@ static void copy_relation_data(SMgrRelation src, SMgrRelation dst, ForkNumber forkNum, char relpersistence) { - char *buf; + PGAlignedBlock buf; Page page; bool use_wal; bool copying_initfork; BlockNumber nblocks; BlockNumber blkno; - /* - * palloc the buffer so that it's MAXALIGN'd. If it were just a local - * char[] array, the compiler might align it on any byte boundary, which - * can seriously hurt transfer speed to and from the kernel; not to - * mention possibly making log_newpage's accesses to the page header fail. - */ - buf = (char *) palloc(BLCKSZ); - page = (Page) buf; + page = (Page) buf.data; /* * The init fork for an unlogged relation in many respects has to be @@ -9524,7 +9517,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, /* If we got a cancel signal during the copy of the data, quit */ CHECK_FOR_INTERRUPTS(); - smgrread(src, forkNum, blkno, buf); + smgrread(src, forkNum, blkno, buf.data); if (!PageIsVerified(page, blkno)) ereport(ERROR, @@ -9550,11 +9543,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst, * rel, because there's no need for smgr to schedule an fsync for this * write; we'll do it ourselves below. */ - smgrextend(dst, forkNum, blkno, buf, true); + smgrextend(dst, forkNum, blkno, buf.data, true); } - pfree(buf); - /* * If the rel is WAL-logged, must fsync before commit. We use heap_sync * to ensure that the toast table gets fsync'd too. (For a temp or diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 7737c4ddbef..4c7d55488ed 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -494,16 +494,16 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd) bytesleft = histfilelen; while (bytesleft > 0) { - char rbuf[BLCKSZ]; + PGAlignedBlock rbuf; int nread; - nread = read(fd, rbuf, sizeof(rbuf)); + nread = read(fd, rbuf.data, sizeof(rbuf)); if (nread <= 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", path))); - pq_sendbytes(&buf, rbuf, nread); + pq_sendbytes(&buf, rbuf.data, nread); bytesleft -= nread; } CloseTransientFile(fd); diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 0f007c82122..bc0f59f783c 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -86,7 +86,7 @@ struct BufFile off_t curOffset; /* offset part of current pos */ int pos; /* next read/write position in buffer */ int nbytes; /* total # of valid bytes in buffer */ - char buffer[BLCKSZ]; + PGAlignedBlock buffer; }; static BufFile *makeBufFile(File firstfile); @@ -254,7 +254,7 @@ BufFileLoadBuffer(BufFile *file) /* * Read whatever we can get, up to a full bufferload. */ - file->nbytes = FileRead(thisfile, file->buffer, sizeof(file->buffer)); + file->nbytes = FileRead(thisfile, file->buffer.data, sizeof(file->buffer)); if (file->nbytes < 0) file->nbytes = 0; file->offsets[file->curFile] += file->nbytes; @@ -317,7 +317,7 @@ BufFileDumpBuffer(BufFile *file) return; /* seek failed, give up */ file->offsets[file->curFile] = file->curOffset; } - bytestowrite = FileWrite(thisfile, file->buffer + wpos, bytestowrite); + bytestowrite = FileWrite(thisfile, file->buffer.data + wpos, bytestowrite); if (bytestowrite <= 0) return; /* failed to write */ file->offsets[file->curFile] += bytestowrite; @@ -385,7 +385,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size) nthistime = size; Assert(nthistime > 0); - memcpy(ptr, file->buffer + file->pos, nthistime); + memcpy(ptr, file->buffer.data + file->pos, nthistime); file->pos += nthistime; ptr = (void *) ((char *) ptr + nthistime); @@ -432,7 +432,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size) nthistime = size; Assert(nthistime > 0); - memcpy(file->buffer + file->pos, ptr, nthistime); + memcpy(file->buffer.data + file->pos, ptr, nthistime); file->dirty = true; file->pos += nthistime; diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 7eb9b4b09e8..1fa7679a4a0 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -588,10 +588,9 @@ static void fsm_extend(Relation rel, BlockNumber fsm_nblocks) { BlockNumber fsm_nblocks_now; - Page pg; + PGAlignedBlock pg; - pg = (Page) palloc(BLCKSZ); - PageInit(pg, BLCKSZ, 0); + PageInit((Page) pg.data, BLCKSZ, 0); /* * We use the relation extension lock to lock out other backends trying to @@ -621,10 +620,10 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) while (fsm_nblocks_now < fsm_nblocks) { - PageSetChecksumInplace(pg, fsm_nblocks_now); + PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now); smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now, - (char *) pg, false); + pg.data, false); fsm_nblocks_now++; } @@ -632,8 +631,6 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now; UnlockRelationForExtension(rel, ExclusiveLock); - - pfree(pg); } /* diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 5e1f3d40496..2fbd25c0fd0 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -88,7 +88,7 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, int f; char fn[MAXPGPATH]; struct stat statbuf; - char *zerobuf; + PGAlignedXLogBlock zerobuf; int bytes; XLogSegNo segno; @@ -134,11 +134,11 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, } /* New, empty, file. So pad it to 16Mb with zeroes */ - zerobuf = pg_malloc0(XLOG_BLCKSZ); + memset(zerobuf.data, 0, XLOG_BLCKSZ); for (bytes = 0; bytes < XLogSegSize; bytes += XLOG_BLCKSZ) { errno = 0; - if (write(f, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (write(f, zerobuf.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) @@ -146,13 +146,11 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir, fprintf(stderr, _("%s: could not pad transaction log file \"%s\": %s\n"), progname, fn, strerror(errno)); - free(zerobuf); close(f); unlink(fn); return false; } } - free(zerobuf); if (lseek(f, SEEK_SET, 0) != 0) { diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c index 78ef243ad99..d037325c1d3 100644 --- a/src/bin/pg_resetxlog/pg_resetxlog.c +++ b/src/bin/pg_resetxlog/pg_resetxlog.c @@ -1060,7 +1060,7 @@ KillExistingArchiveStatus(void) static void WriteEmptyXLOG(void) { - char *buffer; + PGAlignedXLogBlock buffer; XLogPageHeader page; XLogLongPageHeader longpage; XLogRecord *record; @@ -1069,12 +1069,10 @@ WriteEmptyXLOG(void) int fd; int nbytes; - /* Use malloc() to ensure buffer is MAXALIGNED */ - buffer = (char *) pg_malloc(XLOG_BLCKSZ); - page = (XLogPageHeader) buffer; - memset(buffer, 0, XLOG_BLCKSZ); + memset(buffer.data, 0, XLOG_BLCKSZ); /* Set up the XLOG page header */ + page = (XLogPageHeader) buffer.data; page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_info = XLP_LONG_HEADER; page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID; @@ -1116,7 +1114,7 @@ WriteEmptyXLOG(void) } errno = 0; - if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) { /* if write didn't set errno, assume problem is no disk space */ if (errno == 0) @@ -1127,11 +1125,11 @@ WriteEmptyXLOG(void) } /* Fill the rest of the file with zeroes */ - memset(buffer, 0, XLOG_BLCKSZ); + memset(buffer.data, 0, XLOG_BLCKSZ); for (nbytes = XLOG_BLCKSZ; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ) { errno = 0; - if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ) + if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ) { if (errno == 0) errno = ENOSPC; diff --git a/src/include/c.h b/src/include/c.h index e6a1f4b3291..6836b0a0f56 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -867,6 +867,32 @@ typedef NameData *Name; * ---------------------------------------------------------------- */ +/* + * Use this, not "char buf[BLCKSZ]", to declare a field or local variable + * holding a page buffer, if that page might be accessed as a page and not + * just a string of bytes. Otherwise the variable might be under-aligned, + * causing problems on alignment-picky hardware. (In some places, we use + * this to declare buffers even though we only pass them to read() and + * write(), because copying to/from aligned buffers is usually faster than + * using unaligned buffers.) We include both "double" and "int64" in the + * union to ensure that the compiler knows the value must be MAXALIGN'ed + * (cf. configure's computation of MAXIMUM_ALIGNOF). + */ +typedef union PGAlignedBlock +{ + char data[BLCKSZ]; + double force_align_d; + int64 force_align_i64; +} PGAlignedBlock; + +/* Same, but for an XLOG_BLCKSZ-sized buffer */ +typedef union PGAlignedXLogBlock +{ + char data[XLOG_BLCKSZ]; + double force_align_d; + int64 force_align_i64; +} PGAlignedXLogBlock; + /* msb for char */ #define HIGHBIT (0x80) #define IS_HIGHBIT_SET(ch) ((unsigned char)(ch) & HIGHBIT)