diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 3590c8bad9a..bf93433b78d 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -190,19 +190,132 @@ extern MemoryContext BumpContextCreate(MemoryContext parent, #define SLAB_LARGE_BLOCK_SIZE (8 * 1024 * 1024) /* + * pg_memory_is_all_zeros + * * Test if a memory region starting at "ptr" and of size "len" is full of * zeroes. + * + * The test is divided into multiple cases for safety reason and multiple + * phases for efficiency. + * + * Case 1: len < sizeof(size_t) bytes, then byte-by-byte comparison. + * Case 2: len < (sizeof(size_t) * 8 - 1) bytes: + * - Phase 1: byte-by-byte comparison, until the pointer is aligned. + * - Phase 2: size_t comparisons, with aligned pointers, up to the last + * location possible. + * - Phase 3: byte-by-byte comparison, until the end location. + * Case 3: len >= (sizeof(size_t) * 8) bytes, same as case 2 except that an + * additional phase is placed between Phase 1 and Phase 2, with + * (8 * sizeof(size_t)) comparisons using bitwise OR to encourage + * compilers to use SIMD instructions if available, up to the last + * aligned location possible. + * + * Case 1 and Case 2 are mandatory to ensure that we won't read beyond the + * memory area. This is portable for 32-bit and 64-bit architectures. + * + * Caller must ensure that "ptr" is not NULL. */ static inline bool pg_memory_is_all_zeros(const void *ptr, size_t len) { - const char *p = (const char *) ptr; + const unsigned char *p = (const unsigned char *) ptr; + const unsigned char *end = &p[len]; + const unsigned char *aligned_end = (const unsigned char *) + ((uintptr_t) end & (~(sizeof(size_t) - 1))); - for (size_t i = 0; i < len; i++) + if (len < sizeof(size_t)) { - if (p[i] != 0) + while (p < end) + { + if (*p++ != 0) + return false; + } + return true; + } + + /* "len" in the [sizeof(size_t), sizeof(size_t) * 8 - 1] range */ + if (len < sizeof(size_t) * 8) + { + /* Compare bytes until the pointer "p" is aligned */ + while (((uintptr_t) p & (sizeof(size_t) - 1)) != 0) + { + if (p == end) + return true; + if (*p++ != 0) + return false; + } + + /* + * Compare remaining size_t-aligned chunks. + * + * There is no risk to read beyond the memory area, as "aligned_end" + * cannot be higher than "end". + */ + for (; p < aligned_end; p += sizeof(size_t)) + { + if (*(size_t *) p != 0) + return false; + } + + /* Compare remaining bytes until the end */ + while (p < end) + { + if (*p++ != 0) + return false; + } + return true; + } + + /* "len" in the [sizeof(size_t) * 8, inf) range */ + + /* Compare bytes until the pointer "p" is aligned */ + while (((uintptr_t) p & (sizeof(size_t) - 1)) != 0) + { + if (p == end) + return true; + + if (*p++ != 0) return false; } + + /* + * Compare 8 * sizeof(size_t) chunks at once. + * + * For performance reasons, we manually unroll this loop and purposefully + * use bitwise-ORs to combine each comparison. This prevents boolean + * short-circuiting and lets the compiler know that it's safe to access + * all 8 elements regardless of the result of the other comparisons. This + * seems to be enough to coax a few compilers into using SIMD + * instructions. + */ + for (; p < aligned_end - (sizeof(size_t) * 7); p += sizeof(size_t) * 8) + { + if ((((size_t *) p)[0] != 0) | (((size_t *) p)[1] != 0) | + (((size_t *) p)[2] != 0) | (((size_t *) p)[3] != 0) | + (((size_t *) p)[4] != 0) | (((size_t *) p)[5] != 0) | + (((size_t *) p)[6] != 0) | (((size_t *) p)[7] != 0)) + return false; + } + + /* + * Compare remaining size_t-aligned chunks. + * + * There is no risk to read beyond the memory area, as "aligned_end" + * cannot be higher than "end". + */ + for (; p < aligned_end; p += sizeof(size_t)) + { + if (*(size_t *) p != 0) + return false; + } + + /* Compare remaining bytes until the end */ + while (p < end) + { + if (*p++ != 0) + return false; + } + return true; }