diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 3590c8bad9a..bf93433b78d 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -190,19 +190,132 @@ extern MemoryContext BumpContextCreate(MemoryContext parent,
 #define SLAB_LARGE_BLOCK_SIZE		(8 * 1024 * 1024)
 
 /*
+ * pg_memory_is_all_zeros
+ *
  * Test if a memory region starting at "ptr" and of size "len" is full of
  * zeroes.
+ *
+ * The test is divided into multiple cases for safety reasons and multiple
+ * phases for efficiency.
+ *
+ * Case 1: len < sizeof(size_t) bytes, then byte-by-byte comparison.
+ * Case 2: len <= (sizeof(size_t) * 8 - 1) bytes:
+ *       - Phase 1: byte-by-byte comparison, until the pointer is aligned.
+ *       - Phase 2: size_t comparisons, with aligned pointers, up to the last
+ *                  location possible.
+ *       - Phase 3: byte-by-byte comparison, until the end location.
+ * Case 3: len >= (sizeof(size_t) * 8) bytes, same as case 2 except that an
+ *         additional phase is placed between Phase 1 and Phase 2, with
+ *         (8 * sizeof(size_t)) comparisons using bitwise OR to encourage
+ *         compilers to use SIMD instructions if available, up to the last
+ *         aligned location possible.
+ *
+ * Case 1 and Case 2 are mandatory to ensure that we won't read beyond the
+ * memory area.  This is portable for 32-bit and 64-bit architectures.
+ *
+ * Caller must ensure that "ptr" is not NULL.
  */
 static inline bool
 pg_memory_is_all_zeros(const void *ptr, size_t len)
 {
-	const char *p = (const char *) ptr;
+	const unsigned char *p = (const unsigned char *) ptr;
+	const unsigned char *end = &p[len];	/* one past the last byte */
+	const unsigned char *aligned_end = (const unsigned char *)
+		((uintptr_t) end & (~(sizeof(size_t) - 1)));
 
-	for (size_t i = 0; i < len; i++)
+	if (len < sizeof(size_t))
 	{
-		if (p[i] != 0)
+		while (p < end)
+		{
+			if (*p++ != 0)
+				return false;
+		}
+		return true;
+	}
+
+	/* "len" in the [sizeof(size_t), sizeof(size_t) * 8 - 1] range */
+	if (len < sizeof(size_t) * 8)
+	{
+		/* Compare bytes one at a time until "p" is size_t-aligned */
+		while (((uintptr_t) p & (sizeof(size_t) - 1)) != 0)
+		{
+			if (p == end)
+				return true;
+			if (*p++ != 0)
+				return false;
+		}
+
+		/*
+		 * Compare remaining size_t-aligned chunks.
+		 *
+		 * There is no risk of reading beyond the memory area, as
+		 * "aligned_end" is "end" rounded down, so it is never past "end".
+		 */
+		for (; p < aligned_end; p += sizeof(size_t))
+		{
+			if (*(size_t *) p != 0)
+				return false;
+		}
+
+		/* Compare the trailing bytes between "aligned_end" and "end" */
+		while (p < end)
+		{
+			if (*p++ != 0)
+				return false;
+		}
+		return true;
+	}
+
+	/* "len" in the [sizeof(size_t) * 8, inf) range */
+
+	/* Compare bytes one at a time until "p" is size_t-aligned */
+	while (((uintptr_t) p & (sizeof(size_t) - 1)) != 0)
+	{
+		if (p == end)
+			return true;
+
+		if (*p++ != 0)
 			return false;
 	}
+
+	/*
+	 * Compare 8 * sizeof(size_t) chunks at once.  The loop bound guarantees
+	 * that all 8 words read per iteration lie before "aligned_end".
+	 *
+	 * For performance reasons, we manually unroll this loop and purposefully
+	 * use bitwise-ORs to combine each comparison.  This prevents boolean
+	 * short-circuiting and lets the compiler know that it's safe to access
+	 * all 8 elements regardless of the result of the other comparisons.  This
+	 * seems to be enough to coax a few compilers into using SIMD instructions.
+	 */
+	for (; p < aligned_end - (sizeof(size_t) * 7); p += sizeof(size_t) * 8)
+	{
+		if ((((size_t *) p)[0] != 0) | (((size_t *) p)[1] != 0) |
+			(((size_t *) p)[2] != 0) | (((size_t *) p)[3] != 0) |
+			(((size_t *) p)[4] != 0) | (((size_t *) p)[5] != 0) |
+			(((size_t *) p)[6] != 0) | (((size_t *) p)[7] != 0))
+			return false;
+	}
+
+	/*
+	 * Compare remaining size_t-aligned chunks.
+	 *
+	 * There is no risk of reading beyond the memory area, as "aligned_end"
+	 * is "end" rounded down, so it is never past "end".
+	 */
+	for (; p < aligned_end; p += sizeof(size_t))
+	{
+		if (*(size_t *) p != 0)
+			return false;
+	}
+
+	/* Compare the trailing bytes between "aligned_end" and "end" */
+	while (p < end)
+	{
+		if (*p++ != 0)
+			return false;
+	}
+
 	return true;
 }