diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 9e909758166..93b7367d8fa 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -68,12 +68,12 @@ IF(MSVC_INTEL)
   ENDIF()
 ELSEIF(MSVC_ARM64)
   SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c)
-  ADD_DEFINITIONS(-DHAVE_ARMV8_CRC -DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
+  ADD_DEFINITIONS(-DHAVE_ARMV8_CRC -DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS -DHAVE_ARMV8_CRYPTO)
   IF(CLANG_CL)
     SET_SOURCE_FILES_PROPERTIES(
       crc32/crc32_arm64.c
       PROPERTIES
-      COMPILE_FLAGS "-march=armv8-a+crc"
+      COMPILE_FLAGS "-march=armv8-a+crc+crypto"
     )
   ENDIF()
 ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
diff --git a/mysys/crc32/crc32_arm64.c b/mysys/crc32/crc32_arm64.c
index 1f2f023e6ca..7c25d383173 100644
--- a/mysys/crc32/crc32_arm64.c
+++ b/mysys/crc32/crc32_arm64.c
@@ -29,7 +29,9 @@ my_crc32_t crc32c_aarch64_available(void)
 {
   if (crc32_aarch64_available() == 0)
     return NULL;
-  /* TODO : pmull seems supported, but does not compile*/
+
+  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
+    return crc32c_aarch64_pmull;
   return crc32c_aarch64;
 }
 
@@ -181,11 +183,19 @@ asm(".arch_extension crypto");
     CRC32C3X8(buffer, ((ITR) * 7 + 6)) \
   } while(0)
 
+#if defined _MSC_VER && !defined __clang__
+#define PREF4X64L1(buffer, offset, itr)\
+  __prefetch(buffer + (offset) + ((itr) + 0)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 1)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 2)*64);\
+  __prefetch(buffer + (offset) + ((itr) + 3)*64);
+#else
 #define PREF4X64L1(buffer, PREF_OFFSET, ITR) \
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
   __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+#endif
 
 #define PREF1KL1(buffer, PREF_OFFSET) \
   PREF4X64L1(buffer,(PREF_OFFSET), 0) \
@@ -193,11 +203,20 @@ asm(".arch_extension crypto");
   PREF4X64L1(buffer,(PREF_OFFSET), 8) \
   PREF4X64L1(buffer,(PREF_OFFSET), 12)
 
+#if defined _MSC_VER && !defined __clang__
+#define MY_PLDL2KEEP 2 /* PLDL2KEEP is 2 in ARMv8 */
+#define PREF4X64L2(buffer,offset,itr)\
+  __prefetch2(buffer + offset + ((itr) + 0) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + offset + ((itr) + 1) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + offset + ((itr) + 2) * 64, MY_PLDL2KEEP);\
+  __prefetch2(buffer + offset + ((itr) + 3) * 64, MY_PLDL2KEEP);
+#else
 #define PREF4X64L2(buffer, PREF_OFFSET, ITR) \
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
   __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
+#endif
 
 #define PREF1KL2(buffer, PREF_OFFSET) \
   PREF4X64L2(buffer,(PREF_OFFSET), 0) \
@@ -240,6 +259,16 @@ static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len)
 #endif
 
 #ifdef HAVE_ARMV8_CRYPTO
+
+static inline uint64_t poly_mul(uint64_t a, uint64_t b)
+{
+#if defined _MSC_VER && !defined __clang__
+  return vgetq_lane_u64(vreinterpretq_u64_p128(neon_pmull_64(vcreate_p64(a), vcreate_p64(b))),0);
+#else
+  return (uint64_t) vmull_p64(a, b);
+#endif
+}
+
 static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
 {
   int64_t length= (int64_t)len;
@@ -286,8 +315,8 @@ static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
      * crc1 multiply by K2
      * crc0 multiply by K1
      */
-    t1= (uint64_t)vmull_p64(crc1, k2);
-    t0= (uint64_t)vmull_p64(crc0, k1);
+    t1= poly_mul(crc1, k2);
+    t0= poly_mul(crc0, k1);
     crc= __crc32cd(crc2, *(const uint64_t *)buffer);
     crc1= __crc32cd(0, t1);
     crc^= crc1;
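
Note on the runtime gate in crc32c_aarch64_available(): the PMULL-accelerated
function is now returned only when Windows reports the ARMv8 crypto extension.
A minimal standalone probe of that same feature bit (illustrative only, not
part of the patch; the file name is made up):

/* check_crypto.c - probe the Windows feature bit the patch keys off.
   Build on Windows/ARM64: cl check_crypto.c */
#include <windows.h>
#include <stdio.h>

int main(void)
{
  /* Same predicate as in crc32c_aarch64_available(): true only when
     the ARMv8 crypto extension (and thus PMULL) is usable. */
  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
    puts("crypto/PMULL present: crc32c_aarch64_pmull would be selected");
  else
    puts("no crypto extension: plain crc32c_aarch64 would be selected");
  return 0;
}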
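
The poly_mul() wrapper exists because MSVC has no vmull_p64; it reaches the
same PMULL instruction via neon_pmull_64 and then extracts the low 64 bits of
the 128-bit carry-less product. A self-contained cross-check sketch of that
wrapper against a plain-C reference (file name and test vectors are made up;
GCC/Clang need -march=armv8-a+crc+crypto to compile the intrinsic path):

/* check_pmull.c - verify poly_mul() against a bitwise reference.
   Build (GCC/Clang): cc -O2 -march=armv8-a+crc+crypto check_pmull.c
   Build (MSVC/ARM64): cl /O2 check_pmull.c */
#include <stdint.h>
#include <stdio.h>

#if defined _MSC_VER && !defined __clang__
#include <arm64_neon.h>   /* MSVC's AArch64 NEON intrinsics */
#else
#include <arm_neon.h>
#endif

/* Same wrapper as in the patch: low 64 bits of the 128-bit
   carry-less (polynomial) product of a and b. */
static inline uint64_t poly_mul(uint64_t a, uint64_t b)
{
#if defined _MSC_VER && !defined __clang__
  return vgetq_lane_u64(vreinterpretq_u64_p128(neon_pmull_64(vcreate_p64(a), vcreate_p64(b))),0);
#else
  return (uint64_t) vmull_p64(a, b);
#endif
}

/* Bitwise reference: XOR shifted copies of a for every set bit of b,
   keeping only the low 64 bits of the product. */
static uint64_t poly_mul_ref(uint64_t a, uint64_t b)
{
  uint64_t r= 0;
  for (unsigned i= 0; i < 64; i++)
    if ((b >> i) & 1)
      r^= a << i;
  return r;
}

int main(void)
{
  /* Arbitrary test constants, not the CRC folding keys k1/k2. */
  const uint64_t tests[][2]= {{0x1ULL, 0x1ULL},
                              {0xe417f38aULL, 0x8f158014ULL},
                              {0xffffffffffffffffULL, 0x3ULL}};
  for (unsigned i= 0; i < 3; i++)
  {
    uint64_t got= poly_mul(tests[i][0], tests[i][1]);
    uint64_t want= poly_mul_ref(tests[i][0], tests[i][1]);
    printf("%016llx * %016llx -> %016llx (%s)\n",
           (unsigned long long) tests[i][0], (unsigned long long) tests[i][1],
           (unsigned long long) got, got == want ? "ok" : "MISMATCH");
  }
  return 0;
}

Since crc32c_aarch64_pmull() only ever consumes the low 64 bits of each fold
multiply, truncating the 128-bit product inside the wrapper keeps both
compiler paths identical at every call site.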