MDEV-25870 followup : pmull support on Windows ARM64
Casting the result of vmull_p64 is possible on MSVC, although it requires much more verbose code. The reason is that MSVC lacks the corresponding NEON types (the compiler has no support for 128-bit integers).
This commit is contained in:
parent
fe10645eb7
commit
8c6cbb3360
@ -68,12 +68,12 @@ IF(MSVC_INTEL)
|
||||
ENDIF()
|
||||
ELSEIF(MSVC_ARM64)
|
||||
SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c)
|
||||
ADD_DEFINITIONS(-DHAVE_ARMV8_CRC -DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
|
||||
ADD_DEFINITIONS(-DHAVE_ARMV8_CRC -DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS -DHAVE_ARMV8_CRYPTO)
|
||||
IF(CLANG_CL)
|
||||
SET_SOURCE_FILES_PROPERTIES(
|
||||
crc32/crc32_arm64.c
|
||||
PROPERTIES
|
||||
COMPILE_FLAGS "-march=armv8-a+crc"
|
||||
COMPILE_FLAGS "-march=armv8-a+crc+crypto"
|
||||
)
|
||||
ENDIF()
|
||||
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
|
||||
|
@ -29,7 +29,9 @@ my_crc32_t crc32c_aarch64_available(void)
|
||||
{
|
||||
if (crc32_aarch64_available() == 0)
|
||||
return NULL;
|
||||
/* TODO : pmull seems supported, but does not compile*/
|
||||
|
||||
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE))
|
||||
return crc32c_aarch64_pmull;
|
||||
return crc32c_aarch64;
|
||||
}
|
||||
|
||||
@ -181,11 +183,19 @@ asm(".arch_extension crypto");
|
||||
CRC32C3X8(buffer, ((ITR) * 7 + 6)) \
|
||||
} while(0)
|
||||
|
||||
/*
  PREF4X64L1(buffer, offset, itr)

  Issues four prefetch hints for addresses 64 bytes apart, the first one
  being buffer + offset + itr*64.

  The expansion intentionally ends with ';': PREF1KL1 chains several
  invocations one after another without any separator between them.
*/
#if defined _MSC_VER && !defined __clang__
/* MSVC (non-clang) variant, built on the __prefetch intrinsic. */
#define PREF4X64L1(buffer, offset, itr) \
  __prefetch(buffer + (offset) + ((itr) + 0)*64); \
  __prefetch(buffer + (offset) + ((itr) + 1)*64); \
  __prefetch(buffer + (offset) + ((itr) + 2)*64); \
  __prefetch(buffer + (offset) + ((itr) + 3)*64);
#else
/*
  Inline assembly variant: emits PRFM PLDL1KEEP directly. The byte offset
  is passed through the "I" constraint, so it has to be a constant.
*/
#define PREF4X64L1(buffer, offset, itr) \
  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((offset) + ((itr) + 0)*64)); \
  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((offset) + ((itr) + 1)*64)); \
  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((offset) + ((itr) + 2)*64)); \
  __asm__("PRFM PLDL1KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((offset) + ((itr) + 3)*64));
#endif
|
||||
|
||||
#define PREF1KL1(buffer, PREF_OFFSET) \
|
||||
PREF4X64L1(buffer,(PREF_OFFSET), 0) \
|
||||
@ -193,11 +203,20 @@ asm(".arch_extension crypto");
|
||||
PREF4X64L1(buffer,(PREF_OFFSET), 8) \
|
||||
PREF4X64L1(buffer,(PREF_OFFSET), 12)
|
||||
|
||||
/*
  PREF4X64L2(buffer, offset, itr)

  Issues four PLDL2KEEP prefetch hints for addresses 64 bytes apart, the
  first one being buffer + offset + itr*64.

  The expansion intentionally ends with ';': PREF1KL2 chains several
  invocations one after another without any separator between them.
*/
#if defined _MSC_VER && !defined __clang__
#define MY_PLDL2KEEP 2 /* PLDL2KEEP is 2 in ARMv8 */
/*
  MSVC (non-clang) variant, built on the __prefetch2 intrinsic.
  'offset' is parenthesized so that an expression argument keeps its
  meaning inside the address computation, as in PREF4X64L1.
*/
#define PREF4X64L2(buffer, offset, itr) \
  __prefetch2(buffer + (offset) + ((itr) + 0) * 64, MY_PLDL2KEEP); \
  __prefetch2(buffer + (offset) + ((itr) + 1) * 64, MY_PLDL2KEEP); \
  __prefetch2(buffer + (offset) + ((itr) + 2) * 64, MY_PLDL2KEEP); \
  __prefetch2(buffer + (offset) + ((itr) + 3) * 64, MY_PLDL2KEEP);
#else
/*
  Inline assembly variant: emits PRFM PLDL2KEEP directly. The byte offset
  is passed through the "I" constraint, so it has to be a constant.
*/
#define PREF4X64L2(buffer, PREF_OFFSET, ITR) \
  __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 0)*64));\
  __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 1)*64));\
  __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 2)*64));\
  __asm__("PRFM PLDL2KEEP, [%x[v],%[c]]"::[v]"r"(buffer), [c]"I"((PREF_OFFSET) + ((ITR) + 3)*64));
#endif
|
||||
|
||||
#define PREF1KL2(buffer, PREF_OFFSET) \
|
||||
PREF4X64L2(buffer,(PREF_OFFSET), 0) \
|
||||
@ -240,6 +259,16 @@ static unsigned crc32c_aarch64(unsigned crc, const void *buf, size_t len)
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ARMV8_CRYPTO
|
||||
|
||||
static inline uint64_t poly_mul(uint64_t a, uint64_t b)
|
||||
{
|
||||
#if defined _MSC_VER && !defined __clang__
|
||||
return vgetq_lane_u64(vreinterpretq_u64_p128(neon_pmull_64(vcreate_p64(a), vcreate_p64(b))),0);
|
||||
#else
|
||||
return (uint64_t) vmull_p64(a, b);
|
||||
#endif
|
||||
}
|
||||
|
||||
static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
|
||||
{
|
||||
int64_t length= (int64_t)len;
|
||||
@ -286,8 +315,8 @@ static unsigned crc32c_aarch64_pmull(unsigned crc, const void *buf, size_t len)
|
||||
* crc1 multiply by K2
|
||||
* crc0 multiply by K1
|
||||
*/
|
||||
t1= (uint64_t)vmull_p64(crc1, k2);
|
||||
t0= (uint64_t)vmull_p64(crc0, k1);
|
||||
t1= poly_mul(crc1, k2);
|
||||
t0= poly_mul(crc0, k1);
|
||||
crc= __crc32cd(crc2, *(const uint64_t *)buffer);
|
||||
crc1= __crc32cd(0, t1);
|
||||
crc^= crc1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user