From 146ba945e23c2266d5e31035135b5d158d1256e8 Mon Sep 17 00:00:00 2001 From: Dimitri Staessens Date: Wed, 29 Apr 2026 22:23:09 +0200 Subject: lib: Fix and clean up crc implementation Fixes detection of PMULL on aarch64 without crypto extensions. Adds a crc64_nvme_step helper function in CRC64 to avoid code duplication and cleans up the comments. Signed-off-by: Dimitri Staessens Signed-off-by: Sander Vrijders --- cmake/utils/CPUUtils.cmake | 64 +++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 18 deletions(-) (limited to 'cmake/utils/CPUUtils.cmake') diff --git a/cmake/utils/CPUUtils.cmake b/cmake/utils/CPUUtils.cmake index e158792a..8ca7683a 100644 --- a/cmake/utils/CPUUtils.cmake +++ b/cmake/utils/CPUUtils.cmake @@ -1,32 +1,51 @@ -include(CheckCSourceCompiles) +include(CheckCSourceRuns) -# Detect a CPU feature by attempting to compile a small program with -# the matching compiler flag. +# Compile + run a probe so we only enable a feature the host CPU +# actually implements (toolchains accept flags the silicon may lack). +# Cross-compile without an emulator: feature off. function(detect_cpu_feature _result_var _flags _source) set(_save_flags "${CMAKE_REQUIRED_FLAGS}") set(_save_quiet "${CMAKE_REQUIRED_QUIET}") set(CMAKE_REQUIRED_FLAGS "${_save_flags} ${_flags}") set(CMAKE_REQUIRED_QUIET TRUE) - check_c_source_compiles("${_source}" ${_result_var}) + if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR) + set(${_result_var} FALSE CACHE INTERNAL + "${_result_var} (cross-compile without emulator: off)") + else() + check_c_source_runs("${_source}" ${_result_var}) + endif() set(CMAKE_REQUIRED_FLAGS "${_save_flags}") set(CMAKE_REQUIRED_QUIET "${_save_quiet}") endfunction() -# x86 PCLMULQDQ (carry-less multiply) + SSE4.1. Sets HAVE_PCLMUL only -# when both intrinsic groups compile. +# x86 PCLMULQDQ + SSE4.1. argc-derived input defeats constant folding; +# SIGILL handler exits cleanly so the kernel skips the core dump. function(detect_pclmul) detect_cpu_feature(_HAVE_PCLMUL "-mpclmul" "#include -int main(void) { - __m128i a = _mm_setzero_si128(); - __m128i b = _mm_clmulepi64_si128(a, a, 0); - return (int) _mm_cvtsi128_si32(b); +#include +#include +static void on_sigill(int sig) { (void) sig; _exit(1); } +int main(int argc, char ** argv) { + __m128i a; + __m128i b; + (void) argv; + signal(SIGILL, on_sigill); + a = _mm_set1_epi32(argc); + b = _mm_clmulepi64_si128(a, a, 0); + return _mm_cvtsi128_si32(b) & 0; }") detect_cpu_feature(_HAVE_SSE41 "-msse4.1" "#include -int main(void) { - __m128i a = _mm_setzero_si128(); - return _mm_extract_epi32(a, 0); +#include +#include +static void on_sigill(int sig) { (void) sig; _exit(1); } +int main(int argc, char ** argv) { + __m128i a; + (void) argv; + signal(SIGILL, on_sigill); + a = _mm_set1_epi32(argc); + return _mm_extract_epi32(a, 0) & 0; }") if(_HAVE_PCLMUL AND _HAVE_SSE41) set(HAVE_PCLMUL TRUE CACHE INTERNAL @@ -36,14 +55,23 @@ int main(void) { endif() endfunction() -# aarch64 FEAT_PMULL (vmull_p64 / vmull_high_p64) carry-less multiply. +# aarch64 FEAT_PMULL (vmull_p64). Pi 4's BCM2711 accepts +crypto at +# compile time but lacks the hardware — the runtime probe catches that. function(detect_pmull) detect_cpu_feature(_HAVE_PMULL "-march=armv8-a+crypto" "#include -int main(void) { - poly64_t a = (poly64_t) 0; - poly128_t c = vmull_p64(a, a); - return (int) vgetq_lane_u64((uint64x2_t) c, 0); +#include +#include +#include +static void on_sigill(int sig) { (void) sig; _exit(1); } +int main(int argc, char ** argv) { + poly64_t a; + poly128_t c; + (void) argv; + signal(SIGILL, on_sigill); + a = (poly64_t) (uint64_t) argc; + c = vmull_p64(a, a); + return (int) (vgetq_lane_u64((uint64x2_t) c, 0) & 0); }") if(_HAVE_PMULL) set(HAVE_PMULL TRUE CACHE INTERNAL -- cgit v1.2.3