summaryrefslogtreecommitdiff
path: root/cmake
diff options
context:
space:
mode:
authorDimitri Staessens <dimitri@ouroboros.rocks>2026-04-29 22:23:09 +0200
committerSander Vrijders <sander@ouroboros.rocks>2026-05-06 09:05:08 +0200
commit146ba945e23c2266d5e31035135b5d158d1256e8 (patch)
tree02b93c9cfb5a1528a8efd79255c4940eb09c8655 /cmake
parent4cfc607ebbff840991d893a8c0fa3a004caeb416 (diff)
downloadouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.tar.gz
ouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.zip
lib: Fix and clean up crc implementation
Fixes detection of PMULL on aarch64 without crypto extensions. Adds a crc64_nvme_step helper function in CRC64 to avoid code duplication and cleans up the comments. Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks> Signed-off-by: Sander Vrijders <sander@ouroboros.rocks>
Diffstat (limited to 'cmake')
-rw-r--r--cmake/utils/CPUUtils.cmake64
1 files changed, 46 insertions, 18 deletions
diff --git a/cmake/utils/CPUUtils.cmake b/cmake/utils/CPUUtils.cmake
index e158792a..8ca7683a 100644
--- a/cmake/utils/CPUUtils.cmake
+++ b/cmake/utils/CPUUtils.cmake
@@ -1,32 +1,51 @@
-include(CheckCSourceCompiles)
+include(CheckCSourceRuns)
-# Detect a CPU feature by attempting to compile a small program with
-# the matching compiler flag.
+# Compile + run a probe so we only enable a feature the host CPU
+# actually implements (toolchains accept flags the silicon may lack).
+# Cross-compile without an emulator: feature off.
function(detect_cpu_feature _result_var _flags _source)
set(_save_flags "${CMAKE_REQUIRED_FLAGS}")
set(_save_quiet "${CMAKE_REQUIRED_QUIET}")
set(CMAKE_REQUIRED_FLAGS "${_save_flags} ${_flags}")
set(CMAKE_REQUIRED_QUIET TRUE)
- check_c_source_compiles("${_source}" ${_result_var})
+ if(CMAKE_CROSSCOMPILING AND NOT CMAKE_CROSSCOMPILING_EMULATOR)
+ set(${_result_var} FALSE CACHE INTERNAL
+ "${_result_var} (cross-compile without emulator: off)")
+ else()
+ check_c_source_runs("${_source}" ${_result_var})
+ endif()
set(CMAKE_REQUIRED_FLAGS "${_save_flags}")
set(CMAKE_REQUIRED_QUIET "${_save_quiet}")
endfunction()
-# x86 PCLMULQDQ (carry-less multiply) + SSE4.1. Sets HAVE_PCLMUL only
-# when both intrinsic groups compile.
+# x86 PCLMULQDQ + SSE4.1. argc-derived input defeats constant folding;
+# SIGILL handler exits cleanly so the kernel skips the core dump.
function(detect_pclmul)
detect_cpu_feature(_HAVE_PCLMUL "-mpclmul"
"#include <wmmintrin.h>
-int main(void) {
- __m128i a = _mm_setzero_si128();
- __m128i b = _mm_clmulepi64_si128(a, a, 0);
- return (int) _mm_cvtsi128_si32(b);
+#include <signal.h>
+#include <unistd.h>
+static void on_sigill(int sig) { (void) sig; _exit(1); }
+int main(int argc, char ** argv) {
+ __m128i a;
+ __m128i b;
+ (void) argv;
+ signal(SIGILL, on_sigill);
+ a = _mm_set1_epi32(argc);
+ b = _mm_clmulepi64_si128(a, a, 0);
+ return _mm_cvtsi128_si32(b) & 0;
}")
detect_cpu_feature(_HAVE_SSE41 "-msse4.1"
"#include <smmintrin.h>
-int main(void) {
- __m128i a = _mm_setzero_si128();
- return _mm_extract_epi32(a, 0);
+#include <signal.h>
+#include <unistd.h>
+static void on_sigill(int sig) { (void) sig; _exit(1); }
+int main(int argc, char ** argv) {
+ __m128i a;
+ (void) argv;
+ signal(SIGILL, on_sigill);
+ a = _mm_set1_epi32(argc);
+ return _mm_extract_epi32(a, 0) & 0;
}")
if(_HAVE_PCLMUL AND _HAVE_SSE41)
set(HAVE_PCLMUL TRUE CACHE INTERNAL
@@ -36,14 +55,23 @@ int main(void) {
endif()
endfunction()
-# aarch64 FEAT_PMULL (vmull_p64 / vmull_high_p64) carry-less multiply.
+# aarch64 FEAT_PMULL (vmull_p64). Pi 4's BCM2711 accepts +crypto at
+# compile time but lacks the hardware — the runtime probe catches that.
function(detect_pmull)
detect_cpu_feature(_HAVE_PMULL "-march=armv8-a+crypto"
"#include <arm_neon.h>
-int main(void) {
- poly64_t a = (poly64_t) 0;
- poly128_t c = vmull_p64(a, a);
- return (int) vgetq_lane_u64((uint64x2_t) c, 0);
+#include <signal.h>
+#include <stdint.h>
+#include <unistd.h>
+static void on_sigill(int sig) { (void) sig; _exit(1); }
+int main(int argc, char ** argv) {
+ poly64_t a;
+ poly128_t c;
+ (void) argv;
+ signal(SIGILL, on_sigill);
+ a = (poly64_t) (uint64_t) argc;
+ c = vmull_p64(a, a);
+ return (int) (vgetq_lane_u64((uint64x2_t) c, 0) & 0);
}")
if(_HAVE_PMULL)
set(HAVE_PMULL TRUE CACHE INTERNAL