summaryrefslogtreecommitdiff
path: root/src/lib/crc
diff options
context:
space:
mode:
authorDimitri Staessens <dimitri@ouroboros.rocks>2026-04-29 22:23:09 +0200
committerSander Vrijders <sander@ouroboros.rocks>2026-05-06 09:05:08 +0200
commit146ba945e23c2266d5e31035135b5d158d1256e8 (patch)
tree02b93c9cfb5a1528a8efd79255c4940eb09c8655 /src/lib/crc
parent4cfc607ebbff840991d893a8c0fa3a004caeb416 (diff)
downloadouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.tar.gz
ouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.zip
lib: Fix and clean up crc implementation
Fixes detection of PMULL on aarch64 without crypto extensions. Adds a crc64_nvme_step helper function in CRC64 to avoid code duplication and cleans up the comments. Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks> Signed-off-by: Sander Vrijders <sander@ouroboros.rocks>
Diffstat (limited to 'src/lib/crc')
-rw-r--r--src/lib/crc/crc16.c5
-rw-r--r--src/lib/crc/crc64.c76
-rw-r--r--src/lib/crc/crc8.c6
3 files changed, 38 insertions, 49 deletions
diff --git a/src/lib/crc/crc16.c b/src/lib/crc/crc16.c
index 55af8647..9dc59429 100644
--- a/src/lib/crc/crc16.c
+++ b/src/lib/crc/crc16.c
@@ -34,10 +34,7 @@
#include <ouroboros/crc16.h>
-/* Bit-by-bit MSB-first CRC. Header-check use case rarely exceeds a
- * few hundred bytes; a 256-entry uint16_t table costs 512 B of
- * .rodata and is easy to add later if profiling demands it.
- */
+/* Bit-by-bit MSB-first CRC. */
void crc16_ccitt_false(uint16_t * crc,
const void * buf,
size_t len)
diff --git a/src/lib/crc/crc64.c b/src/lib/crc/crc64.c
index 3ee5e798..1b6fb5f6 100644
--- a/src/lib/crc/crc64.c
+++ b/src/lib/crc/crc64.c
@@ -34,7 +34,8 @@
#include <ouroboros/crc64.h>
-/* Reflected CRC-64/NVMe table. Polynomial in reflected form:
+/*
+ * Reflected CRC-64/NVMe table. Polynomial in reflected form:
* 0x9a6c9329ac4bc9b5 (bitrev of 0xad93d23594c93659).
*/
static const uint64_t crc64_nvme_tab[256] = {
@@ -168,20 +169,28 @@ static const uint64_t crc64_nvme_tab[256] = {
0x55b4a08fdfd90e51ULL, 0x2ada5047efec8728ULL
};
+static __inline__ uint64_t crc64_nvme_step(uint64_t c,
+ const uint8_t * p,
+ size_t len)
+{
+ size_t n;
+
+ for (n = 0; n < len; n++)
+ c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8);
+
+ return c;
+}
+
void crc64_nvme_table(uint64_t * crc,
const void * buf,
size_t len)
{
- size_t n;
+ uint64_t c;
- *crc = *crc ^ 0xffffffffffffffffULL;
+ c = crc64_nvme_step(*crc ^ UINT64_MAX,
+ (const uint8_t *) buf, len);
- for (n = 0; n < len; n++)
- *crc = crc64_nvme_tab[(*crc ^ ((uint8_t *) buf)[n])
- & 0xff]
- ^ (*crc >> 8);
-
- *crc = *crc ^ 0xffffffffffffffffULL;
+ *crc = c ^ UINT64_MAX;
}
#ifdef HAVE_PCLMUL
@@ -189,7 +198,8 @@ void crc64_nvme_table(uint64_t * crc,
#include <smmintrin.h>
#include <wmmintrin.h>
-/* Fold-by-16 constants for reflected CRC-64/NVMe. Properties of the
+/*
+ * Fold-by-16 constants for reflected CRC-64/NVMe. Properties of the
* polynomial; identical between the PCLMUL and PMULL backends.
* k3 = bitrev64(x^(128+64) mod P) << 1
* k4 = bitrev64(x^(128+0) mod P) << 1
@@ -209,9 +219,10 @@ static __m128i fold16(__m128i x,
return _mm_xor_si128(lo, hi);
}
-/* Fold-by-16 over 16-byte chunks; the 128-bit folded state is then
+/*
+ * Fold-by-16 over 16-byte chunks; the 128-bit folded state is then
* emitted as 16 little-endian bytes and run through the byte-table
- * loop together with any tail (<=15 bytes). The 16-byte minimum on
+ * loop together with any tail (<=15 bytes). The 16-byte minimum on
* the bulk loop is why the short-input path uses the table directly.
*/
__attribute__((target("pclmul,sse4.1")))
@@ -223,7 +234,6 @@ static void crc64_nvme_clmul(uint64_t * crc,
uint64_t seed;
uint64_t c;
size_t off;
- size_t n;
__m128i x;
__m128i k;
uint8_t post[16];
@@ -232,17 +242,14 @@ static void crc64_nvme_clmul(uint64_t * crc,
seed = *crc;
if (len < 16) {
- c = seed ^ 0xffffffffffffffffULL;
- for (n = 0; n < len; n++)
- c = crc64_nvme_tab[(c ^ p[n]) & 0xff]
- ^ (c >> 8);
- *crc = c ^ 0xffffffffffffffffULL;
+ c = crc64_nvme_step(seed ^ UINT64_MAX, p, len);
+ *crc = c ^ UINT64_MAX;
return;
}
x = _mm_loadu_si128((const __m128i *) p);
x = _mm_xor_si128(x, _mm_cvtsi64_si128((int64_t)
- (seed ^ 0xffffffffffffffffULL)));
+ (seed ^ UINT64_MAX)));
k = _mm_set_epi64x((int64_t) k4_clmul, (int64_t) k3_clmul);
@@ -257,14 +264,10 @@ static void crc64_nvme_clmul(uint64_t * crc,
_mm_storeu_si128((__m128i *) post, x);
- c = 0;
- for (n = 0; n < 16; n++)
- c = crc64_nvme_tab[(c ^ post[n]) & 0xff] ^ (c >> 8);
+ c = crc64_nvme_step(0, post, 16);
+ c = crc64_nvme_step(c, p + off, len - off);
- for (n = off; n < len; n++)
- c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8);
-
- *crc = c ^ 0xffffffffffffffffULL;
+ *crc = c ^ UINT64_MAX;
}
#endif /* HAVE_PCLMUL */
@@ -304,7 +307,6 @@ static void crc64_nvme_pmull(uint64_t * crc,
uint64_t seed;
uint64_t c;
size_t off;
- size_t n;
uint64x2_t x;
uint64x2_t k;
uint64_t seed_lane[2];
@@ -315,16 +317,13 @@ static void crc64_nvme_pmull(uint64_t * crc,
seed = *crc;
if (len < 16) {
- c = seed ^ 0xffffffffffffffffULL;
- for (n = 0; n < len; n++)
- c = crc64_nvme_tab[(c ^ p[n]) & 0xff]
- ^ (c >> 8);
- *crc = c ^ 0xffffffffffffffffULL;
+ c = crc64_nvme_step(seed ^ UINT64_MAX, p, len);
+ *crc = c ^ UINT64_MAX;
return;
}
x = vld1q_u64((const uint64_t *) p);
- seed_lane[0] = seed ^ 0xffffffffffffffffULL;
+ seed_lane[0] = seed ^ UINT64_MAX;
seed_lane[1] = 0;
x = veorq_u64(x, vld1q_u64(seed_lane));
@@ -343,16 +342,11 @@ static void crc64_nvme_pmull(uint64_t * crc,
vst1q_u8(post, vreinterpretq_u8_u64(x));
- c = 0;
- for (n = 0; n < 16; n++)
- c = crc64_nvme_tab[(c ^ post[n]) & 0xff] ^ (c >> 8);
-
- for (n = off; n < len; n++)
- c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8);
+ c = crc64_nvme_step(0, post, 16);
+ c = crc64_nvme_step(c, p + off, len - off);
- *crc = c ^ 0xffffffffffffffffULL;
+ *crc = c ^ UINT64_MAX;
}
-
#endif /* HAVE_PMULL */
void crc64_nvme(uint64_t * crc,
diff --git a/src/lib/crc/crc8.c b/src/lib/crc/crc8.c
index e8b9685a..20976b29 100644
--- a/src/lib/crc/crc8.c
+++ b/src/lib/crc/crc8.c
@@ -34,10 +34,8 @@
#include <ouroboros/crc8.h>
-/* Bit-by-bit MSB-first CRC. The expected use case is header check
- * sequences of a handful of bytes; a 256-byte lookup table would not
- * pay for the extra .rodata footprint at typical input sizes.
- */
+
+ /* Bit-by-bit MSB-first CRC. */
void crc8_autosar(uint8_t * crc,
const void * buf,
size_t len)