diff options
| author | Dimitri Staessens <dimitri@ouroboros.rocks> | 2026-04-29 22:23:09 +0200 |
|---|---|---|
| committer | Sander Vrijders <sander@ouroboros.rocks> | 2026-05-06 09:05:08 +0200 |
| commit | 146ba945e23c2266d5e31035135b5d158d1256e8 (patch) | |
| tree | 02b93c9cfb5a1528a8efd79255c4940eb09c8655 /src/lib | |
| parent | 4cfc607ebbff840991d893a8c0fa3a004caeb416 (diff) | |
| download | ouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.tar.gz ouroboros-146ba945e23c2266d5e31035135b5d158d1256e8.zip | |
lib: Fix and clean up crc implementation
Fixes detection of PMULL on aarch64 without crypto extensions. Adds a
crc64_nvme_step helper function in CRC64 to avoid code
duplication and cleans up the comments.
Signed-off-by: Dimitri Staessens <dimitri@ouroboros.rocks>
Signed-off-by: Sander Vrijders <sander@ouroboros.rocks>
Diffstat (limited to 'src/lib')
| -rw-r--r-- | src/lib/crc/crc16.c | 5 | ||||
| -rw-r--r-- | src/lib/crc/crc64.c | 76 | ||||
| -rw-r--r-- | src/lib/crc/crc8.c | 6 |
3 files changed, 38 insertions, 49 deletions
diff --git a/src/lib/crc/crc16.c b/src/lib/crc/crc16.c index 55af8647..9dc59429 100644 --- a/src/lib/crc/crc16.c +++ b/src/lib/crc/crc16.c @@ -34,10 +34,7 @@ #include <ouroboros/crc16.h> -/* Bit-by-bit MSB-first CRC. Header-check use case rarely exceeds a - * few hundred bytes; a 256-entry uint16_t table costs 512 B of - * .rodata and is easy to add later if profiling demands it. - */ +/* Bit-by-bit MSB-first CRC. */ void crc16_ccitt_false(uint16_t * crc, const void * buf, size_t len) diff --git a/src/lib/crc/crc64.c b/src/lib/crc/crc64.c index 3ee5e798..1b6fb5f6 100644 --- a/src/lib/crc/crc64.c +++ b/src/lib/crc/crc64.c @@ -34,7 +34,8 @@ #include <ouroboros/crc64.h> -/* Reflected CRC-64/NVMe table. Polynomial in reflected form: +/* + * Reflected CRC-64/NVMe table. Polynomial in reflected form: * 0x9a6c9329ac4bc9b5 (bitrev of 0xad93d23594c93659). */ static const uint64_t crc64_nvme_tab[256] = { @@ -168,20 +169,28 @@ static const uint64_t crc64_nvme_tab[256] = { 0x55b4a08fdfd90e51ULL, 0x2ada5047efec8728ULL }; +static __inline__ uint64_t crc64_nvme_step(uint64_t c, + const uint8_t * p, + size_t len) +{ + size_t n; + + for (n = 0; n < len; n++) + c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8); + + return c; +} + void crc64_nvme_table(uint64_t * crc, const void * buf, size_t len) { - size_t n; + uint64_t c; - *crc = *crc ^ 0xffffffffffffffffULL; + c = crc64_nvme_step(*crc ^ UINT64_MAX, + (const uint8_t *) buf, len); - for (n = 0; n < len; n++) - *crc = crc64_nvme_tab[(*crc ^ ((uint8_t *) buf)[n]) - & 0xff] - ^ (*crc >> 8); - - *crc = *crc ^ 0xffffffffffffffffULL; + *crc = c ^ UINT64_MAX; } #ifdef HAVE_PCLMUL @@ -189,7 +198,8 @@ void crc64_nvme_table(uint64_t * crc, #include <smmintrin.h> #include <wmmintrin.h> -/* Fold-by-16 constants for reflected CRC-64/NVMe. Properties of the +/* + * Fold-by-16 constants for reflected CRC-64/NVMe. Properties of the * polynomial; identical between the PCLMUL and PMULL backends. * k3 = bitrev64(x^(128+64) mod P) << 1 * k4 = bitrev64(x^(128+0) mod P) << 1 @@ -209,9 +219,10 @@ static __m128i fold16(__m128i x, return _mm_xor_si128(lo, hi); } -/* Fold-by-16 over 16-byte chunks; the 128-bit folded state is then +/* + * Fold-by-16 over 16-byte chunks; the 128-bit folded state is then * emitted as 16 little-endian bytes and run through the byte-table - * loop together with any tail (<=15 bytes). The 16-byte minimum on + * loop together with any tail (<=15 bytes). The 16-byte minimum on * the bulk loop is why the short-input path uses the table directly. */ __attribute__((target("pclmul,sse4.1"))) @@ -223,7 +234,6 @@ static void crc64_nvme_clmul(uint64_t * crc, uint64_t seed; uint64_t c; size_t off; - size_t n; __m128i x; __m128i k; uint8_t post[16]; @@ -232,17 +242,14 @@ static void crc64_nvme_clmul(uint64_t * crc, seed = *crc; if (len < 16) { - c = seed ^ 0xffffffffffffffffULL; - for (n = 0; n < len; n++) - c = crc64_nvme_tab[(c ^ p[n]) & 0xff] - ^ (c >> 8); - *crc = c ^ 0xffffffffffffffffULL; + c = crc64_nvme_step(seed ^ UINT64_MAX, p, len); + *crc = c ^ UINT64_MAX; return; } x = _mm_loadu_si128((const __m128i *) p); x = _mm_xor_si128(x, _mm_cvtsi64_si128((int64_t) - (seed ^ 0xffffffffffffffffULL))); + (seed ^ UINT64_MAX))); k = _mm_set_epi64x((int64_t) k4_clmul, (int64_t) k3_clmul); @@ -257,14 +264,10 @@ static void crc64_nvme_clmul(uint64_t * crc, _mm_storeu_si128((__m128i *) post, x); - c = 0; - for (n = 0; n < 16; n++) - c = crc64_nvme_tab[(c ^ post[n]) & 0xff] ^ (c >> 8); + c = crc64_nvme_step(0, post, 16); + c = crc64_nvme_step(c, p + off, len - off); - for (n = off; n < len; n++) - c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8); - - *crc = c ^ 0xffffffffffffffffULL; + *crc = c ^ UINT64_MAX; } #endif /* HAVE_PCLMUL */ @@ -304,7 +307,6 @@ static void crc64_nvme_pmull(uint64_t * crc, uint64_t seed; uint64_t c; size_t off; - size_t n; uint64x2_t x; uint64x2_t k; uint64_t seed_lane[2]; @@ -315,16 +317,13 @@ static void crc64_nvme_pmull(uint64_t * crc, seed = *crc; if (len < 16) { - c = seed ^ 0xffffffffffffffffULL; - for (n = 0; n < len; n++) - c = crc64_nvme_tab[(c ^ p[n]) & 0xff] - ^ (c >> 8); - *crc = c ^ 0xffffffffffffffffULL; + c = crc64_nvme_step(seed ^ UINT64_MAX, p, len); + *crc = c ^ UINT64_MAX; return; } x = vld1q_u64((const uint64_t *) p); - seed_lane[0] = seed ^ 0xffffffffffffffffULL; + seed_lane[0] = seed ^ UINT64_MAX; seed_lane[1] = 0; x = veorq_u64(x, vld1q_u64(seed_lane)); @@ -343,16 +342,11 @@ static void crc64_nvme_pmull(uint64_t * crc, vst1q_u8(post, vreinterpretq_u8_u64(x)); - c = 0; - for (n = 0; n < 16; n++) - c = crc64_nvme_tab[(c ^ post[n]) & 0xff] ^ (c >> 8); - - for (n = off; n < len; n++) - c = crc64_nvme_tab[(c ^ p[n]) & 0xff] ^ (c >> 8); + c = crc64_nvme_step(0, post, 16); + c = crc64_nvme_step(c, p + off, len - off); - *crc = c ^ 0xffffffffffffffffULL; + *crc = c ^ UINT64_MAX; } - #endif /* HAVE_PMULL */ void crc64_nvme(uint64_t * crc, diff --git a/src/lib/crc/crc8.c b/src/lib/crc/crc8.c index e8b9685a..20976b29 100644 --- a/src/lib/crc/crc8.c +++ b/src/lib/crc/crc8.c @@ -34,10 +34,8 @@ #include <ouroboros/crc8.h> -/* Bit-by-bit MSB-first CRC. The expected use case is header check - * sequences of a handful of bytes; a 256-byte lookup table would not - * pay for the extra .rodata footprint at typical input sizes. - */ + + /* Bit-by-bit MSB-first CRC. */ void crc8_autosar(uint8_t * crc, const void * buf, size_t len) |
