From bdfea870024b8efd0a0bd16f8978482d8bbf9ed9 Mon Sep 17 00:00:00 2001 From: Dimitri Staessens Date: Mon, 1 Jun 2026 08:41:34 +0200 Subject: lib: Reclaim aged leaked ssm pool blocks The reclaim_pid_from_sc() function reaped every block with a nonzero refcount whose allocator was a dead PID, but cross-process hand-offs can leave a block briefly allocated by the producer while a live consumer still holds it. This skips reclaiming blocks younger than SSM_POOL_RECLAIM_AGE_S (default 60s) so in-flight hand-offs survive a producer crash. Signed-off-by: Dimitri Staessens Signed-off-by: Sander Vrijders --- cmake/config/ssm.cmake | 2 ++ src/lib/ssm/pool.c | 40 ++++++++++++++++++++++++++++++++-------- src/lib/ssm/ssm.h.in | 2 ++ 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/cmake/config/ssm.cmake b/cmake/config/ssm.cmake index 26604f70..589171ea 100644 --- a/cmake/config/ssm.cmake +++ b/cmake/config/ssm.cmake @@ -29,6 +29,8 @@ set(SSM_FLOW_SET_PREFIX "/${SHM_PREFIX}.set." CACHE INTERNAL # Number of shards per size class for reducing contention set(SSM_POOL_SHARDS 4 CACHE STRING "Number of allocator shards per size class") +set(SSM_POOL_RECLAIM_AGE_S 60 CACHE STRING + "Minimum age in seconds before a block is presumed stale and reclaimed") # Global Shared Packet Pool (GSPP) - for privileged processes # Shared by all processes in 'ouroboros' group (~60 MB total) diff --git a/src/lib/ssm/pool.c b/src/lib/ssm/pool.c index 5607a360..705de147 100644 --- a/src/lib/ssm/pool.c +++ b/src/lib/ssm/pool.c @@ -38,10 +38,20 @@ #include #include #include +#include #include #include #include +static __inline__ uint64_t pool_now_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec; +} + /* Global Shared Packet Pool (GSPP) configuration */ static const struct ssm_size_class_cfg ssm_gspp_cfg[SSM_POOL_MAX_CLASSES] = { { (1 << 8), SSM_GSPP_256_BLOCKS }, @@ -236,6 +246,7 @@ static void init_size_classes(struct ssm_pool * 
pool) STORE(&blk->refcount, 0); blk->allocator_pid = 0; + blk->alloc_ts = 0; STORE(&blk->next_offset, 0); list_add_head(&sc->shards[0].free_list, blk, @@ -266,19 +277,31 @@ static size_t reclaim_pid_from_sc(struct _ssm_size_class * sc, size_t i; size_t recovered = 0; struct ssm_pk_buff * blk; + uint64_t now; + uint64_t min_age_ns; - region = (uint8_t *) pool_base + sc->pool_start; + region = (uint8_t *) pool_base + sc->pool_start; + now = pool_now_ns(); + min_age_ns = (uint64_t) SSM_POOL_RECLAIM_AGE_S * 1000000000ULL; for (i = 0; i < sc->object_count; ++i) { blk = (struct ssm_pk_buff *)(region + i * sc->object_size); - if (blk->allocator_pid == pid && LOAD(&blk->refcount) > 0) { - STORE(&blk->refcount, 0); - blk->allocator_pid = 0; - list_add_head(&shard->free_list, blk, pool_base); - FETCH_ADD(&shard->free_count, 1); - recovered++; - } + if (blk->allocator_pid != pid) + continue; + + if (LOAD(&blk->refcount) == 0) + continue; + + /* Recent: a live consumer may still hold the handoff. */ + if (now - blk->alloc_ts < min_age_ns) + continue; + + STORE(&blk->refcount, 0); + blk->allocator_pid = 0; + list_add_head(&shard->free_list, blk, pool_base); + FETCH_ADD(&shard->free_count, 1); + recovered++; } return recovered; @@ -339,6 +362,7 @@ static __inline__ ssize_t init_block(struct ssm_pool * pool, { STORE(&blk->refcount, 1); blk->allocator_pid = getpid(); + blk->alloc_ts = pool_now_ns(); blk->size = (uint32_t) (sc->object_size - sizeof(struct ssm_pk_buff)); blk->pk_head = SSM_PK_BUFF_HEADSPACE; diff --git a/src/lib/ssm/ssm.h.in b/src/lib/ssm/ssm.h.in index b86327a1..57febae4 100644 --- a/src/lib/ssm/ssm.h.in +++ b/src/lib/ssm/ssm.h.in @@ -83,6 +83,7 @@ /* Size class configuration */ #define SSM_POOL_MAX_CLASSES 9 #define SSM_POOL_SHARDS @SSM_POOL_SHARDS@ +#define SSM_POOL_RECLAIM_AGE_S @SSM_POOL_RECLAIM_AGE_S@ /* Internal structures - exposed for testing */ #ifdef __cplusplus @@ -125,6 +126,7 @@ struct ssm_pk_buff { uint32_t pk_head; /* Head offset into data */ 
uint32_t pk_tail; /* Tail offset into data */ uint32_t off; /* Block offset in pool */ + uint64_t alloc_ts; /* CLOCK_MONOTONIC ns at alloc */ uint8_t data[]; /* Packet data */ }; -- cgit v1.2.3