
Commit 46e25b2

Avoid potential deadlocks from using std::* operations in signal handlers (#293)
* Avoid potential deadlocks from using std::* operations in signal handlers
* Avoid intermittent failures in traceid generation tests
* Mark fields with concurrent access as volatile

(cherry picked from commit 8070f3c)
1 parent 0138b72 commit 46e25b2

File tree

8 files changed: +287 / -136 lines changed


ddprof-lib/src/main/cpp/callTraceStorage.cpp

Lines changed: 227 additions & 96 deletions
Large diffs are not rendered by default.

ddprof-lib/src/main/cpp/callTraceStorage.h

Lines changed: 37 additions & 15 deletions
@@ -28,29 +28,50 @@ class CallTraceHashTable;
 // Using reference parameter avoids malloc() for vector creation and copying
 typedef std::function<void(std::unordered_set<u64>&)> LivenessChecker;

+/**
+ * Cache-aligned hazard pointer slot to eliminate false sharing.
+ * Each slot occupies a full cache line (64 bytes) to ensure that
+ * updates by one thread do not invalidate cache lines for other threads.
+ */
+struct alignas(DEFAULT_CACHE_LINE_SIZE) HazardSlot {
+  volatile CallTraceHashTable* pointer;
+  char padding[DEFAULT_CACHE_LINE_SIZE - sizeof(pointer)];
+
+  HazardSlot() : pointer(nullptr), padding{} {
+    static_assert(sizeof(HazardSlot) == DEFAULT_CACHE_LINE_SIZE,
+                  "HazardSlot must be exactly one cache line");
+  }
+};
+
 /**
  * RAII guard for hazard pointer management.
- *
+ *
  * This class encapsulates the hazard pointer mechanism used to protect CallTraceHashTable
  * instances from being deleted while they are being accessed by concurrent threads.
  *
  * Usage:
- *   HazardPointer guard(active_table);
- *   // active_table is now protected from deletion
+ *   HazardPointer guard(_active_table);
+ *   // _active_table is now protected from deletion
  *   // Automatic cleanup when guard goes out of scope
  */
 class HazardPointer {
 public:
   static constexpr int MAX_THREADS = 8192;
   static constexpr int MAX_PROBE_DISTANCE = 32; // Maximum probing attempts
-
-  static std::atomic<CallTraceHashTable*> global_hazard_list[MAX_THREADS];
-  static std::atomic<int> slot_owners[MAX_THREADS]; // Thread ID ownership verification
+  static constexpr int BITMAP_WORDS = MAX_THREADS / 64; // 8192 / 64 = 128 words
+
+  static HazardSlot global_hazard_list[MAX_THREADS];
+  static int slot_owners[MAX_THREADS]; // Thread ID ownership verification
+
+  // Occupied slot bitmap for efficient scanning (avoids iterating 8192 empty slots)
+  // Uses raw uint64_t with GCC atomic builtins to avoid any potential malloc on musl
+  // Each bit represents whether the corresponding slot is occupied
+  static uint64_t occupied_bitmap[BITMAP_WORDS];

 private:
-  bool active_;
-  int my_slot_; // This instance's assigned slot
-
+  bool _active;
+  int _my_slot; // This instance's assigned slot
+
   // Signal-safe slot assignment using thread ID hash
   static int getThreadHazardSlot();

@@ -66,7 +87,7 @@ class HazardPointer {
   HazardPointer& operator=(HazardPointer&& other) noexcept;

   // Check if hazard pointer is active (slot allocation succeeded)
-  bool isActive() const { return active_; }
+  bool isActive() const { return _active; }

   // Wait for hazard pointers pointing to specific table to clear (used during shutdown)
   static void waitForHazardPointersToClear(CallTraceHashTable* table_to_delete);
@@ -89,20 +110,21 @@ class CallTraceStorage {
   // Triple-buffered storage with atomic pointers
   // Rotation: tmp=scratch, scratch=active, active=standby, standby=tmp
   // New active inherits preserved traces for continuity
-  std::atomic<CallTraceHashTable*> _active_storage;
-  std::atomic<CallTraceHashTable*> _standby_storage;
-  std::atomic<CallTraceHashTable*> _scratch_storage;
+  volatile CallTraceHashTable* _active_storage;
+  volatile CallTraceHashTable* _standby_storage;
+  volatile CallTraceHashTable* _scratch_storage;

   // Generation counter for ABA protection during table swaps
-  std::atomic<u32> _generation_counter;
+  u32 _generation_counter;

   // Liveness checkers - protected by simple spinlock during registration/clear
   // Using vector instead of unordered_set since std::function cannot be hashed
   std::vector<LivenessChecker> _liveness_checkers;
   SpinLock _liveness_lock; // Simple atomic lock for rare liveness operations

   // Static atomic for instance ID generation - avoids function-local static initialization issues
-  static std::atomic<u64> _next_instance_id;
+
+  static u64 _next_instance_id;

   // Lazy initialization helper to avoid global constructor
   static u64 getNextInstanceId();
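For orientation, here is a minimal sketch of how a cache-aligned hazard slot plus an occupancy bitmap can be published, cleared, and scanned with GCC atomic builtins alone. The function names and the publish/clear/scan ordering are illustrative assumptions, not the commit's API: the real acquire/release logic is in callTraceStorage.cpp, whose diff is not rendered above, and a real reader would also re-validate the protected pointer after publishing it.

```cpp
#include <cstddef>
#include <cstdint>

struct CallTraceHashTable;                       // opaque for this sketch

constexpr size_t kCacheLine = 64;                // stand-in for DEFAULT_CACHE_LINE_SIZE
constexpr int kMaxThreads = 8192;
constexpr int kBitmapWords = kMaxThreads / 64;

struct alignas(kCacheLine) HazardSlot {
  volatile CallTraceHashTable* pointer;
  char padding[kCacheLine - sizeof(pointer)];
};

static HazardSlot g_slots[kMaxThreads];          // zero-initialized: all pointers null
static uint64_t g_occupied[kBitmapWords];        // bit i set => slot i is in use

// Publish: mark the slot occupied, then expose the table pointer being protected.
void publishHazard(int slot, CallTraceHashTable* table) {
  __atomic_fetch_or(&g_occupied[slot / 64], 1ULL << (slot % 64), __ATOMIC_RELEASE);
  __atomic_store_n(&g_slots[slot].pointer, table, __ATOMIC_RELEASE);
}

// Clear: retract the pointer, then free the bitmap bit for reuse.
void clearHazard(int slot) {
  __atomic_store_n(&g_slots[slot].pointer,
                   static_cast<volatile CallTraceHashTable*>(nullptr), __ATOMIC_RELEASE);
  __atomic_fetch_and(&g_occupied[slot / 64], ~(1ULL << (slot % 64)), __ATOMIC_RELEASE);
}

// A reclaimer scans only words with occupied bits before deleting a table,
// instead of walking all 8192 slots.
bool anyoneProtecting(CallTraceHashTable* table) {
  for (int w = 0; w < kBitmapWords; ++w) {
    uint64_t word = __atomic_load_n(&g_occupied[w], __ATOMIC_ACQUIRE);
    while (word != 0) {
      int bit = __builtin_ctzll(word);
      word &= word - 1;                          // drop the lowest set bit
      if (__atomic_load_n(&g_slots[w * 64 + bit].pointer, __ATOMIC_ACQUIRE) == table) {
        return true;
      }
    }
  }
  return false;
}
```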

ddprof-lib/src/main/cpp/criticalSection.cpp

Lines changed: 3 additions & 4 deletions
@@ -9,7 +9,7 @@
 #include "thread.h"

 // Static bitmap storage for fallback cases
-std::atomic<uint64_t> CriticalSection::_fallback_bitmap[CriticalSection::FALLBACK_BITMAP_WORDS] = {};
+uint64_t CriticalSection::_fallback_bitmap[CriticalSection::FALLBACK_BITMAP_WORDS] = {};

 CriticalSection::CriticalSection() : _entered(false), _using_fallback(false), _word_index(0), _bit_mask(0) {
   ProfiledThread* current = ProfiledThread::current();
@@ -27,8 +27,7 @@ CriticalSection::CriticalSection() : _entered(false), _using_fallback(false), _w
     uint32_t bit_index = tid % 64;
     _bit_mask = 1ULL << bit_index;

-    // Atomically try to set the bit
-    uint64_t old_word = _fallback_bitmap[_word_index].fetch_or(_bit_mask, std::memory_order_relaxed);
+    uint64_t old_word = __atomic_fetch_or(&_fallback_bitmap[_word_index], _bit_mask, __ATOMIC_RELAXED);
     _entered = !(old_word & _bit_mask); // Success if bit was previously 0
   }
 }
@@ -37,7 +36,7 @@ CriticalSection::~CriticalSection() {
   if (_entered) {
     if (_using_fallback) {
       // Clear the bit atomically for fallback bitmap
-      _fallback_bitmap[_word_index].fetch_and(~_bit_mask, std::memory_order_relaxed);
+      __atomic_fetch_and(&_fallback_bitmap[_word_index], ~_bit_mask, __ATOMIC_RELAXED);
     } else {
       // Release ProfiledThread flag
       ProfiledThread* current = ProfiledThread::current();

ddprof-lib/src/main/cpp/criticalSection.h

Lines changed: 1 addition & 2 deletions
@@ -6,7 +6,6 @@
 #ifndef _CRITICALSECTION_H
 #define _CRITICALSECTION_H

-#include <atomic>
 #include <cstdint>
 #include <cstddef>

@@ -37,7 +36,7 @@ class CriticalSection {
   // Must be atomic because multiple signal handlers can run concurrently across
   // different threads and attempt to set/clear bits simultaneously. Compare-and-swap
   // operations ensure race-free bit manipulation even during signal interruption.
-  static std::atomic<uint64_t> _fallback_bitmap[FALLBACK_BITMAP_WORDS];
+  static uint64_t _fallback_bitmap[FALLBACK_BITMAP_WORDS];

   bool _entered; // Track if this instance successfully entered
   bool _using_fallback; // Track which storage mechanism we're using
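As a standalone illustration of the fallback-bitmap pattern in the two CriticalSection diffs above, the whole mechanism can be driven with raw uint64_t words and GCC builtins and no std::atomic at all. The function names and the word-index arithmetic here are assumptions for illustration, not the library's API:

```cpp
#include <cstdint>
#include <cstdio>

constexpr int kBitmapWords = 16;                 // illustrative size: covers 16 * 64 = 1024 tid values
static uint64_t g_bitmap[kBitmapWords] = {};

// Claim the bit for this tid; true means we entered (bit was previously clear).
bool try_enter(uint32_t tid) {
  uint32_t word = (tid / 64) % kBitmapWords;
  uint64_t mask = 1ULL << (tid % 64);
  uint64_t old = __atomic_fetch_or(&g_bitmap[word], mask, __ATOMIC_RELAXED);
  return !(old & mask);
}

// Release the bit so the next entry for this tid succeeds again.
void leave(uint32_t tid) {
  uint32_t word = (tid / 64) % kBitmapWords;
  uint64_t mask = 1ULL << (tid % 64);
  __atomic_fetch_and(&g_bitmap[word], ~mask, __ATOMIC_RELAXED);
}

int main() {
  uint32_t tid = 42;
  printf("first enter: %d\n", try_enter(tid));   // 1 -- claimed
  printf("reentry:     %d\n", try_enter(tid));   // 0 -- bit already set, reentry detected
  leave(tid);
  printf("after leave: %d\n", try_enter(tid));   // 1 -- claimable again
  leave(tid);
  return 0;
}
```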

ddprof-lib/src/main/cpp/ctimer.h

Lines changed: 3 additions & 2 deletions
@@ -26,7 +26,8 @@

 class CTimer : public Engine {
 private:
-  static std::atomic<bool> _enabled;
+  // This is accessed from signal handlers, so must be async-signal-safe
+  static bool _enabled;
   static long _interval;
   static CStack _cstack;
   static int _signal;
@@ -52,7 +53,7 @@ class CTimer : public Engine {
   void stop();

   inline void enableEvents(bool enabled) {
-    _enabled.store(enabled, std::memory_order_release);
+    __atomic_store_n(&_enabled, enabled, __ATOMIC_RELEASE);
   }
 };


ddprof-lib/src/main/cpp/ctimer_linux.cpp

Lines changed: 2 additions & 2 deletions
@@ -83,7 +83,7 @@ long CTimer::_interval;
 int CTimer::_max_timers = 0;
 int *CTimer::_timers = NULL;
 CStack CTimer::_cstack;
-std::atomic<bool> CTimer::_enabled{false};
+bool CTimer::_enabled = false;
 int CTimer::_signal;

 int CTimer::registerThread(int tid) {
@@ -207,7 +207,7 @@ void CTimer::signalHandler(int signo, siginfo_t *siginfo, void *ucontext) {
   int saved_errno = errno;
   // we want to ensure memory order because of the possibility the instance gets
   // cleared
-  if (!_enabled.load(std::memory_order_acquire))
+  if (!__atomic_load_n(&_enabled, __ATOMIC_ACQUIRE))
     return;
   int tid = 0;
   ProfiledThread *current = ProfiledThread::current();
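The ctimer.h and ctimer_linux.cpp hunks together form a release/acquire pair on a plain bool: enableEvents() release-stores the flag, the signal handler acquire-loads it. A minimal sketch of that handshake follows; the g_interval_ns field standing in for whatever state the control path publishes is an assumption for illustration only:

```cpp
#include <cstdint>

static bool g_enabled = false;
static long g_interval_ns = 0;                   // illustrative state published before the flag

// Control path (e.g. start/stop): write the state, then release-store the flag.
void enable_sampling(long interval_ns) {
  g_interval_ns = interval_ns;                             // ordinary write
  __atomic_store_n(&g_enabled, true, __ATOMIC_RELEASE);    // publishes g_interval_ns as well
}

void disable_sampling() {
  __atomic_store_n(&g_enabled, false, __ATOMIC_RELEASE);
}

// Signal-handler path: the acquire load pairs with the release store, so a
// handler that observes g_enabled == true also observes the state written
// before the flag was set -- with no std::atomic object involved.
bool handler_may_run() {
  return __atomic_load_n(&g_enabled, __ATOMIC_ACQUIRE);
}
```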

ddprof-lib/src/main/cpp/thread.h

Lines changed: 8 additions & 5 deletions
@@ -131,11 +131,14 @@ class ProfiledThread : public ThreadLocalData {
   void setFilterSlotId(int slotId) { _filter_slot_id = slotId; }

   // Signal handler reentrancy protection
-  bool tryEnterCriticalSection() {
-    return !_in_critical_section.exchange(true, std::memory_order_acquire);
+  bool tryEnterCriticalSection() {
+    // Uses GCC atomic builtin (no malloc, async-signal-safe)
+    bool expected = false;
+    return __atomic_compare_exchange_n(&_in_critical_section, &expected, true, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
   }
-  void exitCriticalSection() {
-    _in_critical_section.store(false, std::memory_order_release);
+  void exitCriticalSection() {
+    // Uses GCC atomic builtin (no malloc, async-signal-safe)
+    __atomic_store_n(&_in_critical_section, false, __ATOMIC_RELEASE);
   }

   // Hazard pointer management for lock-free memory reclamation (signal-safe)
@@ -162,7 +165,7 @@ class ProfiledThread : public ThreadLocalData {
   // and both contexts may attempt to enter the critical section. Without atomic exchange(),
   // both could see the flag as false and both would think they successfully entered.
   // The atomic exchange() is uninterruptible, ensuring only one context succeeds.
-  std::atomic<bool> _in_critical_section{false};
+  bool _in_critical_section{false};

   // Hazard pointer instance for signal-safe access (not atomic since only accessed by same thread)
   void* _hazard_instance{nullptr};
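A self-contained sketch of the compare-and-swap reentrancy guard above, using a single flag and free functions instead of the ProfiledThread members, purely for illustration:

```cpp
#include <cstdio>

static bool in_critical_section = false;         // one flag per thread in the real code

bool try_enter_critical_section() {
  bool expected = false;
  // Strong CAS: succeeds only if the flag was false; acquire on success, relaxed on failure.
  return __atomic_compare_exchange_n(&in_critical_section, &expected, true,
                                     /*weak=*/false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

void exit_critical_section() {
  __atomic_store_n(&in_critical_section, false, __ATOMIC_RELEASE);
}

int main() {
  printf("outer attempt:  %d\n", try_enter_critical_section());  // 1 -- entered
  printf("nested attempt: %d\n", try_enter_critical_section());  // 0 -- simulated signal reentry rejected
  exit_critical_section();
  printf("after exit:     %d\n", try_enter_critical_section());  // 1 -- entered again
  exit_critical_section();
  return 0;
}
```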

ddprof-lib/src/test/cpp/stress_callTraceStorage.cpp

Lines changed: 6 additions & 10 deletions
@@ -1673,21 +1673,17 @@ TEST_F(StressTestSuite, InstanceIdTraceIdStressTest) {
   std::vector<std::thread> workers;
   for (int t = 0; t < NUM_THREADS; ++t) {
     workers.emplace_back([&, t]() {
-      std::mt19937 gen(std::random_device{}() + t);
-      // No longer need storage distribution since we use single instance
-      std::uniform_int_distribution<uint32_t> bci_dis(1, 100000);
-      std::uniform_int_distribution<uintptr_t> method_dis(0x10000, 0xFFFFFF);
-
       for (int op = 0; op < OPERATIONS_PER_THREAD && !test_failed.load(); ++op) {
         try {
           // Use the single shared storage instance
           CallTraceStorage* storage = storage_instance;
-
-          // Create a unique frame to avoid hash collisions masking trace ID issues
+
+          // Create a DETERMINISTIC unique frame - no randomness
+          // Each (thread, operation) pair generates a unique frame
           ASGCT_CallFrame frame;
-          frame.bci = bci_dis(gen) + t * 1000000 + op; // Ensure uniqueness
-          frame.method_id = reinterpret_cast<jmethodID>(method_dis(gen) + t * 0x1000000);
-
+          frame.bci = t * 1000000 + op; // Deterministic: thread_id * 1M + op_index
+          frame.method_id = reinterpret_cast<jmethodID>(0x10000000ULL + (u64)t * 10000000ULL + (u64)op);
+
           // Calculate stack trace hash for analysis (simplified hash of frame data)
           u64 stack_hash = (u64)frame.bci ^ ((u64)frame.method_id << 32);

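A quick sanity check of the deterministic scheme above: as long as the per-thread operation count stays below one million, the (bci, method_id) pair is injective over (thread, op), so no two workers can ever produce the same frame, whereas the removed random bci offsets could occasionally collide across operations. A small standalone check, using illustrative sizes rather than the test's actual NUM_THREADS and OPERATIONS_PER_THREAD constants:

```cpp
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>

int main() {
  const int kThreads = 8;                        // illustrative, not NUM_THREADS from the test
  const int kOpsPerThread = 1000;                // illustrative, not OPERATIONS_PER_THREAD
  std::set<std::pair<uint64_t, uint64_t>> seen;
  for (int t = 0; t < kThreads; ++t) {
    for (int op = 0; op < kOpsPerThread; ++op) {
      uint64_t bci = (uint64_t)t * 1000000ULL + (uint64_t)op;
      uint64_t method_id = 0x10000000ULL + (uint64_t)t * 10000000ULL + (uint64_t)op;
      assert(seen.insert({bci, method_id}).second);   // every (thread, op) frame is unique
    }
  }
  return 0;
}
```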