
Commit 06130aa

Fix another hidden use-after-free in CallTraceStorage (#301)
(cherry picked from commit e800295)
1 parent: 3f1ab0e

6 files changed: +322 additions, −24 deletions

ddprof-lib/src/main/cpp/callTraceHashTable.cpp

Lines changed: 16 additions & 8 deletions
@@ -102,24 +102,32 @@ CallTraceHashTable::~CallTraceHashTable() {
 }
 
 
-void CallTraceHashTable::clear() {
-  // Wait for all hazard pointers to clear before deallocation to prevent races
+ChunkList CallTraceHashTable::clearTableOnly() {
+  // Wait for all hazard pointers to clear before detaching chunks
   HazardPointer::waitForAllHazardPointersToClear();
-
+
   // Clear previous chain pointers to prevent traversal during deallocation
   for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
     LongHashTable *prev_table = table->prev();
     if (prev_table != nullptr) {
       table->setPrev(nullptr); // Clear link before deallocation
     }
   }
-
-  // Now safe to deallocate all memory
-  _allocator.clear();
-
-  // Reinitialize with fresh table
+
+  // Detach chunks for deferred deallocation - keeps trace memory alive
+  ChunkList detached_chunks = _allocator.detachChunks();
+
+  // Reinitialize with fresh table (using the new chunk from detachChunks)
   _table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
   _overflow = 0;
+
+  return detached_chunks;
+}
+
+void CallTraceHashTable::clear() {
+  // Clear table and immediately free chunks (original behavior)
+  ChunkList chunks = clearTableOnly();
+  LinearAllocator::freeChunks(chunks);
 }
 
 // Adaptation of MurmurHash64A by Austin Appleby

ddprof-lib/src/main/cpp/callTraceHashTable.h

Lines changed: 13 additions & 0 deletions
@@ -79,6 +79,19 @@ class CallTraceHashTable {
   ~CallTraceHashTable();
 
   void clear();
+
+  /**
+   * Resets the hash table structure but defers memory deallocation.
+   * Returns a ChunkList containing the detached memory chunks.
+   * The caller must call LinearAllocator::freeChunks() on the returned
+   * ChunkList after processing is complete.
+   *
+   * This is used to fix use-after-free in processTraces(): the table
+   * structure is reset immediately (allowing rotation), but trace memory
+   * remains valid until the processor finishes accessing it.
+   */
+  ChunkList clearTableOnly();
+
   void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace*)> trace_hook = nullptr);
 
   u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);

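For context beyond the diff: a hedged, hypothetical caller-side sketch of the contract documented in this header. This is not code from the commit; drainTable and consumer are illustrative names, and it assumes the ddprof-lib headers declare CallTraceHashTable, CallTrace, ChunkList and LinearAllocator.

#include <functional>
#include <unordered_set>
#include "callTraceHashTable.h"  // assumed to declare CallTraceHashTable, CallTrace, ChunkList
#include "linearAllocator.h"

// Hypothetical caller (not part of the commit): drain a table while keeping the
// collected CallTrace pointers valid until the consumer has finished with them.
void drainTable(CallTraceHashTable& standby,
                std::unordered_set<CallTrace*>& buffer,
                const std::function<void(const std::unordered_set<CallTrace*>&)>& consumer) {
  standby.collect(buffer);                      // raw pointers into the table's trace memory
  ChunkList chunks = standby.clearTableOnly();  // table reusable immediately, memory still alive
  consumer(buffer);                             // safe: the detached chunks are not freed yet
  LinearAllocator::freeChunks(chunks);          // deferred deallocation happens only now
}

processTraces() in callTraceStorage.cpp below follows exactly this shape: collect, clearTableOnly(), run the processor, then freeChunks().
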
ddprof-lib/src/main/cpp/callTraceStorage.cpp

Lines changed: 25 additions & 15 deletions
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag
 
 CallTraceStorage::~CallTraceStorage() {
   // Atomically invalidate storage pointers to prevent new put() operations
-  // Relaxed ordering is sufficient since no concurrent readers exist during destruction
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
+  // ACQ_REL ordering: RELEASE ensures nullptr is visible to put()'s ACQUIRE load,
+  // ACQUIRE ensures we see the latest pointer value for subsequent deletion
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));
 
   // Wait for any ongoing hazard pointer usage to complete and delete each unique table
   // Note: In triple-buffering, all three pointers should be unique, but check anyway
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
     }
   }
 
-  // PHASE 2: 8-Step Triple-Buffer Rotation
+  // PHASE 2: 10-Step Triple-Buffer Rotation
 
-  // Load storage pointers - relaxed ordering sufficient in single-threaded processTraces context
-  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
+  // Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
+  // previous processTraces() calls and constructor initialization
+  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));
 
   // Clear process collection for reuse (no malloc/free)
   _traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
     }
   });
 
-  // Step 3: Clear original_standby after collection -> it will become the new active
-  original_standby->clear();
+  // Step 3: Clear standby table structure but DEFER memory deallocation
+  // The standby traces are now in _traces_buffer as raw pointers.
+  // We must keep the underlying memory alive until processor() finishes.
+  // clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
+  ChunkList standby_chunks = original_standby->clearTableOnly();
 
   {
     // Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
 
   processor(_traces_buffer);
 
-  // Step 9: Clear the original active area (now scratch)
+  // Step 9: NOW safe to free standby chunks - processor is done accessing those traces
+  // This completes the deferred deallocation that prevents use-after-free
+  LinearAllocator::freeChunks(standby_chunks);
+
+  // Step 10: Clear the original active area (now scratch)
   original_active->clear();
 
   // Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
   // Mark critical section during clear operation for consistency
   CriticalSection cs;
 
-  // Load current table pointers - relaxed ordering sufficient within critical section
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
+  // Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
+  // from processTraces() rotation and constructor initialization
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
 
   // Direct clear operations with critical section protection
   if (active) {

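A note on the memory-ordering change above: the switch from __ATOMIC_RELAXED to ACQ_REL/ACQUIRE relies on the standard release/acquire pairing. As a minimal standalone sketch (not ddprof-lib code; Payload, g_published and the two threads are illustrative), compiled with -pthread:

#include <cstdio>
#include <thread>

struct Payload { int value; };

static Payload g_payload;
static Payload* g_published = nullptr;   // stand-in for a shared table pointer like _active_storage

int main() {
  std::thread writer([] {
    g_payload.value = 42;                                          // write the data first
    __atomic_store_n(&g_published, &g_payload, __ATOMIC_RELEASE);  // then publish the pointer
  });
  std::thread reader([] {
    Payload* p;
    // The ACQUIRE load synchronizes with the RELEASE store: once the pointer is
    // visible, the earlier write to value is guaranteed to be visible too. With
    // __ATOMIC_RELAXED that guarantee disappears on weakly ordered CPUs.
    while ((p = __atomic_load_n(&g_published, __ATOMIC_ACQUIRE)) == nullptr) {}
    std::printf("value = %d\n", p->value);
  });
  writer.join();
  reader.join();
  return 0;
}
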
ddprof-lib/src/main/cpp/linearAllocator.cpp

Lines changed: 109 additions & 1 deletion
@@ -17,6 +17,23 @@
 #include "linearAllocator.h"
 #include "counters.h"
 #include "os_dd.h"
+#include "common.h"
+#include <stdio.h>
+
+// Enable ASan memory poisoning for better use-after-free detection
+#ifdef __has_feature
+#if __has_feature(address_sanitizer)
+#define ASAN_ENABLED 1
+#endif
+#endif
+
+#ifdef __SANITIZE_ADDRESS__
+#define ASAN_ENABLED 1
+#endif
+
+#ifdef ASAN_ENABLED
+#include <sanitizer/asan_interface.h>
+#endif
 
 LinearAllocator::LinearAllocator(size_t chunk_size) {
   _chunk_size = chunk_size;
@@ -32,13 +49,85 @@ void LinearAllocator::clear() {
   if (_reserve->prev == _tail) {
     freeChunk(_reserve);
   }
+
+  // ASAN POISONING: Mark all allocated memory as poisoned BEFORE freeing chunks
+  // This catches use-after-free even when memory isn't munmap'd (kept in _tail)
+#ifdef ASAN_ENABLED
+  int chunk_count = 0;
+  size_t total_poisoned = 0;
+  for (Chunk *chunk = _tail; chunk != NULL; chunk = chunk->prev) {
+    // Poison from the start of usable data to the current offset
+    size_t used_size = chunk->offs - sizeof(Chunk);
+    if (used_size > 0) {
+      void* data_start = (char*)chunk + sizeof(Chunk);
+      ASAN_POISON_MEMORY_REGION(data_start, used_size);
+      chunk_count++;
+      total_poisoned += used_size;
+    }
+  }
+  if (chunk_count > 0) {
+    TEST_LOG("[LinearAllocator::clear] ASan poisoned %d chunks, %zu bytes total", chunk_count, total_poisoned);
+  }
+#endif
+
   while (_tail->prev != NULL) {
     Chunk *current = _tail;
     _tail = _tail->prev;
     freeChunk(current);
   }
   _reserve = _tail;
   _tail->offs = sizeof(Chunk);
+
+  // DON'T UNPOISON HERE - let alloc() do it on-demand!
+  // This ensures ASan can catch use-after-free bugs when code accesses
+  // memory that was cleared but not yet reallocated.
+}
+
+ChunkList LinearAllocator::detachChunks() {
+  // Capture current state before detaching
+  ChunkList result(_tail, _chunk_size);
+
+  // Handle reserve chunk: if it's ahead of tail, it needs special handling
+  if (_reserve->prev == _tail) {
+    // Reserve is a separate chunk ahead of tail - it becomes part of detached list
+    // We need to include it in the chain by making it the new head
+    result.head = _reserve;
+  }
+
+  // Allocate a fresh chunk for new allocations
+  Chunk* fresh = allocateChunk(NULL);
+  if (fresh != NULL) {
+    _tail = fresh;
+    _reserve = fresh;
+  } else {
+    // Allocation failed - restore original state and return empty list
+    // This maintains the invariant that the allocator is always usable
+    _reserve = _tail;
+    _tail->offs = sizeof(Chunk);
+    return ChunkList();
+  }
+
+  return result;
+}
+
+void LinearAllocator::freeChunks(ChunkList& chunks) {
+  if (chunks.head == nullptr || chunks.chunk_size == 0) {
+    return;
+  }
+
+  // Walk the chain and free each chunk
+  Chunk* current = chunks.head;
+  while (current != nullptr) {
+    Chunk* prev = current->prev;
+    OS::safeFree(current, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_BYTES, chunks.chunk_size);
+    Counters::decrement(LINEAR_ALLOCATOR_CHUNKS);
+    current = prev;
+  }
+
+  // Mark as freed to prevent double-free
+  chunks.head = nullptr;
+  chunks.chunk_size = 0;
 }
 
 void *LinearAllocator::alloc(size_t size) {
@@ -49,11 +138,20 @@ void *LinearAllocator::alloc(size_t size) {
        offs + size <= _chunk_size;
        offs = __atomic_load_n(&chunk->offs, __ATOMIC_ACQUIRE)) {
     if (__sync_bool_compare_and_swap(&chunk->offs, offs, offs + size)) {
+      void* allocated_ptr = (char *)chunk + offs;
+
+      // ASAN UNPOISONING: Unpoison ONLY the allocated region on-demand
+      // This allows ASan to detect use-after-free of memory that was cleared
+      // but not yet reallocated
+#ifdef ASAN_ENABLED
+      ASAN_UNPOISON_MEMORY_REGION(allocated_ptr, size);
+#endif
+
       if (_chunk_size / 2 - offs < size) {
         // Stepped over a middle of the chunk - it's time to prepare a new one
         reserveChunk(chunk);
       }
-      return (char *)chunk + offs;
+      return allocated_ptr;
     }
   }
 } while ((chunk = getNextChunk(chunk)) != NULL);
@@ -66,6 +164,16 @@ Chunk *LinearAllocator::allocateChunk(Chunk *current) {
   if (chunk != NULL) {
     chunk->prev = current;
     chunk->offs = sizeof(Chunk);
+
+    // ASAN UNPOISONING: New chunks from mmap are clean, unpoison them for use
+    // mmap returns memory that ASan may track as unallocated, so we need to
+    // explicitly unpoison it to allow allocations
+#ifdef ASAN_ENABLED
+    size_t usable_size = _chunk_size - sizeof(Chunk);
+    void* data_start = (char*)chunk + sizeof(Chunk);
+    ASAN_UNPOISON_MEMORY_REGION(data_start, usable_size);
+#endif
+
     Counters::increment(LINEAR_ALLOCATOR_BYTES, _chunk_size);
     Counters::increment(LINEAR_ALLOCATOR_CHUNKS);
   }

ddprof-lib/src/main/cpp/linearAllocator.h

Lines changed: 29 additions & 0 deletions
@@ -26,6 +26,19 @@ struct Chunk {
   char _padding[56];
 };
 
+/**
+ * Holds detached chunks from a LinearAllocator for deferred deallocation.
+ * Used to keep trace memory alive during processor execution while allowing
+ * the allocator to be reset for new allocations.
+ */
+struct ChunkList {
+  Chunk *head;
+  size_t chunk_size;
+
+  ChunkList() : head(nullptr), chunk_size(0) {}
+  ChunkList(Chunk *h, size_t sz) : head(h), chunk_size(sz) {}
+};
+
 class LinearAllocator {
  private:
   size_t _chunk_size;
@@ -43,6 +56,22 @@ class LinearAllocator {
 
   void clear();
 
+  /**
+   * Detaches all chunks from this allocator, returning them as a ChunkList.
+   * The allocator is reset to an empty state with a fresh chunk for new allocations.
+   * The detached chunks remain allocated and valid until freeChunks() is called.
+   *
+   * This enables deferred deallocation: reset the allocator immediately while
+   * keeping old data alive until it's no longer needed.
+   */
+  ChunkList detachChunks();
+
+  /**
+   * Frees all chunks in a previously detached ChunkList.
+   * Call this after processing is complete to deallocate the memory.
+   */
+  static void freeChunks(ChunkList& chunks);
+
   void *alloc(size_t size);
 };

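A hedged usage sketch of the new allocator API declared above (hypothetical driver code, not part of the commit, assuming the ddprof-lib headers are on the include path): detachChunks() hands ownership of the old chunks to the caller while new allocations continue from a fresh chunk, and freeChunks() both releases the chunks and resets the ChunkList, so an accidental second call is a harmless no-op rather than a double-free.

#include "linearAllocator.h"

// Hypothetical driver exercising detachChunks()/freeChunks().
void rotateAllocator(LinearAllocator& allocator) {
  void* old_data = allocator.alloc(64);          // lives in the chunks about to be detached
  ChunkList retired = allocator.detachChunks();  // allocator now serves from a fresh chunk

  void* new_data = allocator.alloc(64);          // unaffected by the detached chunks
  (void)new_data;

  // old_data is still valid to read here - the detached chunks are not freed yet.
  (void)old_data;

  LinearAllocator::freeChunks(retired);          // releases the chunks, resets 'retired'
  LinearAllocator::freeChunks(retired);          // no-op: head is nullptr after the first call
}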