
Commit e8109e8

Merge pull request #305 from DataDog/jb/1.34.4_backports
Backport critical fixes to 1.34.4
2 parents: 31de25d + 06130aa

13 files changed: 497 additions, 120 deletions

ddprof-lib/src/main/cpp/callTraceHashTable.cpp

Lines changed: 16 additions & 8 deletions
@@ -102,24 +102,32 @@ CallTraceHashTable::~CallTraceHashTable() {
 }
 
 
-void CallTraceHashTable::clear() {
-  // Wait for all hazard pointers to clear before deallocation to prevent races
+ChunkList CallTraceHashTable::clearTableOnly() {
+  // Wait for all hazard pointers to clear before detaching chunks
   HazardPointer::waitForAllHazardPointersToClear();
-
+
   // Clear previous chain pointers to prevent traversal during deallocation
   for (LongHashTable *table = _table; table != nullptr; table = table->prev()) {
     LongHashTable *prev_table = table->prev();
     if (prev_table != nullptr) {
       table->setPrev(nullptr); // Clear link before deallocation
     }
   }
-
-  // Now safe to deallocate all memory
-  _allocator.clear();
-
-  // Reinitialize with fresh table
+
+  // Detach chunks for deferred deallocation - keeps trace memory alive
+  ChunkList detached_chunks = _allocator.detachChunks();
+
+  // Reinitialize with fresh table (using the new chunk from detachChunks)
   _table = LongHashTable::allocate(nullptr, INITIAL_CAPACITY, &_allocator);
   _overflow = 0;
+
+  return detached_chunks;
+}
+
+void CallTraceHashTable::clear() {
+  // Clear table and immediately free chunks (original behavior)
+  ChunkList chunks = clearTableOnly();
+  LinearAllocator::freeChunks(chunks);
 }
 
 // Adaptation of MurmurHash64A by Austin Appleby

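In short, clearTableOnly() re-creates the table immediately but hands the old chunks back to the caller instead of freeing them. A minimal sketch of that detach-then-free idea, using toy Chunk/ChunkList/ToyAllocator types rather than the real LinearAllocator (the real detachChunks(), per the comment above, also provisions a fresh chunk for the reinitialized table; this sketch omits that and assumes chunks come from malloc):

#include <cstdlib>

struct Chunk {
  Chunk* next;
  // ... trace payload occupies the rest of the chunk ...
};

struct ChunkList {
  Chunk* head = nullptr;
};

struct ToyAllocator {
  Chunk* head = nullptr;

  // Hand ownership of every chunk to the caller: the allocator starts fresh,
  // but pointers into the detached chunks stay valid until freeChunks() runs.
  ChunkList detachChunks() {
    ChunkList out;
    out.head = head;
    head = nullptr;
    return out;
  }

  // Deferred deallocation: walk the detached list and release it.
  static void freeChunks(ChunkList list) {
    for (Chunk* c = list.head; c != nullptr;) {
      Chunk* next = c->next;
      std::free(c);
      c = next;
    }
  }
};
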
ddprof-lib/src/main/cpp/callTraceHashTable.h

Lines changed: 13 additions & 0 deletions
@@ -79,6 +79,19 @@ class CallTraceHashTable {
   ~CallTraceHashTable();
 
   void clear();
+
+  /**
+   * Resets the hash table structure but defers memory deallocation.
+   * Returns a ChunkList containing the detached memory chunks.
+   * The caller must call LinearAllocator::freeChunks() on the returned
+   * ChunkList after processing is complete.
+   *
+   * This is used to fix use-after-free in processTraces(): the table
+   * structure is reset immediately (allowing rotation), but trace memory
+   * remains valid until the processor finishes accessing it.
+   */
+  ChunkList clearTableOnly();
+
   void collect(std::unordered_set<CallTrace *> &traces, std::function<void(CallTrace*)> trace_hook = nullptr);
 
   u64 put(int num_frames, ASGCT_CallFrame *frames, bool truncated, u64 weight);

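The contract spelled out in that comment is the essential part: whoever calls clearTableOnly() owns the returned chunks and must free them only after no raw CallTrace pointers into them are dereferenced. A hypothetical caller, sketched with the names from this diff (the includes and the helper name are illustrative, not code from the repository):

#include <functional>

#include "callTraceHashTable.h"  // illustrative include; the real headers may differ

void rotate_and_process(CallTraceHashTable& standby,
                        const std::function<void()>& process_collected) {
  ChunkList chunks = standby.clearTableOnly();  // table reset; collected traces stay valid
  process_collected();                          // still safe to dereference those CallTrace*
  LinearAllocator::freeChunks(chunks);          // deferred free once processing is done
}
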
ddprof-lib/src/main/cpp/callTraceStorage.cpp

Lines changed: 25 additions & 15 deletions
@@ -371,10 +371,11 @@ CallTraceStorage::CallTraceStorage() : _active_storage(nullptr), _standby_storag
 
 CallTraceStorage::~CallTraceStorage() {
   // Atomically invalidate storage pointers to prevent new put() operations
-  // Relaxed ordering is sufficient since no concurrent readers exist during destruction
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_RELAXED));
-  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_RELAXED));
+  // ACQ_REL ordering: RELEASE ensures nullptr is visible to put()'s ACQUIRE load,
+  // ACQUIRE ensures we see the latest pointer value for subsequent deletion
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_active_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_standby_storage, nullptr, __ATOMIC_ACQ_REL));
+  CallTraceHashTable* scratch = const_cast<CallTraceHashTable*>(__atomic_exchange_n(&_scratch_storage, nullptr, __ATOMIC_ACQ_REL));
 
   // Wait for any ongoing hazard pointer usage to complete and delete each unique table
   // Note: In triple-buffering, all three pointers should be unique, but check anyway
@@ -472,12 +473,13 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
     }
   }
 
-  // PHASE 2: 8-Step Triple-Buffer Rotation
+  // PHASE 2: 10-Step Triple-Buffer Rotation
 
-  // Load storage pointers - relaxed ordering sufficient in single-threaded processTraces context
-  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_RELAXED));
+  // Load storage pointers - ACQUIRE ordering synchronizes with RELEASE stores from
+  // previous processTraces() calls and constructor initialization
+  CallTraceHashTable* original_active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* original_scratch = const_cast<CallTraceHashTable*>(__atomic_load_n(&_scratch_storage, __ATOMIC_ACQUIRE));
 
   // Clear process collection for reuse (no malloc/free)
   _traces_buffer.clear();
@@ -492,8 +494,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
     }
   });
 
-  // Step 3: Clear original_standby after collection -> it will become the new active
-  original_standby->clear();
+  // Step 3: Clear standby table structure but DEFER memory deallocation
+  // The standby traces are now in _traces_buffer as raw pointers.
+  // We must keep the underlying memory alive until processor() finishes.
+  // clearTableOnly() resets the table for reuse but returns the chunks for later freeing.
+  ChunkList standby_chunks = original_standby->clearTableOnly();
 
   {
     // Critical section for table swap operations - disallow signals to interrupt
@@ -531,7 +536,11 @@ void CallTraceStorage::processTraces(std::function<void(const std::unordered_set
 
   processor(_traces_buffer);
 
-  // Step 9: Clear the original active area (now scratch)
+  // Step 9: NOW safe to free standby chunks - processor is done accessing those traces
+  // This completes the deferred deallocation that prevents use-after-free
+  LinearAllocator::freeChunks(standby_chunks);
+
+  // Step 10: Clear the original active area (now scratch)
   original_active->clear();
 
   // Triple-buffer rotation maintains trace continuity with thread-safe malloc-free operations:
@@ -546,9 +555,10 @@ void CallTraceStorage::clear() {
   // Mark critical section during clear operation for consistency
   CriticalSection cs;
 
-  // Load current table pointers - relaxed ordering sufficient within critical section
-  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_RELAXED));
-  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_RELAXED));
+  // Load current table pointers - ACQUIRE ordering synchronizes with RELEASE stores
+  // from processTraces() rotation and constructor initialization
+  CallTraceHashTable* active = const_cast<CallTraceHashTable*>(__atomic_load_n(&_active_storage, __ATOMIC_ACQUIRE));
+  CallTraceHashTable* standby = const_cast<CallTraceHashTable*>(__atomic_load_n(&_standby_storage, __ATOMIC_ACQUIRE));
 
   // Direct clear operations with critical section protection
   if (active) {

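The ordering change is the standard publish/observe pairing: the destructor's exchange must publish the nullptr to concurrent put() callers (the release half) and must observe a valid table pointer before deleting it (the acquire half). A standalone sketch of the same pattern using std::atomic instead of the GCC __atomic builtins used above, with a simplified Table type and hypothetical function names:

#include <atomic>

struct Table { /* stand-in for the call-trace hash table */ };

std::atomic<Table*> g_active{nullptr};

// Teardown side: the acq_rel exchange publishes the nullptr (release half) and
// guarantees the returned pointer is safe to inspect and delete (acquire half).
Table* take_for_deletion() {
  return g_active.exchange(nullptr, std::memory_order_acq_rel);
}

// put() side: the acquire load pairs with the release half of the exchange above,
// so a non-null result refers to a table that is still registered and fully visible.
bool try_use() {
  Table* t = g_active.load(std::memory_order_acquire);
  if (t == nullptr) {
    return false; // storage is being torn down; drop the sample
  }
  // ... record into *t under hazard-pointer protection ...
  return true;
}
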
ddprof-lib/src/main/cpp/codeCache.cpp

Lines changed: 3 additions & 1 deletion
@@ -6,13 +6,15 @@
 #include "codeCache.h"
 #include "dwarf_dd.h"
 #include "os_dd.h"
+
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
 
 char *NativeFunc::create(const char *name, short lib_index) {
-  NativeFunc *f = (NativeFunc *)malloc(sizeof(NativeFunc) + 1 + strlen(name));
+  size_t size = align_up(sizeof(NativeFunc) + 1 + strlen(name), sizeof(NativeFunc*));
+  NativeFunc *f = (NativeFunc *)aligned_alloc(sizeof(NativeFunc*), size);
   f->_lib_index = lib_index;
   f->_mark = 0;
   // cppcheck-suppress memleak

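The round-up is what makes the switch legal: aligned_alloc requires the requested size to be an integral multiple of the alignment, a constraint malloc never imposed. A small self-contained sketch of the same pattern, where align_up_example is a stand-in for the project's align_up from utils.h:

#include <cstdlib>
#include <cstring>

// Round n up to the next multiple of a (a must be a power of two).
static inline std::size_t align_up_example(std::size_t n, std::size_t a) {
  return (n + a - 1) & ~(a - 1);
}

// std::aligned_alloc requires size % alignment == 0, hence the round-up before the call.
char* copy_name_aligned(const char* name) {
  std::size_t size = align_up_example(std::strlen(name) + 1, sizeof(void*));
  char* p = static_cast<char*>(std::aligned_alloc(sizeof(void*), size));
  if (p != nullptr) {
    std::strcpy(p, name);
  }
  return p;
}
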
ddprof-lib/src/main/cpp/codeCache.h

Lines changed: 3 additions & 1 deletion
@@ -6,6 +6,8 @@
 #ifndef _CODECACHE_H
 #define _CODECACHE_H
 
+#include "utils.h"
+
 #include <jvmti.h>
 #include <stdlib.h>
 #include <string.h>
@@ -62,7 +64,7 @@ class NativeFunc {
 
   static short libIndex(const char *name) {
     NativeFunc* func = from(name);
-    if (posix_memalign((void**)(&func), sizeof(NativeFunc*), sizeof(NativeFunc)) != 0) {
+    if (!is_aligned(func, sizeof(func))) {
      return -1;
    }
    return func->_lib_index;

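The replaced branch appears to have been a bug: posix_memalign overwrote func with a freshly allocated, uninitialized block, and _lib_index was then read from that block. The fix instead validates the alignment of the pointer recovered by from(name), which the aligned_alloc change in create() now guarantees. An alignment check along these lines (hypothetical name; the real is_aligned comes from the newly included utils.h):

#include <cstddef>
#include <cstdint>

// True if p is aligned to 'alignment' bytes (alignment must be a power of two).
static inline bool is_aligned_example(const void* p, std::size_t alignment) {
  return (reinterpret_cast<std::uintptr_t>(p) & (alignment - 1)) == 0;
}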