Skip to content

Commit 7de6b47

Browse files
authored
Relax memory constraint on independent counters (#275)
* Relax memory constraint on counter * test * Revert test * Fixed heap * additional counters * One more
1 parent 21523a3 commit 7de6b47

File tree

7 files changed

+42
-29
lines changed

7 files changed

+42
-29
lines changed

ddprof-lib/src/main/cpp/arch_dd.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,32 @@ static inline long long atomicInc(volatile long long &var,
1414
return __sync_fetch_and_add(&var, increment);
1515
}
1616

17-
static inline u64 loadAcquire(volatile u64 &var) {
18-
return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
17+
template <typename T>
18+
static inline long long atomicIncRelaxed(volatile T &var,
19+
T increment = 1) {
20+
return __atomic_fetch_add(&var, increment, __ATOMIC_RELAXED);
1921
}
2022

21-
static inline size_t loadAcquire(volatile size_t &var) {
22-
return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
23+
// Atomic load/store (unordered)
24+
template <typename T>
25+
static inline T load(volatile T& var) {
26+
return __atomic_load_n(&var, __ATOMIC_RELAXED);
2327
}
2428

25-
static inline void storeRelease(volatile long long &var, long long value) {
26-
return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
29+
template <typename T>
30+
static inline void store(volatile T& var, T value) {
31+
return __atomic_store_n(&var, value, __ATOMIC_RELAXED);
32+
}
33+
34+
35+
// Atomic load-acquire/release-store
36+
template <typename T>
37+
static inline T loadAcquire(volatile T& var) {
38+
return __atomic_load_n(&var, __ATOMIC_ACQUIRE);
2739
}
2840

29-
static inline void storeRelease(volatile size_t &var, size_t value) {
41+
template <typename T>
42+
static inline void storeRelease(volatile T& var, T value) {
3043
return __atomic_store_n(&var, value, __ATOMIC_RELEASE);
3144
}
3245

ddprof-lib/src/main/cpp/callTraceHashTable.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ u64 CallTraceHashTable::put(int num_frames, ASGCT_CallFrame *frames,
301301

302302
if (++step >= capacity) {
303303
// Very unlikely case of a table overflow
304-
atomicInc(_overflow);
304+
atomicIncRelaxed(_overflow);
305305
return OVERFLOW_TRACE_ID;
306306
}
307307
// Improved version of linear probing
@@ -359,7 +359,7 @@ void CallTraceHashTable::collectAndCopySelective(std::unordered_set<CallTrace *>
359359
traces.insert(&_overflow_trace);
360360
if (trace_ids_to_preserve.find(OVERFLOW_TRACE_ID) != trace_ids_to_preserve.end()) {
361361
// Copy overflow trace to target - it's a static trace so just increment overflow counter
362-
atomicInc(target->_overflow);
362+
atomicIncRelaxed(target->_overflow);
363363
}
364364
}
365365
}

ddprof-lib/src/main/cpp/callTraceHashTable.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class CallTraceHashTable {
5252

5353
LinearAllocator _allocator;
5454
LongHashTable *_current_table;
55-
u64 _overflow;
55+
volatile u64 _overflow;
5656

5757
u64 calcHash(int num_frames, ASGCT_CallFrame *frames, bool truncated);
5858
CallTrace *storeCallTrace(int num_frames, ASGCT_CallFrame *frames,

ddprof-lib/src/main/cpp/counters.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,9 @@ class Counters {
124124
static void increment(CounterId counter, long long delta = 1,
125125
int offset = 0) {
126126
#ifdef COUNTERS
127-
atomicInc(Counters::instance()
128-
._counters[address(static_cast<int>(counter) + offset)],
129-
delta);
127+
atomicIncRelaxed(Counters::instance()
128+
._counters[address(static_cast<int>(counter) + offset)],
129+
delta);
130130
#endif // COUNTERS
131131
}
132132

ddprof-lib/src/main/cpp/livenessTracker.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ constexpr int LivenessTracker::MAX_TRACKING_TABLE_SIZE;
2626
constexpr int LivenessTracker::MIN_SAMPLING_INTERVAL;
2727

2828
void LivenessTracker::cleanup_table(bool forced) {
29-
u64 current = loadAcquire(_last_gc_epoch);
30-
u64 target_gc_epoch = loadAcquire(_gc_epoch);
29+
u64 current = load(_last_gc_epoch);
30+
u64 target_gc_epoch = load(_gc_epoch);
3131

3232
if ((target_gc_epoch == _last_gc_epoch ||
33-
!__sync_bool_compare_and_swap(&_last_gc_epoch, current,
34-
target_gc_epoch)) &&
33+
!__atomic_compare_exchange_n(&_last_gc_epoch, &current,
34+
target_gc_epoch, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) &&
3535
!forced) {
3636
// if the last processed GC epoch hasn't changed, or if we failed to update
3737
// it, there's nothing to do
@@ -383,10 +383,10 @@ void LivenessTracker::onGC() {
383383
}
384384

385385
// just increment the epoch
386-
atomicInc(_gc_epoch, 1);
386+
atomicIncRelaxed(_gc_epoch, u64(1));
387387

388388
if (!ddprof::HeapUsage::isLastGCUsageSupported()) {
389-
storeRelease(_used_after_last_gc, ddprof::HeapUsage::get(false)._used);
389+
store(_used_after_last_gc, ddprof::HeapUsage::get(false)._used);
390390
}
391391
}
392392

ddprof-lib/src/main/cpp/profiler.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ int Profiler::getJavaTraceAsync(void *ucontext, ASGCT_CallFrame *frames,
560560
return 0;
561561
}
562562

563-
atomicInc(_failures[-trace.num_frames]);
563+
atomicIncRelaxed(_failures[-trace.num_frames]);
564564
trace.frames->bci = BCI_ERROR;
565565
trace.frames->method_id = (jmethodID)err_string;
566566
return trace.frames - frames + 1;
@@ -608,14 +608,14 @@ void Profiler::fillFrameTypes(ASGCT_CallFrame *frames, int num_frames,
608608
}
609609

610610
u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event_type, Event *event, bool deferred) {
611-
atomicInc(_total_samples);
611+
atomicIncRelaxed(_total_samples);
612612

613613
u32 lock_index = getLockIndex(tid);
614614
if (!_locks[lock_index].tryLock() &&
615615
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
616616
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
617617
// Too many concurrent signals already
618-
atomicInc(_failures[-ticks_skipped]);
618+
atomicIncRelaxed(_failures[-ticks_skipped]);
619619

620620
return 0;
621621
}
@@ -655,14 +655,14 @@ u64 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event
655655
}
656656

657657
void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type, Event *event) {
658-
atomicInc(_total_samples);
658+
atomicIncRelaxed(_total_samples);
659659

660660
u32 lock_index = getLockIndex(tid);
661661
if (!_locks[lock_index].tryLock() &&
662662
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
663663
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
664664
// Too many concurrent signals already
665-
atomicInc(_failures[-ticks_skipped]);
665+
atomicIncRelaxed(_failures[-ticks_skipped]);
666666
return;
667667
}
668668

@@ -673,14 +673,14 @@ void Profiler::recordDeferredSample(int tid, u64 call_trace_id, jint event_type,
673673

674674
void Profiler::recordSample(void *ucontext, u64 counter, int tid,
675675
jint event_type, u64 call_trace_id, Event *event) {
676-
atomicInc(_total_samples);
676+
atomicIncRelaxed(_total_samples);
677677

678678
u32 lock_index = getLockIndex(tid);
679679
if (!_locks[lock_index].tryLock() &&
680680
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
681681
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
682682
// Too many concurrent signals already
683-
atomicInc(_failures[-ticks_skipped]);
683+
atomicIncRelaxed(_failures[-ticks_skipped]);
684684

685685
if (event_type == BCI_CPU && _cpu_engine == &perf_events) {
686686
// Need to reset PerfEvents ring buffer, even though we discard the
@@ -789,7 +789,7 @@ void Profiler::recordQueueTime(int tid, QueueTimeEvent *event) {
789789
void Profiler::recordExternalSample(u64 weight, int tid, int num_frames,
790790
ASGCT_CallFrame *frames, bool truncated,
791791
jint event_type, Event *event) {
792-
atomicInc(_total_samples);
792+
atomicIncRelaxed(_total_samples);
793793

794794
u64 call_trace_id =
795795
_call_trace_storage.put(num_frames, frames, truncated, weight);
@@ -799,7 +799,7 @@ void Profiler::recordExternalSample(u64 weight, int tid, int num_frames,
799799
!_locks[lock_index = (lock_index + 1) % CONCURRENCY_LEVEL].tryLock() &&
800800
!_locks[lock_index = (lock_index + 2) % CONCURRENCY_LEVEL].tryLock()) {
801801
// Too many concurrent signals already
802-
atomicInc(_failures[-ticks_skipped]);
802+
atomicIncRelaxed(_failures[-ticks_skipped]);
803803
return;
804804
}
805805

ddprof-lib/src/main/cpp/profiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class Profiler {
9696
WaitableMutex _timer_lock;
9797
void *_timer_id;
9898

99-
u64 _total_samples;
99+
volatile u64 _total_samples;
100100
u64 _failures[ASGCT_FAILURE_TYPES];
101101

102102
SpinLock _class_map_lock;

0 commit comments

Comments (0)