Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
8e2bf94
Implement custom exception approach for errors from C code
bouwkast Oct 17, 2025
27067c0
Update to support constant_exception_message and ProfilingError
bouwkast Nov 7, 2025
3d908d7
Implement two-tier exception strategy for profiler errors
bouwkast Nov 7, 2025
b9bb905
Fix compilation errors by adding missing ruby_helpers.h includes
bouwkast Nov 7, 2025
032adc0
Fix header ordering in private_vm_api_access.c
bouwkast Nov 7, 2025
3f01db9
Fix: Declare exception classes locally in private_vm_api_access.c
bouwkast Nov 7, 2025
abe3f0d
Update RBS signature for renamed method constant_exception_message
bouwkast Nov 7, 2025
f04f01d
Fix: Move error method before private keyword
bouwkast Nov 7, 2025
9e8e039
Fix serialize! to raise ProfilingInternalError
bouwkast Nov 7, 2025
c31665f
Add compile-time safe macros for profiler exception raising
bouwkast Nov 14, 2025
6a292a4
Merge branch 'master' into steven/error-logs-remediation-custom-profi…
marcotc Nov 17, 2025
b5480a1
Working
marcotc Nov 17, 2025
807af7b
First try
marcotc Nov 18, 2025
d740503
Second
marcotc Nov 18, 2025
f14ee3a
wip
marcotc Nov 19, 2025
f4234c6
wip
marcotc Nov 19, 2025
b99a2cc
wip
marcotc Nov 19, 2025
c83b723
wip
marcotc Nov 19, 2025
90327d1
wip
marcotc Nov 20, 2025
d7f0dc3
wip
marcotc Nov 20, 2025
c191889
wip
marcotc Nov 20, 2025
639ee6e
wip
marcotc Nov 20, 2025
ef6b441
wip
marcotc Nov 20, 2025
5e6fc62
wip
marcotc Nov 21, 2025
19db45e
wip
marcotc Nov 21, 2025
962845d
wip
marcotc Nov 21, 2025
6c77ebc
Revert whitespace-only changes (blank lines)
marcotc Nov 21, 2025
b7edd96
Revert line wrapping and whitespace changes in libdatadog_api files
marcotc Nov 21, 2025
1192a01
Merge branch 'master' into marcotc/error-logs-remediation-custom-prof…
marcotc Nov 24, 2025
c1cb858
Rename prefix
marcotc Nov 24, 2025
2b260e6
Change arg type
marcotc Nov 24, 2025
dd009d1
Raise argument error exception
marcotc Nov 25, 2025
745a67f
Move init
marcotc Nov 25, 2025
fe72ab0
Add fmt string
marcotc Nov 25, 2025
4a2e3be
Add telemetry str
marcotc Nov 25, 2025
5432e65
Add errno telemetry
marcotc Nov 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
#include "clock_id.h"
#include "helpers.h"
#include "private_vm_api_access.h"
#include "ruby_helpers.h"
#include "time_helpers.h"

// Validate that our home-cooked pthread_id_for() matches pthread_self() for the current thread.
//
// Takes no arguments and returns nothing. When the two ids differ, an exception carrying the message
// "pthread_id_for() self-test failed" is raised; otherwise the function simply returns.
void self_test_clock_id(void) {
// Id of the calling thread as reported by pthread_self().
rb_nativethread_id_t expected_pthread_id = pthread_self();
// Id that pthread_id_for() computes for the current Ruby thread object.
rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current());

// NOTE(review): the two checks below have the same condition and message and differ only in how they raise
// (rb_raise + rb_eRuntimeError vs. raise_error + eNativeRuntimeError). This looks like the removed/added pair of a
// diff hunk shown without +/- markers -- confirm which single line belongs in the file.
if (expected_pthread_id != actual_pthread_id) rb_raise(rb_eRuntimeError, "pthread_id_for() self-test failed");
if (expected_pthread_id != actual_pthread_id) raise_error(eNativeRuntimeError, "pthread_id_for() self-test failed");
}

// Safety: This function is assumed never to raise exceptions by callers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) {
after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ||
after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID
) {
rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
raise_error(eNativeRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)");
}
#else
gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround();
Expand Down Expand Up @@ -472,10 +472,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) {
cpu_and_wall_time_worker_state *old_state = active_sampler_instance_state;
if (old_state != NULL) {
if (is_thread_alive(old_state->owner_thread)) {
rb_raise(
rb_eRuntimeError,
"Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread"
);
raise_error(eNativeRuntimeError, "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread");
} else {
// The previously active thread seems to have died without cleaning up after itself.
// In this case, we can still go ahead and start the profiler BUT we make sure to disable any existing tracepoint
Expand Down Expand Up @@ -821,7 +818,7 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) {
NULL
);
#else
rb_raise(rb_eArgError, "GVL profiling is not supported in this Ruby version");
raise_error(eNativeArgumentError, "GVL profiling is not supported in this Ruby version");
#endif
}

Expand Down Expand Up @@ -1093,7 +1090,7 @@ static void reset_stats_not_thread_safe(cpu_and_wall_time_worker_state *state) {
static void sleep_for(uint64_t time_ns) {
// As a simplification, we currently only support setting .tv_nsec
if (time_ns >= SECONDS_AS_NS(1)) {
grab_gvl_and_raise(rb_eArgError, "sleep_for can only sleep for less than 1 second, time_ns: %"PRIu64, time_ns);
grab_gvl_and_raise(eNativeArgumentError, "sleep_for can only sleep for less than 1 second, time_ns: %"PRIu64, time_ns);
}

struct timespec time_to_sleep = {.tv_nsec = time_ns};
Expand Down Expand Up @@ -1284,7 +1281,7 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) {

// Reports an error without raising at the call site: an exception object is built from the `error` message and
// handed, together with `state`, to stop_state().
//
// state: worker state forwarded to stop_state().
// error: NUL-terminated message used to build the exception object.
static void delayed_error(cpu_and_wall_time_worker_state *state, const char *error) {
// If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop.
// NOTE(review): stop_state() appears twice below, differing only in the exception class (rb_eRuntimeError vs.
// eNativeRuntimeError). This looks like the removed/added pair of a diff hunk shown without +/- markers -- confirm
// that only one call is intended.
stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error));
stop_state(state, rb_exc_new_cstr(eNativeRuntimeError, error));
}

static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void discrete_dynamic_sampler_reset(discrete_dynamic_sampler *sampler, long now_

// Stores a new overhead target on the sampler and then resets the sampler.
//
// sampler:         sampler whose target_overhead field is updated.
// target_overhead: accepted only when greater than 0 and at most 100; any other value raises an argument error
//                  whose message includes the rejected value.
// now_ns:          timestamp passed through unchanged to discrete_dynamic_sampler_reset().
void discrete_dynamic_sampler_set_overhead_target_percentage(discrete_dynamic_sampler *sampler, double target_overhead, long now_ns) {
if (target_overhead <= 0 || target_overhead > 100) {
// NOTE(review): the two raises below carry the same message and differ only in API/exception class (rb_raise +
// rb_eArgError vs. raise_error + eNativeArgumentError). This looks like the removed/added pair of a diff hunk shown
// without +/- markers -- confirm which single line belongs in the file.
rb_raise(rb_eArgError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
raise_error(eNativeArgumentError, "Target overhead must be a double between ]0,100] was %f", target_overhead);
}
sampler->target_overhead = target_overhead;
// The reset is invoked with the same sampler and timestamp now that the new target is in place.
return discrete_dynamic_sampler_reset(sampler, now_ns);
}

long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE);
if (now_ns == 0) {
rb_raise(rb_eRuntimeError, "failed to get clock time");
raise_error(eNativeRuntimeError, "failed to get clock time");
}
discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <datadog/profiling.h>

#include "collectors_gc_profiling_helper.h"
#include "ruby_helpers.h"

// This helper is used by the Datadog::Profiling::Collectors::ThreadContext to profile garbage collection.
// It's tested through that class' interfaces.
Expand Down Expand Up @@ -71,7 +72,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
1; // gc type

if (max_label_count > labels_length) {
rb_raise(rb_eArgError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count);
raise_error(eNativeArgumentError, "BUG: gc_profiling_set_metadata invalid labels_length (%d) < max_label_count (%d)", labels_length, max_label_count);
}

uint8_t label_pos = 0;
Expand Down Expand Up @@ -119,7 +120,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) {
};

if (label_pos > max_label_count) {
rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
raise_error(eNativeRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
}

return label_pos;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ static void *run_idle_sampling_loop(void *state_ptr) {
// Process pending action
if (next_action == ACTION_RUN) {
if (run_action_function == NULL) {
grab_gvl_and_raise(rb_eRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop");
grab_gvl_and_raise(eNativeRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop");
}

run_action_function();
Expand Down Expand Up @@ -206,7 +206,7 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE self, VALUE self_instance) {
void idle_sampling_helper_request_action(VALUE self_instance, void (*run_action_function)(void)) {
idle_sampling_loop_state *state;
if (!rb_typeddata_is_kind_of(self_instance, &idle_sampling_helper_typed_data)) {
grab_gvl_and_raise(rb_eTypeError, "Wrong argument for idle_sampling_helper_request_action");
grab_gvl_and_raise(eNativeTypeError, "Wrong argument for idle_sampling_helper_request_action");
}
// This should never fail if the above check passes
TypedData_Get_Struct(self_instance, idle_sampling_loop_state, &idle_sampling_helper_typed_data, state);
Expand Down
11 changes: 6 additions & 5 deletions ext/datadog_profiling_native_extension/collectors_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "datadog_ruby_common.h"
#include "private_vm_api_access.h"
#include "ruby_helpers.h"
#include "stack_recorder.h"
#include "collectors_stack.h"

Expand Down Expand Up @@ -284,11 +285,11 @@ void sample_thread(
// here, but >= 0 makes this easier to understand/debug.
bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0;

if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label");
if (cpu_or_wall_sample && state_label == NULL) raise_error(eNativeRuntimeError, "BUG: Unexpected missing state_label");

if (has_cpu_time) {
state_label->str = DDOG_CHARSLICE_C("had cpu");
if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting");
if (labels.is_gvl_waiting_state) raise_error(eNativeRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting");
}

int top_of_stack_position = captured_frames - 1;
Expand Down Expand Up @@ -600,8 +601,8 @@ bool prepare_sample_thread(VALUE thread, sampling_buffer *buffer) {
}

// Validates a requested max_frames value and returns it.
//
// max_frames: must be >= 5 and <= MAX_FRAMES_LIMIT; values outside that range raise an argument error.
// Returns the validated value, converted to uint16_t by the return statement.
uint16_t sampling_buffer_check_max_frames(int max_frames) {
// NOTE(review): each bound is checked twice below, once raising through rb_raise + rb_eArgError and once through
// raise_error + eNativeArgumentError, with identical messages. This looks like the removed/added lines of a diff
// hunk shown without +/- markers -- confirm which pair belongs in the file.
if (max_frames < 5) rb_raise(rb_eArgError, "Invalid max_frames: value must be >= 5");
if (max_frames > MAX_FRAMES_LIMIT) rb_raise(rb_eArgError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING);
if (max_frames < 5) raise_error(eNativeArgumentError, "Invalid max_frames: value must be >= 5");
if (max_frames > MAX_FRAMES_LIMIT) raise_error(eNativeArgumentError, "Invalid max_frames: value must be <= " MAX_FRAMES_LIMIT_AS_STRING);
return max_frames;
}

Expand All @@ -618,7 +619,7 @@ void sampling_buffer_initialize(sampling_buffer *buffer, uint16_t max_frames, dd

void sampling_buffer_free(sampling_buffer *buffer) {
if (buffer->max_frames == 0 || buffer->locations == NULL || buffer->stack_buffer == NULL) {
rb_raise(rb_eArgError, "sampling_buffer_free called with invalid buffer");
raise_error(eNativeArgumentError, "sampling_buffer_free called with invalid buffer");
}

ruby_xfree(buffer->stack_buffer);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "helpers.h"
#include "libdatadog_helpers.h"
#include "private_vm_api_access.h"
#include "ruby_helpers.h"
#include "stack_recorder.h"
#include "time_helpers.h"
#include "unsafe_api_calls_check.h"
Expand Down Expand Up @@ -518,7 +519,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
} else if (otel_context_enabled == ID2SYM(rb_intern("both"))) {
state->otel_context_enabled = OTEL_CONTEXT_ENABLED_BOTH;
} else {
rb_raise(rb_eArgError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
raise_error(eNativeArgumentError, "Unexpected value for otel_context_enabled: %+" PRIsVALUE, otel_context_enabled);
}

global_waiting_for_gvl_threshold_ns = NUM2UINT(waiting_for_gvl_threshold_ns);
Expand All @@ -539,7 +540,7 @@ static VALUE _native_initialize(int argc, VALUE *argv, DDTRACE_UNUSED VALUE _sel
static VALUE _native_sample(DDTRACE_UNUSED VALUE _self, VALUE collector_instance, VALUE profiler_overhead_stack_thread, VALUE allow_exception) {
ENFORCE_BOOLEAN(allow_exception);

if (!is_thread_alive(profiler_overhead_stack_thread)) rb_raise(rb_eArgError, "Unexpected: profiler_overhead_stack_thread is not alive");
if (!is_thread_alive(profiler_overhead_stack_thread)) raise_error(eNativeArgumentError, "Unexpected: profiler_overhead_stack_thread is not alive");

if (allow_exception == Qfalse) debug_enter_unsafe_context();

Expand Down Expand Up @@ -831,7 +832,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) {
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);

if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) {
rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
raise_error(eNativeRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available");
}

int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata
Expand Down Expand Up @@ -998,7 +999,7 @@ static void trigger_sample_for_thread(
// @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone
// changes the code erroneously and remove it entirely?
if (label_pos > max_label_count) {
rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
raise_error(eNativeRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count);
}

ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos};
Expand Down Expand Up @@ -1295,7 +1296,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns,
elapsed_time_ns = 0;
} else {
// We don't expect non-wall time to go backwards, so let's flag this as a bug
rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
raise_error(eNativeRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples");
}
}

Expand Down Expand Up @@ -1961,7 +1962,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
thread_context_collector_state *state;
TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);

if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled");
if (!state->timeline_enabled) raise_error(eNativeRuntimeError, "GVL profiling requires timeline to be enabled");

intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread);

Expand Down Expand Up @@ -2154,7 +2155,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) {
TypedData_Get_Struct(collector_instance, thread_context_collector_state, &thread_context_collector_typed_data, state);

per_thread_context *thread_context = get_context_for(thread, state);
if (thread_context == NULL) rb_raise(rb_eArgError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");
if (thread_context == NULL) raise_error(eNativeArgumentError, "Unexpected: This method cannot be used unless the per-thread context for the thread already exists");

thread_context->cpu_time_at_previous_sample_ns += NUM2LONG(delta_ns);

Expand Down
3 changes: 2 additions & 1 deletion ext/datadog_profiling_native_extension/encoded_profile.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "encoded_profile.h"
#include "datadog_ruby_common.h"
#include "libdatadog_helpers.h"
#include "ruby_helpers.h"

// This class exists to wrap a ddog_prof_EncodedProfile into a Ruby object
// This file implements the native bits of the Datadog::Profiling::EncodedProfile class
Expand Down Expand Up @@ -41,7 +42,7 @@ VALUE from_ddog_prof_EncodedProfile(ddog_prof_EncodedProfile profile) {
// Asks ddog_prof_EncodedProfile_bytes() for the bytes of the given encoded profile.
//
// state: encoded profile to read from.
// Returns the `ok` byte slice of the result. When the result is tagged as an error, an exception is raised instead,
// with a message that includes the value returned by get_error_details_and_drop(&raw_bytes.err).
static ddog_ByteSlice get_bytes(ddog_prof_EncodedProfile *state) {
ddog_prof_Result_ByteSlice raw_bytes = ddog_prof_EncodedProfile_bytes(state);
if (raw_bytes.tag == DDOG_PROF_RESULT_BYTE_SLICE_ERR_BYTE_SLICE) {
// NOTE(review): the two raises below carry the same message and differ only in API/exception class (rb_raise +
// rb_eRuntimeError vs. raise_error + eNativeRuntimeError). This looks like the removed/added pair of a diff hunk
// shown without +/- markers -- confirm which single line belongs in the file.
rb_raise(rb_eRuntimeError, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err));
raise_error(eNativeRuntimeError, "Failed to get bytes from profile: %"PRIsVALUE, get_error_details_and_drop(&raw_bytes.err));
}
return raw_bytes.ok;
}
Expand Down
Loading
Loading