Skip to content

Commit 564c828

Browse files
committed
First working ver runtime stacks
1 parent 0049066 commit 564c828

File tree

3 files changed

+162
-43
lines changed

3 files changed

+162
-43
lines changed

ext/libdatadog_api/crashtracker.c

Lines changed: 22 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -175,28 +175,13 @@ static VALUE _native_stop(DDTRACE_UNUSED VALUE _self) {
175175
return Qtrue;
176176
}
177177

178-
// Ruby runtime stack callback implementation
179-
// This function will be called by libdatadog during crash handling
178+
180179
static void ruby_runtime_stack_callback(
181180
void (*emit_frame)(const ddog_crasht_RuntimeStackFrame*),
182181
void (*emit_stacktrace_string)(const char*)
183182
) {
184-
// Only using emit_frame - ignore emit_stacktrace_string parameter
185183
(void)emit_stacktrace_string;
186184

187-
// Try to safely walk Ruby stack using direct VM structure access
188-
// Avoid Ruby API calls that might hang in crash context
189-
190-
// First emit a marker frame to show callback is working
191-
ddog_crasht_RuntimeStackFrame marker_frame = {
192-
.function_name = "ruby_stack_walker_start",
193-
.file_name = "crashtracker.c",
194-
.line_number = 1,
195-
.column_number = 0
196-
};
197-
emit_frame(&marker_frame);
198-
199-
// Try to get current thread and execution context safely
200185
VALUE current_thread = rb_thread_current();
201186
if (current_thread == Qnil) return;
202187

@@ -213,7 +198,6 @@ static void ruby_runtime_stack_callback(
213198
const rb_execution_context_t *ec = th->ec;
214199
if (!ec) return;
215200

216-
// Safety checks
217201
if (th->status == THREAD_KILLED) return;
218202
if (!ec->vm_stack || ec->vm_stack_size == 0) return;
219203

@@ -222,40 +206,47 @@ static void ruby_runtime_stack_callback(
222206

223207
if (!cfp || !end_cfp) return;
224208

225-
// Skip dummy frames
226209
end_cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp);
227210
if (end_cfp <= cfp) return;
228211

229-
// Walk stack frames with extreme caution
212+
const rb_control_frame_t *top_sentinel = RUBY_VM_NEXT_CONTROL_FRAME(cfp);
213+
214+
cfp = RUBY_VM_NEXT_CONTROL_FRAME(end_cfp);
215+
230216
int frame_count = 0;
231-
const int MAX_FRAMES = 10; // Keep very low to minimize crash risk
232217

233-
for (; cfp != RUBY_VM_NEXT_CONTROL_FRAME(end_cfp) && frame_count < MAX_FRAMES;
234-
cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
218+
for (; cfp != top_sentinel; cfp = RUBY_VM_NEXT_CONTROL_FRAME(cfp)) {
219+
if (cfp->iseq && !cfp->pc) {
220+
continue;
221+
}
235222

236223
if (VM_FRAME_RUBYFRAME_P(cfp) && cfp->iseq) {
237-
// Instead of calling rb_iseq_* functions, work directly with iseq struct
238224
const rb_iseq_t *iseq = cfp->iseq;
239225

240-
// Basic frame info without risky API calls
241-
char frame_desc[64];
242-
snprintf(frame_desc, sizeof(frame_desc), "ruby_frame_%d", frame_count);
226+
VALUE name = rb_iseq_base_label(iseq);
227+
const char *function_name = (name != Qnil) ? RSTRING_PTR(name) : "<unknown>";
228+
229+
VALUE filename = rb_iseq_path(iseq);
230+
const char *file_name = (filename != Qnil) ? RSTRING_PTR(filename) : "<unknown>";
243231

244-
// Try to get basic line info safely using direct struct access
245232
int line_no = 0;
246233
if (iseq && cfp->pc) {
247-
// Use direct access to iseq body instead of helper functions
248234
if (iseq->body && iseq->body->iseq_encoded && iseq->body->iseq_size > 0) {
249235
ptrdiff_t pc_offset = cfp->pc - iseq->body->iseq_encoded;
250236
if (pc_offset >= 0 && pc_offset < iseq->body->iseq_size) {
251-
line_no = frame_count + 1; // Use frame position as approximation
237+
// Use the Ruby VM line calculation like ddtrace_rb_profile_frames
238+
size_t pos = pc_offset;
239+
if (pos > 0) {
240+
pos--; // Use pos-1 because PC points next instruction
241+
}
242+
line_no = rb_iseq_line_no(iseq, pos);
252243
}
253244
}
254245
}
255246

256247
ddog_crasht_RuntimeStackFrame frame = {
257-
.function_name = frame_desc,
258-
.file_name = "ruby_source.rb",
248+
.function_name = function_name,
249+
.file_name = file_name,
259250
.line_number = line_no,
260251
.column_number = 0
261252
};
@@ -264,21 +255,11 @@ static void ruby_runtime_stack_callback(
264255
frame_count++;
265256
}
266257
}
267-
268-
// Emit end marker
269-
ddog_crasht_RuntimeStackFrame end_frame = {
270-
.function_name = "ruby_stack_walker_end",
271-
.file_name = "crashtracker.c",
272-
.line_number = frame_count,
273-
.column_number = 0
274-
};
275-
emit_frame(&end_frame);
276258
}
277259

278260
static VALUE _native_register_runtime_stack_callback(DDTRACE_UNUSED VALUE _self, VALUE callback_type) {
279261
ENFORCE_TYPE(callback_type, T_SYMBOL);
280262

281-
// Verify we're using the frame type (should always be :frame)
282263
VALUE frame_symbol = ID2SYM(rb_intern("frame"));
283264
if (callback_type != frame_symbol) {
284265
rb_raise(rb_eArgError, "Invalid callback_type. Only :frame is supported");

spec/datadog/core/crashtracking/component_spec.rb

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@
240240
crash_tracker.start
241241
trigger.call
242242
end
243-
puts(crash_report_experimental)
243+
puts(crash_report_experimental.to_json)
244244
puts(log_messages)
245245
expect(stack_trace).to match(array_including(hash_including(function: function)))
246246
expect(stack_trace.size).to be > 10
@@ -348,7 +348,6 @@
348348
end
349349
end
350350

351-
352351
describe '#runtime_callback_registered?' do
353352
it 'returns true when callback is registered' do
354353
crashtracker = build_crashtracker(logger: logger)

test_crashtracker_standalone.rb

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/usr/bin/env ruby
2+
3+
require 'json'
4+
require 'webrick'
5+
require 'fiddle'
6+
require 'datadog'
7+
8+
# Simple HTTP server to capture crash reports
9+
def start_crash_server(port)
10+
server = WEBrick::HTTPServer.new(
11+
Port: port,
12+
Logger: WEBrick::Log.new(File.open(File::NULL, "w")),
13+
AccessLog: []
14+
)
15+
16+
crash_report = nil
17+
18+
server.mount_proc '/' do |req, res|
19+
if req.request_method == 'POST'
20+
body = req.body
21+
crash_report = JSON.parse(body) rescue body
22+
puts "=== CRASH REPORT RECEIVED ==="
23+
puts JSON.pretty_generate(crash_report) if crash_report.is_a?(Hash)
24+
puts "=== END CRASH REPORT ==="
25+
end
26+
res.body = '{}'
27+
end
28+
29+
Thread.new { server.start }
30+
[server, proc { crash_report }]
31+
end
32+
33+
# Nested Ruby functions to create a meaningful stack
34+
def level_5
35+
puts "In level_5 - about to crash"
36+
# Trigger segfault
37+
Fiddle.free(42)
38+
end
39+
40+
def level_4
41+
puts "In level_4"
42+
level_5
43+
end
44+
45+
def level_3
46+
puts "In level_3"
47+
level_4
48+
end
49+
50+
def level_2
51+
puts "In level_2"
52+
level_3
53+
end
54+
55+
def level_1
56+
puts "In level_1"
57+
level_2
58+
end
59+
60+
def main_crash_test
61+
puts "Starting crash test with nested functions"
62+
level_1
63+
end
64+
65+
# Main test
66+
puts "Starting standalone crashtracker test..."
67+
68+
# Start server
69+
server, get_crash_report = start_crash_server(9999)
70+
sleep 0.1 # Let server start
71+
72+
puts "Forking process to test crashtracker..."
73+
74+
pid = fork do
75+
begin
76+
puts "Child process started"
77+
78+
# Configure crashtracker
79+
Datadog.configure do |c|
80+
c.agent.host = '127.0.0.1'
81+
c.agent.port = 9999
82+
end
83+
84+
puts "Crashtracker configured, starting crash test..."
85+
86+
# Call our nested function that will crash
87+
main_crash_test
88+
89+
rescue => e
90+
puts "Unexpected error in child: #{e}"
91+
exit 1
92+
end
93+
end
94+
95+
# Wait for child process
96+
Process.wait(pid)
97+
puts "Child process finished with status: #{$?.exitstatus}"
98+
99+
# Give server time to receive the crash report
100+
sleep 2
101+
102+
# Get and save the crash report
103+
crash_report = get_crash_report.call
104+
if crash_report
105+
# Write full crash report to tmp file
106+
timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
107+
crash_file = "/tmp/crashtracker_report_#{timestamp}.json"
108+
File.write(crash_file, JSON.pretty_generate(crash_report))
109+
puts "\n=== CRASH REPORT SAVED ==="
110+
puts "Full crash report written to: #{crash_file}"
111+
112+
puts "\n=== RUNTIME STACK ANALYSIS ==="
113+
if crash_report.is_a?(Hash) && crash_report.dig('payload', 0, 'message')
114+
message = JSON.parse(crash_report.dig('payload', 0, 'message'))
115+
runtime_stack = message['experimental']['runtime_stack']
116+
if runtime_stack
117+
puts "Runtime stack format: #{runtime_stack['format']}"
118+
puts "Number of frames captured: #{runtime_stack['frames']&.length || 0}"
119+
puts "\nStack frames:"
120+
runtime_stack['frames']&.each_with_index do |frame, i|
121+
puts " [#{i}] #{frame['function']} @ #{frame['file']}:#{frame['line']}"
122+
end
123+
124+
runtime_stack_file = "/tmp/runtime_stack_#{timestamp}.json"
125+
File.write(runtime_stack_file, JSON.pretty_generate(runtime_stack))
126+
puts "\nRuntime stack saved to: #{runtime_stack_file}"
127+
else
128+
puts "No runtime_stack found in crash report"
129+
end
130+
else
131+
puts "Could not parse crash report structure"
132+
end
133+
else
134+
puts "No crash report received"
135+
end
136+
137+
# Cleanup
138+
server.shutdown
139+
puts "\nTest complete."

0 commit comments

Comments
 (0)