|
5 | 5 |
|
6 | 6 | using System; |
7 | 7 | using System.Collections.Generic; |
| 8 | +using System.Diagnostics; |
8 | 9 | using System.Linq; |
9 | 10 | using System.Threading; |
10 | 11 | using Datadog.Trace.Debugger.Expressions; |
11 | 12 | using Datadog.Trace.Debugger.Helpers; |
| 13 | +using Datadog.Trace.Debugger.PInvoke; |
12 | 14 | using Datadog.Trace.Debugger.RateLimiting; |
13 | 15 | using Datadog.Trace.Debugger.Sink.Models; |
14 | | -using Datadog.Trace.Vendors.Serilog; |
| 16 | +using Datadog.Trace.Logging; |
| 17 | +using Datadog.Trace.Vendors.Serilog.Events; |
15 | 18 |
|
16 | 19 | #nullable enable |
17 | 20 | namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation |
18 | 21 | { |
19 | 22 | internal class ExceptionReplayProbe |
20 | 23 | { |
| 24 | + private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor<ExceptionReplayProbe>(); |
21 | 25 | private readonly int _hashCode; |
22 | 26 | private readonly object _locker = new(); |
23 | 27 | private readonly List<ExceptionCase> _exceptionCases = new(); |
@@ -107,6 +111,8 @@ private void ProcessCase(ExceptionCase @case) |
107 | 111 |
|
108 | 112 | internal void AddExceptionCase(ExceptionCase @case, bool isPartOfCase) |
109 | 113 | { |
| 114 | + var shouldRefreshAfterLock = false; |
| 115 | + |
110 | 116 | lock (_locker) |
111 | 117 | { |
112 | 118 | if (isPartOfCase && ShouldInstrument()) |
@@ -134,6 +140,13 @@ internal void AddExceptionCase(ExceptionCase @case, bool isPartOfCase) |
134 | 140 |
|
135 | 141 | _exceptionCases.Add(@case); |
136 | 142 | ProcessCase(@case); |
| 143 | + |
| 144 | + shouldRefreshAfterLock = @case.Probes?.Length == 1; |
| 145 | + } |
| 146 | + |
| 147 | + if (shouldRefreshAfterLock) |
| 148 | + { |
| 149 | + TryRefreshSingleFrameProbeStatus(); |
137 | 150 | } |
138 | 151 | } |
139 | 152 |
|
@@ -166,5 +179,79 @@ public override int GetHashCode() |
166 | 179 | { |
167 | 180 | return _hashCode; |
168 | 181 | } |
| 182 | + |
| 183 | + /// <summary> |
| 184 | + /// If an exception case only contains a single customer frame, we never build parent/child call-path hashes, |
| 185 | + /// meaning the ordinary probe-status polling code in <see cref="ExceptionProbeProcessor"/> never executes. |
| 186 | + /// For CI Visibility (and other single-frame scenarios) this left probes permanently stuck in the default |
| 187 | + /// <see cref="Status.RECEIVED"/> state, so snapshots were never captured. To avoid changing the behaviour |
| 188 | + /// for multi-frame cases, we perform a one-off eager poll right after the probe is attached. The poll is |
| 189 | + /// executed outside the probe lock because we may wait up to a few seconds while the CLR completes ReJIT and |
| 190 | + /// we do not want to block unrelated instrumentation updates. |
| 191 | + /// </summary> |
| 192 | + private void TryRefreshSingleFrameProbeStatus() |
| 193 | + { |
| 194 | + if (string.IsNullOrEmpty(ProbeId)) |
| 195 | + { |
| 196 | + return; |
| 197 | + } |
| 198 | + |
| 199 | + try |
| 200 | + { |
| 201 | + // In practice the native tracer reports INSTALLED for ~500 ms after we request ReJIT, but CI Visibility |
| 202 | + // tests regularly need a little longer (module load + async offloader). We therefore try a handful of |
| 203 | + // times with a generous delay so we can observe the final INSTRUMENTED status without changing the |
| 204 | + // behaviour for other scenarios. |
| 205 | + const int maxAttempts = 20; |
| 206 | + var stopwatch = Stopwatch.StartNew(); |
| 207 | + |
| 208 | + for (var attempt = 0; attempt < maxAttempts; attempt++) |
| 209 | + { |
| 210 | + var statuses = DebuggerNativeMethods.GetProbesStatuses(new[] { ProbeId }); |
| 211 | + if (statuses.Length == 0) |
| 212 | + { |
| 213 | + return; |
| 214 | + } |
| 215 | + |
| 216 | + var previous = ProbeStatus; |
| 217 | + ProbeStatus = statuses[0].Status; |
| 218 | + ErrorMessage = statuses[0].ErrorMessage; |
| 219 | + |
| 220 | + if (Log.IsEnabled(LogEventLevel.Debug)) |
| 221 | + { |
| 222 | + var message = $"Eager status refresh for single-frame probe {ProbeId}. Previous={previous}, Current={ProbeStatus}, Attempt={attempt + 1}, ElapsedMs={stopwatch.ElapsedMilliseconds}"; |
| 223 | + Log.Debug("{Message}", message); |
| 224 | + } |
| 225 | + |
| 226 | + if (ProbeStatus == Status.INSTRUMENTED) |
| 227 | + { |
| 228 | + break; |
| 229 | + } |
| 230 | + |
| 231 | + if (ProbeStatus == Status.ERROR || ProbeStatus == Status.BLOCKED) |
| 232 | + { |
| 233 | + break; |
| 234 | + } |
| 235 | + |
| 236 | + if (attempt < maxAttempts - 1) |
| 237 | + { |
| 238 | + Thread.Sleep(attempt == 0 ? 1_500 : 250); |
| 239 | + } |
| 240 | + } |
| 241 | + |
| 242 | + if (ProbeStatus != Status.INSTRUMENTED) |
| 243 | + { |
| 244 | + Log.Warning( |
| 245 | + "Single-frame probe {ProbeId} never reported INSTRUMENTED during eager refresh. FinalStatus={Status}, TotalWaitMs={ElapsedMs}", |
| 246 | + ProbeId, |
| 247 | + ProbeStatus, |
| 248 | + stopwatch.ElapsedMilliseconds); |
| 249 | + } |
| 250 | + } |
| 251 | + catch (Exception ex) |
| 252 | + { |
| 253 | + Log.Warning(ex, "Failed to eagerly refresh probe status for {ProbeId}", ProbeId); |
| 254 | + } |
| 255 | + } |
169 | 256 | } |
170 | 257 | } |
0 commit comments