Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tracer/build/_build/Build.Shared.Steps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ partial class Build
{
DeleteDirectory(NativeLoaderProject.Directory / "bin");

var finalArchs = FastDevLoop ? "arm64" : string.Join(';', OsxArchs);
var finalArchs = string.Join(';', OsxArchs);
var buildDirectory = NativeBuildDirectory + "_" + finalArchs.Replace(';', '_');
EnsureExistingDirectory(buildDirectory);

Expand Down
2 changes: 1 addition & 1 deletion tracer/build/_build/Build.Steps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ bool RequiresThoroughTesting()
{
DeleteDirectory(NativeTracerProject.Directory / "build");

var finalArchs = FastDevLoop ? "arm64" : string.Join(';', OsxArchs);
var finalArchs = string.Join(';', OsxArchs);
var buildDirectory = NativeBuildDirectory + "_" + finalArchs.Replace(';', '_');
EnsureExistingDirectory(buildDirectory);

Expand Down
5 changes: 5 additions & 0 deletions tracer/build/_build/Build.Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,11 @@ Target RegenerateSolutions
.Description("Regenerates the 'build' solutions based on the 'master' solution")
.Executes(() =>
{
if (FastDevLoop)
{
return;
}

// Create a copy of the "full solution"
var sln = ProjectModelTasks.CreateSolution(
fileName: RootDirectory / "Datadog.Trace.Samples.g.sln",
Expand Down
5 changes: 5 additions & 0 deletions tracer/build/_build/Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ public Build()
.Description("Cleans all build output")
.Executes(() =>
{
if (FastDevLoop)
{
return;
}

if (IsWin)
{
// These are created as part of the CreatePlatformlessSymlinks target and cause havoc
Expand Down
5 changes: 5 additions & 0 deletions tracer/src/Datadog.Trace.Tools.Runner/CiUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,11 @@ async Task UploadRepositoryChangesAsync()
profilerEnvironmentVariables[Configuration.ConfigurationKeys.Debugger.ExceptionReplayEnabled] = "1";
profilerEnvironmentVariables[Configuration.ConfigurationKeys.Debugger.RateLimitSeconds] = "0";
profilerEnvironmentVariables[Configuration.ConfigurationKeys.Debugger.UploadFlushInterval] = "1000";

if (agentless)
{
profilerEnvironmentVariables[Configuration.ConfigurationKeys.Debugger.ExceptionReplayAgentlessEnabled] = "1";
}
}

// Let's set the code coverage datacollector if the code coverage is enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,18 @@ internal static class Debugger
/// </summary>
/// <seealso cref="DebuggerSettings.CodeOriginMaxUserFrames"/>
public const string CodeOriginMaxUserFrames = "DD_CODE_ORIGIN_FOR_SPANS_MAX_USER_FRAMES";

/// <summary>
/// Configuration key for enabling or disabling agentless Exception Replay uploads.
/// Default value is false.
/// </summary>
public const string ExceptionReplayAgentlessEnabled = "DD_EXCEPTION_REPLAY_AGENTLESS_ENABLED";

/// <summary>
/// Configuration key for overriding the agentless Exception Replay intake URL.
/// Default value is derived from DD_SITE (https://debugger-intake.&lt;site&gt;/api/v2/debugger).
/// </summary>
public const string ExceptionReplayAgentlessUrl = "DD_EXCEPTION_REPLAY_AGENTLESS_URL";
}
}
}
6 changes: 6 additions & 0 deletions tracer/src/Datadog.Trace/Debugger/DebuggerManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ private void InitializeSymbolUploaderIfNeeded(TracerSettings tracerSettings, Deb
return;
}

if (ExceptionReplaySettings.AgentlessEnabled)
{
Log.Information("Exception Replay agentless mode enabled; skipping symbol uploader initialization because it requires the Datadog Agent and Remote Configuration.");
return;
}

if (Interlocked.CompareExchange(ref _symDbInitialized, 1, 0) != 0)
{
// Once created, the symbol uploader persists even if DI is later disabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@

using System;
using System.Threading.Tasks;
using Datadog.Trace.Agent;
using Datadog.Trace.Debugger.ExceptionAutoInstrumentation.ThirdParty;
using Datadog.Trace.Debugger.Helpers;
using Datadog.Trace.Debugger.Sink;
using Datadog.Trace.Debugger.Snapshots;
using Datadog.Trace.Debugger.Upload;
using Datadog.Trace.HttpOverStreams;
using Datadog.Trace.Logging;

namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
Expand Down Expand Up @@ -65,25 +63,38 @@ private void InitSnapshotsSink()
// Set up the snapshots sink.
var snapshotSlicer = SnapshotSlicer.Create(debuggerSettings);
_snapshotSink = SnapshotSink.Create(debuggerSettings, snapshotSlicer);
// TODO: respond to changes in exporter settings
var apiFactory = AgentTransportStrategy.Get(
tracer.Settings.Manager.InitialExporterSettings,
productName: "debugger",
tcpTimeout: TimeSpan.FromSeconds(15),
AgentHttpHeaderNames.MinimalHeaders,
() => new MinimalAgentHeaderHelper(),
uri => uri);
var discoveryService = tracer.TracerManager.DiscoveryService;
var gitMetadataTagsProvider = tracer.TracerManager.GitMetadataTagsProvider;
ExceptionReplayTransportInfo transportInfo;

var snapshotUploadApi = DebuggerUploadApiFactory.CreateSnapshotUploadApi(apiFactory, discoveryService, gitMetadataTagsProvider);
try
{
transportInfo = ExceptionReplayTransportFactory.Create(tracer.Settings, Settings, discoveryService);
}
catch (InvalidOperationException ex)
{
Log.Error(ex, "Exception Replay transport could not be initialized in agentless mode. Disabling Exception Replay.");
_isDisabled = true;
return;
}

var snapshotUploadApi = DebuggerUploadApiFactory.CreateSnapshotUploadApi(
transportInfo.ApiRequestFactory,
transportInfo.DiscoveryService,
gitMetadataTagsProvider,
transportInfo.StaticEndpoint);
var snapshotBatchUploader = BatchUploader.Create(snapshotUploadApi);

_uploader = SnapshotUploader.Create(
snapshotSink: _snapshotSink,
snapshotBatchUploader: snapshotBatchUploader,
debuggerSettings);

if (transportInfo.IsAgentless)
{
Log.Information("Exception Replay agentless uploads enabled. Symbol uploads remain unavailable without the Datadog Agent.");
}

_ = Task.Run(() => _uploader.StartFlushingAsync())
.ContinueWith(
t =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,23 @@

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using Datadog.Trace.Debugger.Expressions;
using Datadog.Trace.Debugger.Helpers;
using Datadog.Trace.Debugger.PInvoke;
using Datadog.Trace.Debugger.RateLimiting;
using Datadog.Trace.Debugger.Sink.Models;
using Datadog.Trace.Vendors.Serilog;
using Datadog.Trace.Logging;
using Datadog.Trace.Vendors.Serilog.Events;

#nullable enable
namespace Datadog.Trace.Debugger.ExceptionAutoInstrumentation
{
internal class ExceptionReplayProbe
{
private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor<ExceptionReplayProbe>();
private readonly int _hashCode;
private readonly object _locker = new();
private readonly List<ExceptionCase> _exceptionCases = new();
Expand Down Expand Up @@ -107,6 +111,8 @@ private void ProcessCase(ExceptionCase @case)

internal void AddExceptionCase(ExceptionCase @case, bool isPartOfCase)
{
var shouldRefreshAfterLock = false;

lock (_locker)
{
if (isPartOfCase && ShouldInstrument())
Expand Down Expand Up @@ -134,6 +140,13 @@ internal void AddExceptionCase(ExceptionCase @case, bool isPartOfCase)

_exceptionCases.Add(@case);
ProcessCase(@case);

shouldRefreshAfterLock = @case.Probes?.Length == 1;
}

if (shouldRefreshAfterLock)
{
TryRefreshSingleFrameProbeStatus();
}
}

Expand Down Expand Up @@ -166,5 +179,85 @@ public override int GetHashCode()
{
return _hashCode;
}

/// <summary>
/// Eagerly polls the native probe status for a probe that was just attached to a single-frame exception case.
/// When an exception case only contains a single customer frame, no parent/child call-path hashes are built,
/// so the regular probe-status polling in <see cref="ExceptionProbeProcessor"/> never runs. For CI Visibility
/// (and other single-frame scenarios) that left probes stuck in the default <see cref="Status.RECEIVED"/>
/// state and snapshots were never captured. Multi-frame cases are intentionally left untouched.
/// The caller invokes this outside the probe lock: the loop below may sleep for several seconds while the
/// CLR completes ReJIT, and unrelated instrumentation updates must not be blocked during that time.
/// Any exception raised while polling is logged as a warning and swallowed.
/// </summary>
private void TryRefreshSingleFrameProbeStatus()
{
    // Only apply this patch to the test optimization product, and only when there is a probe id to query.
    if (!Datadog.Trace.Ci.TestOptimization.Instance.IsRunning || string.IsNullOrEmpty(ProbeId))
    {
        return;
    }

    // In practice the native tracer reports INSTALLED for ~500 ms after ReJIT is requested, but CI Visibility
    // tests regularly need longer (module load + async offloader). Hence a generous first pause followed by
    // short retries, so the final INSTRUMENTED status can be observed without affecting other scenarios.
    const int maxAttempts = 20;
    const int firstDelayMs = 1_500;
    const int retryDelayMs = 250;

    try
    {
        var stopwatch = Stopwatch.StartNew();

        for (var attempt = 0; attempt < maxAttempts; attempt++)
        {
            var polled = DebuggerNativeMethods.GetProbesStatuses(new[] { ProbeId });
            if (polled.Length == 0)
            {
                // Nothing reported for this probe id; give up without touching the current state.
                return;
            }

            var previous = ProbeStatus;
            var latest = polled[0];
            ProbeStatus = latest.Status;
            ErrorMessage = latest.ErrorMessage;

            if (Log.IsEnabled(LogEventLevel.Debug))
            {
                // The message is only formatted when debug logging is actually enabled.
                var message = $"Eager status refresh for single-frame probe {ProbeId}. Previous={previous}, Current={ProbeStatus}, Attempt={attempt + 1}, ElapsedMs={stopwatch.ElapsedMilliseconds}";
                Log.Debug("{Message}", message);
            }

            // INSTRUMENTED is the desired outcome; ERROR and BLOCKED also end the polling early.
            var reachedTerminalState = ProbeStatus == Status.INSTRUMENTED
                                    || ProbeStatus == Status.ERROR
                                    || ProbeStatus == Status.BLOCKED;
            if (reachedTerminalState)
            {
                break;
            }

            // No point in sleeping after the final attempt.
            var isLastAttempt = attempt == maxAttempts - 1;
            if (!isLastAttempt)
            {
                var delayMs = attempt == 0 ? firstDelayMs : retryDelayMs;
                Thread.Sleep(delayMs);
            }
        }

        if (ProbeStatus != Status.INSTRUMENTED)
        {
            Log.Warning(
                "Single-frame probe {ProbeId} never reported INSTRUMENTED during eager refresh. FinalStatus={Status}, TotalWaitMs={ElapsedMs}",
                ProbeId,
                ProbeStatus,
                stopwatch.ElapsedMilliseconds);
        }
    }
    catch (Exception ex)
    {
        Log.Warning(ex, "Failed to eagerly refresh probe status for {ProbeId}", ProbeId);
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ internal class ExceptionReplaySettings
public const int DefaultMaxFramesToCapture = 4;
public const int DefaultRateLimitSeconds = 60 * 60; // 1 hour
public const int DefaultMaxExceptionAnalysisLimit = 100;
private const string DefaultSite = "datadoghq.com";

public ExceptionReplaySettings(IConfigurationSource? source, IConfigurationTelemetry telemetry)
{
Expand Down Expand Up @@ -47,6 +48,11 @@ public ExceptionReplaySettings(IConfigurationSource? source, IConfigurationTelem
.WithKeys(ConfigurationKeys.Debugger.MaxExceptionAnalysisLimit)
.AsInt32(DefaultMaxExceptionAnalysisLimit, x => x > 0)
.Value;

AgentlessEnabled = config.WithKeys(ConfigurationKeys.Debugger.ExceptionReplayAgentlessEnabled).AsBool(false);
AgentlessUrlOverride = config.WithKeys(ConfigurationKeys.Debugger.ExceptionReplayAgentlessUrl).AsString();
AgentlessApiKey = config.WithKeys(ConfigurationKeys.ApiKey).AsRedactedString();
AgentlessSite = config.WithKeys(ConfigurationKeys.Site).AsString(DefaultSite, site => !string.IsNullOrEmpty(site)) ?? DefaultSite;
}

public bool Enabled { get; }
Expand All @@ -61,6 +67,14 @@ public ExceptionReplaySettings(IConfigurationSource? source, IConfigurationTelem

public int MaxExceptionAnalysisLimit { get; }

public bool AgentlessEnabled { get; }

public string? AgentlessUrlOverride { get; }

public string? AgentlessApiKey { get; }

public string AgentlessSite { get; }

public static ExceptionReplaySettings FromSource(IConfigurationSource source, IConfigurationTelemetry telemetry)
{
return new ExceptionReplaySettings(source, telemetry);
Expand Down
Loading
Loading