99module Datadog
1010 module Core
1111 module Telemetry
12- # Accumulates events and sends them to the API at a regular interval, including heartbeat event.
12+ # Accumulates events and sends them to the API at a regular interval,
13+ # including heartbeat event.
14+ #
15+ # @api private
1316 class Worker
1417 include Core ::Workers ::Queue
1518 include Core ::Workers ::Polling
@@ -40,7 +43,7 @@ def initialize(
4043 self . enabled = enabled
4144 # Workers::IntervalLoop settings
4245 self . loop_base_interval = metrics_aggregation_interval_seconds
43- self . fork_policy = Core ::Workers ::Async ::Thread ::FORK_POLICY_STOP
46+ self . fork_policy = Core ::Workers ::Async ::Thread ::FORK_POLICY_RESTART
4447
4548 @shutdown_timeout = shutdown_timeout
4649 @buffer_size = buffer_size
@@ -53,12 +56,13 @@ def initialize(
5356 attr_reader :logger
5457 attr_reader :initial_event_once
5558 attr_reader :initial_event
59+ attr_reader :emitter
5660
5761 # Returns true if worker thread is successfully started,
5862 # false if worker thread was not started but telemetry is enabled,
5963 # nil if telemetry is disabled.
6064 def start ( initial_event )
61- return if ! enabled? || forked ?
65+ return unless enabled?
6266
6367 @initial_event = initial_event
6468
@@ -79,7 +83,21 @@ def stop(force_stop = false, timeout = @shutdown_timeout)
7983 # for not enqueueing event (presently) is that telemetry is disabled
8084 # altogether, and in this case other methods return nil.
8185 def enqueue ( event )
82- return if !enabled? || forked?
86+ return unless enabled?
87+
88+ # Start the worker if needed, including in forked children.
89+ # Needs to be done before pushing to buffer since perform
90+ # may invoke after_fork handler which resets the buffer.
91+ #
92+ # Telemetry is special in that it permits events to be submitted
93+ # to the worker with the worker not running, and the worker is
94+ # explicitly started later (to maintain proper initialization order).
95+ # Thus here we can't just call perform unconditionally and must
96+ # check if the worker is supposed to be running, and only call
97+ # perform in that case.
98+ if worker && !worker . alive?
99+ perform
100+ end
83101
84102 buffer . push ( event )
85103 true
@@ -133,7 +151,7 @@ def flush
133151 private
134152
135153 def perform ( *events )
136- return if ! enabled? || forked ?
154+ return unless enabled?
137155
138156 if need_initial_event?
139157 started!
@@ -189,7 +207,9 @@ def started!
189207 # dependencies and send the new ones.
190208 # System tests demand only one instance of this event per
191209 # dependency.
192- send_event ( Event ::AppDependenciesLoaded . new ) if @dependency_collection && initial_event . class . eql? ( Telemetry ::Event ::AppStarted ) # standard:disable Style/ClassEqualityComparison:
210+ if @dependency_collection && initial_event . app_started?
211+ send_event ( Event ::AppDependenciesLoaded . new )
212+ end
193213
194214 true
195215 else
@@ -240,6 +260,28 @@ def disable_on_not_found!(response)
240260 disable!
241261 end
242262
263+ # Stop the worker after fork without sending closing event.
264+ # The closing event will be (or should be) sent by the worker
265+ # in the parent process.
266+ # Also, discard any accumulated events since they will be sent by
267+ # the parent.
268+ def after_fork
269+ # If telemetry is disabled, we still reset the state to avoid
270+ # having wrong state. It is possible that in the future telemetry
271+ # will be re-enabled after errors.
272+ buffer . clear
273+ initial_event_once . reset
274+ # In the child process, we get a new runtime_id.
275+ # As such we need to send AppStarted event.
276+ # In the parent process, the event may have been the
277+ # SynthAppClientConfigurationChange instead of AppStarted,
278+ # and in that case we need to convert it to the "regular"
279+ # AppStarted event.
280+ if @initial_event . is_a? ( Event ::SynthAppClientConfigurationChange )
281+ @initial_event . reset! # steep:ignore
282+ end
283+ end
284+
243285 # Deduplicate logs by counting the number of repeated occurrences of the same log
244286 # entry and replacing them with a single entry with the calculated `count` value.
245287 # Non-log events are unchanged.
0 commit comments