Skip to content

Commit fba70fd

Browse files
authored
DEBUG-3439 DEBUG-3440 remove prefixes from probe paths when path matching (#4346)
1 parent 595581d commit fba70fd

File tree

6 files changed

+194
-23
lines changed

6 files changed

+194
-23
lines changed

lib/datadog/di/code_tracker.rb

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -140,16 +140,23 @@ def iseqs_for_path_suffix(suffix)
140140
exact = registry[suffix]
141141
return [suffix, exact] if exact
142142

143-
inexact = []
144-
registry.each do |path, iseq|
145-
if Utils.path_matches_suffix?(path, suffix)
146-
inexact << [path, iseq]
143+
suffix = suffix.dup
144+
loop do
145+
inexact = []
146+
registry.each do |path, iseq|
147+
if Utils.path_matches_suffix?(path, suffix)
148+
inexact << [path, iseq]
149+
end
147150
end
151+
if inexact.length > 1
152+
raise Error::MultiplePathsMatch, "Multiple paths matched requested suffix"
153+
end
154+
if inexact.any?
155+
return inexact.first
156+
end
157+
return nil unless suffix.include?('/')
158+
suffix.sub!(%r{.*/+}, '')
148159
end
149-
if inexact.length > 1
150-
raise Error::MultiplePathsMatch, "Multiple paths matched requested suffix"
151-
end
152-
inexact.first
153160
end
154161
end
155162

lib/datadog/di/probe.rb

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,19 +155,16 @@ def location
155155
# Returns whether the provided +path+ matches the user-designated
156156
# file (of a line probe).
157157
#
158-
# If file is an absolute path (i.e., it starts with a slash), the file
159-
# must be identical to path to match.
160-
#
161-
# If file is not an absolute path, the path matches if the file is its suffix,
162-
# at a path component boundary.
158+
# Delegates to Utils.path_can_match_spec? which performs fuzzy
159+
# matching. See the comments in utils.rb for details.
163160
def file_matches?(path)
164161
if path.nil?
165162
raise ArgumentError, "Cannot match against a nil path"
166163
end
167164
unless file
168165
raise ArgumentError, "Probe does not have a file to match against"
169166
end
170-
Utils.path_matches_suffix?(path, file)
167+
Utils.path_can_match_spec?(path, file)
171168
end
172169

173170
# Instrumentation module for method probes.

lib/datadog/di/utils.rb

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,82 @@
33
module Datadog
44
module DI
55
module Utils
6+
# General path matching considerations
7+
# ------------------------------------
8+
#
9+
# The following use cases must be supported:
10+
# 1. The "probe path" is relative path to the file from source code
11+
# repository root. The project is deployed from the repository root,
12+
# such that that same relative path exists at runtime from the
13+
# root of the application.
14+
# 2. The "probe path" is a relative path to the file in a monorepo
15+
# where the project being deployed is in a subdirectory.
16+
# This the "probe path" contains additional directory components
17+
# in the beginning that do not exist in the runtime environment.
18+
# 3. The "probe path" is an absolute path to the file on the customer's
19+
# development system. As specified this path definitely does not
20+
# exist at runtime, and can start with a prefix that is unknown
21+
# to any both UI and tracer code.
22+
# 4. Same as (3), but the customer is using a Windows computer for
23+
# development and has the path specified in the wrong case
24+
# (which works fine on their development machine).
25+
# 5. The "probe path" is the basename or any suffix of the path to
26+
# the desired file, typed manually by the customer into the UI.
27+
#
28+
# A related concern is that if multiple runtime paths match the path
29+
# specification in the probe, the tracer must return an error to the
30+
# backend/UI rather than instrumenting any of the matching paths.
31+
#
32+
# The logic for path matching should therefore, generally, be as follows:
33+
# 1. If the "probe path" is absolute, see if it exists at runtime.
34+
# If so, take it as the desired path and finish.
35+
# 2. Attempt to identify the application root, by checking if the current
36+
# working directory contains a file called Gemfile. If yes, assume
37+
# the current working directory is the application root, otherwise
38+
# consider the application root to be unknown.
39+
# 3. If the application root is known and the "probe path" is relative,
40+
# concatenate the "probe path" to the application root and check
41+
# if the resulting path exists at runtime. If so, take it as the
42+
# desired path and finish.
43+
# 4. If the "probe path" is relative, go through the known file paths,
44+
# filter these paths down to those whose suffix is the "probe path",
45+
# and check how many we are left with. If exactly one, assume this
46+
# is the desired path and finish. If more than one, return an error
47+
# "multiple matching paths".
48+
# 5. If the application root is known, for each suffix of the "probe path",
49+
# see if that relative paths concatenated to the application root
50+
# results in a known file. If a known file is found, assume this
51+
# is the wanted file and finish.
52+
# 6. For each suffix of the "probe path", filter the set of known paths
53+
# down to those that end in the suffix. If exactly one path remains
54+
# for a given suffix, assume this is the wanted path and finish.
55+
# If more than one path remains for a given suffix, return the error
56+
# "multiple matching paths".
57+
# 7. Repeat step 5 but perform case-insensitive comparison.
58+
# 8. Repeat step 6 but perform case-insensitive comparison.
59+
#
60+
# Note that we do not look for "probe paths" under the current working
61+
# directory at runtime if the current working directory does not contain
62+
# a Gemfile, to avoid finding files from potentially undesired bases.
63+
#
64+
# What "known file"/"known path" means also differs based on the
65+
# operation being performed:
66+
# - If the operation is "install a probe", "known file/path" can
67+
# include files on the filesystem that have not been loaded as
68+
# well as paths from the code tracker.
69+
# - If the operation is "check if any pending probes match the file
70+
# that was just loaded", we would only consider the path that was
71+
# just loaded and not check the filesystem.
72+
#
73+
# Filesystem inquiries are obviously quite expensive and should be
74+
# cached. For the vast majority of applications it should be safe to
75+
# indefinitely cache whether a particular filesystem paths exists
76+
# in both positive and negative.
77+
#
78+
# As a "quick fix", currently after performing the suffix matching
79+
# we just strip leading directory components from the "probe path"
80+
# until we get a match via a "suffix of the suffix".
81+
682
# Returns whether the provided +path+ matches the user-designated
783
# file suffix (of a line probe).
884
#
@@ -41,6 +117,21 @@ module Utils
41117
# !!(path =~ %r,(/|\A)#{Regexp.quote(suffix)}\z,)
42118
end
43119
end
120+
121+
# Returns whether the provided +path+ matches the "probe path" in
122+
# +spec+. Attempts all of the fuzzy matches by stripping directories
123+
# from the front of +spec+. Does not consider othr known paths to
124+
# identify the case of (potentially) multiple matching paths for +spec+.
125+
module_function def path_can_match_spec?(path, spec)
126+
return true if path_matches_suffix?(path, spec)
127+
128+
spec = spec.dup
129+
loop do
130+
return false unless spec.include?('/')
131+
spec.sub!(%r{.*/+}, '')
132+
return true if path_matches_suffix?(path, spec)
133+
end
134+
end
44135
end
45136
end
46137
end

sig/datadog/di/utils.rbs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module Datadog
22
module DI
33
module Utils
44
def self?.path_matches_suffix?: (String path, String suffix) -> bool
5+
def self?.path_can_match_spec?: (String path, String suffix) -> bool
56
end
67
end
78
end

spec/datadog/di/integration/everything_from_remote_config_spec.rb

Lines changed: 49 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,10 @@ def target_method
230230

231231
let(:payloads) { [] }
232232

233-
def do_rc(expect_hook: true)
233+
def do_rc(expect_hook: :hook_method)
234234
expect(probe_manager).to receive(:add_probe).and_call_original
235235
if expect_hook
236-
expect(instrumenter).to receive(:hook_method).and_call_original
236+
expect(instrumenter).to receive(expect_hook).and_call_original
237237
end
238238
# Events can be batched, meaning +post+ could be called once or twice
239239
# depending on how threads are scheduled by the VM.
@@ -370,13 +370,22 @@ def assert_received_and_errored
370370
context 'line probe' do
371371
with_code_tracking
372372

373-
context 'line probe received targeting loaded code not in code tracker' do
374-
let(:probe_spec) do
375-
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {
376-
sourceFile: 'instrumentation_integration_test_class.rb', lines: [22]
377-
}}
373+
shared_context 'targeting integration test class via load' do
374+
before do
375+
begin
376+
Object.send(:remove_const, :InstrumentationIntegrationTestClass)
377+
rescue
378+
nil
379+
end
380+
load File.join(File.dirname(__FILE__), 'instrumentation_integration_test_class.rb')
381+
382+
# We want the probe status to be reported, therefore need to
383+
# disable exception propagation.
384+
settings.dynamic_instrumentation.internal.propagate_all_exceptions = false
378385
end
386+
end
379387

388+
shared_context 'targeting integration test class via require' do
380389
before do
381390
begin
382391
Object.send(:remove_const, :InstrumentationIntegrationTestClass)
@@ -391,14 +400,46 @@ def assert_received_and_errored
391400
expect($LOADED_FEATURES.detect do |path|
392401
File.basename(path) == 'instrumentation_integration_test_class.rb'
393402
end).to be_truthy
394-
component.code_tracker.clear
395403

396404
# We want the probe status to be reported, therefore need to
397405
# disable exception propagation.
398406
settings.dynamic_instrumentation.internal.propagate_all_exceptions = false
399407
end
408+
end
409+
410+
context 'line probe with path containing extra prefix directories' do
411+
let(:probe_spec) do
412+
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {
413+
sourceFile: 'junk/prefix/instrumentation_integration_test_class.rb', lines: [22]
414+
}}
415+
end
416+
417+
include_context 'targeting integration test class via load'
400418

401419
it 'instruments code and adds probe to installed list' do
420+
expect_lazy_log(logger, :debug, /received probe from RC:/)
421+
422+
do_rc(expect_hook: :hook_line)
423+
assert_received_and_installed
424+
425+
expect(probe_manager.installed_probes.length).to eq 1
426+
end
427+
end
428+
429+
context 'line probe received targeting loaded code not in code tracker' do
430+
let(:probe_spec) do
431+
{id: '11', name: 'bar', type: 'LOG_PROBE', where: {
432+
sourceFile: 'instrumentation_integration_test_class.rb', lines: [22]
433+
}}
434+
end
435+
436+
include_context 'targeting integration test class via require'
437+
438+
before do
439+
component.code_tracker.clear
440+
end
441+
442+
it 'marks RC payload as errored' do
402443
expect_lazy_log_many(logger, :debug,
403444
/received probe from RC:/,
404445
/error processing probe configuration:.*File matching probe path.*was loaded and is not in code tracker registry/,)

spec/datadog/di/utils_spec.rb

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
di_test
66

77
describe '.path_matches_suffix?' do
8+
# NB; when updating this list also add to the list in
9+
# path_can_match_spec? test below.
810
[
911
['exact match - absolute path', '/foo/bar.rb', '/foo/bar.rb', true],
1012
# Suffix matching is only applicable to relative paths.
@@ -15,7 +17,7 @@
1517
['extra leading slash in file', '//foo/bar.rb', '/foo/bar.rb', false],
1618
['extra slash in the middle of file', 'foo//bar.rb', '/foo/bar.rb', false],
1719
['nothing in common', 'blah.rb', '/foo/bar.rb', false],
18-
['path is a suffix of file', '/a/foo/bar.rbr', '/foo/bar.rb', false],
20+
['path is a suffix of file', '/a/foo/bar.rb', '/foo/bar.rb', false],
1921

2022
# We expect path to always be an absolute path.
2123
['path is relative', 'bar.rb', 'bar.rb', false],
@@ -29,4 +31,36 @@
2931
end
3032
end
3133
end
34+
35+
describe '.path_can_match_spec?' do
36+
# NB; when updating this list also add to the list in
37+
# path_matches_suffix? test above.
38+
[
39+
['exact match - absolute path', '/foo/bar.rb', '/foo/bar.rb', true],
40+
# Prefixes of suffix are removed until there is a match
41+
['absolute path is a suffix', '/bar.rb', '/foo/bar.rb', true],
42+
# ... but not if basename does not match
43+
['absolute path is a suffix', '/bar.rb', '/foo/bar1.rb', false],
44+
['suffix - multiple path components', 'foo/bar.rb', '/foo/bar.rb', true],
45+
['suffix - at basename', 'bar.rb', '/foo/bar.rb', true],
46+
['suffix - not at path component boundary', 'ar.rb', '/foo/bar.rb', false],
47+
# Extra leading slashes are removed
48+
['extra leading slash in file', '//foo/bar.rb', '/foo/bar.rb', true],
49+
# Extra slashes in the middle are also removed
50+
['extra slash in the middle of file', 'foo//bar.rb', '/foo/bar.rb', true],
51+
['nothing in common', 'blah.rb', '/foo/bar.rb', false],
52+
['path is a suffix of file', '/a/foo/bar.rb', '/foo/bar.rb', true],
53+
54+
# We expect path to always be an absolute path.
55+
['path is relative', 'bar.rb', 'bar.rb', false],
56+
].each do |name, suffix_, path_, result_|
57+
suffix, path, result = suffix_, path_, result_
58+
59+
context name do
60+
it "is #{result}" do
61+
expect(described_class.path_can_match_spec?(path, suffix)).to be result
62+
end
63+
end
64+
end
65+
end
3266
end

0 commit comments

Comments
 (0)