|
3 | 3 | module Datadog |
4 | 4 | module DI |
5 | 5 | module Utils |
| 6 | + # General path matching considerations |
| 7 | + # ------------------------------------ |
| 8 | + # |
| 9 | + # The following use cases must be supported: |
| 10 | + # 1. The "probe path" is a relative path to the file from source code |
| 11 | + # repository root. The project is deployed from the repository root, |
| 12 | + # such that the same relative path exists at runtime from the |
| 13 | + # root of the application. |
| 14 | + # 2. The "probe path" is a relative path to the file in a monorepo |
| 15 | + # where the project being deployed is in a subdirectory. |
| 16 | + # Thus the "probe path" contains additional directory components |
| 17 | + # in the beginning that do not exist in the runtime environment. |
| 18 | + # 3. The "probe path" is an absolute path to the file on the customer's |
| 19 | + # development system. As specified this path definitely does not |
| 20 | + # exist at runtime, and can start with a prefix that is unknown |
| 21 | + # to both UI and tracer code. |
| 22 | + # 4. Same as (3), but the customer is using a Windows computer for |
| 23 | + # development and has the path specified in the wrong case |
| 24 | + # (which works fine on their development machine). |
| 25 | + # 5. The "probe path" is the basename or any suffix of the path to |
| 26 | + # the desired file, typed manually by the customer into the UI. |
| 27 | + # |
| 28 | + # A related concern is that if multiple runtime paths match the path |
| 29 | + # specification in the probe, the tracer must return an error to the |
| 30 | + # backend/UI rather than instrumenting any of the matching paths. |
| 31 | + # |
| 32 | + # The logic for path matching should therefore, generally, be as follows: |
| 33 | + # 1. If the "probe path" is absolute, see if it exists at runtime. |
| 34 | + # If so, take it as the desired path and finish. |
| 35 | + # 2. Attempt to identify the application root, by checking if the current |
| 36 | + # working directory contains a file called Gemfile. If yes, assume |
| 37 | + # the current working directory is the application root, otherwise |
| 38 | + # consider the application root to be unknown. |
| 39 | + # 3. If the application root is known and the "probe path" is relative, |
| 40 | + # concatenate the "probe path" to the application root and check |
| 41 | + # if the resulting path exists at runtime. If so, take it as the |
| 42 | + # desired path and finish. |
| 43 | + # 4. If the "probe path" is relative, go through the known file paths, |
| 44 | + # filter these paths down to those whose suffix is the "probe path", |
| 45 | + # and check how many we are left with. If exactly one, assume this |
| 46 | + # is the desired path and finish. If more than one, return an error |
| 47 | + # "multiple matching paths". |
| 48 | + # 5. If the application root is known, for each suffix of the "probe path", |
| 49 | + # see if that relative path concatenated to the application root |
| 50 | + # results in a known file. If a known file is found, assume this |
| 51 | + # is the wanted file and finish. |
| 52 | + # 6. For each suffix of the "probe path", filter the set of known paths |
| 53 | + # down to those that end in the suffix. If exactly one path remains |
| 54 | + # for a given suffix, assume this is the wanted path and finish. |
| 55 | + # If more than one path remains for a given suffix, return the error |
| 56 | + # "multiple matching paths". |
| 57 | + # 7. Repeat step 5 but perform case-insensitive comparison. |
| 58 | + # 8. Repeat step 6 but perform case-insensitive comparison. |
| 59 | + # |
| 60 | + # Note that we do not look for "probe paths" under the current working |
| 61 | + # directory at runtime if the current working directory does not contain |
| 62 | + # a Gemfile, to avoid finding files from potentially undesired bases. |
| 63 | + # |
| 64 | + # What "known file"/"known path" means also differs based on the |
| 65 | + # operation being performed: |
| 66 | + # - If the operation is "install a probe", "known file/path" can |
| 67 | + # include files on the filesystem that have not been loaded as |
| 68 | + # well as paths from the code tracker. |
| 69 | + # - If the operation is "check if any pending probes match the file |
| 70 | + # that was just loaded", we would only consider the path that was |
| 71 | + # just loaded and not check the filesystem. |
| 72 | + # |
| 73 | + # Filesystem inquiries are obviously quite expensive and should be |
| 74 | + # cached. For the vast majority of applications it should be safe to |
| 75 | + # indefinitely cache whether a particular filesystem path exists |
| 76 | + # in both the positive and negative cases. |
| 77 | + # |
| 78 | + # As a "quick fix", currently after performing the suffix matching |
| 79 | + # we just strip leading directory components from the "probe path" |
| 80 | + # until we get a match via a "suffix of the suffix". |
| 81 | + |
6 | 82 | # Returns whether the provided +path+ matches the user-designated |
7 | 83 | # file suffix (of a line probe). |
8 | 84 | # |
@@ -41,6 +117,21 @@ module Utils |
41 | 117 | # !!(path =~ %r,(/|\A)#{Regexp.quote(suffix)}\z,) |
42 | 118 | end |
43 | 119 | end |
| 120 | + |
| 121 | + # Returns whether the provided +path+ matches the "probe path" in +spec+. |
| 122 | + # Tries +spec+ as given first; if that fails, strips leading directories from a copy of +spec+ (the caller's string is not mutated) and retries. |
| 123 | + # Note: the regexp used for stripping is greedy, so one pass removes everything through the last "/" and leaves only the final component; returns false once no "/" remains. |
| 124 | + # Does not consider other known paths to identify the case of (potentially) multiple matching paths for +spec+. |
| 125 | + module_function def path_can_match_spec?(path, spec) |
| 126 | + return true if path_matches_suffix?(path, spec) |
| 127 | + |
| 128 | + spec = spec.dup |
| 129 | + loop do |
| 130 | + return false unless spec.include?('/') |
| 131 | + spec.sub!(%r{.*/+}, '') |
| 132 | + return true if path_matches_suffix?(path, spec) |
| 133 | + end |
| 134 | + end |
44 | 135 | end |
45 | 136 | end |
46 | 137 | end |
0 commit comments