From 5f8c9b3eb606aaf338b9a59bdda70be909d06350 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 7 Aug 2025 10:50:27 -0500 Subject: [PATCH 1/4] Add group expression --- relay-cabi/src/glob.rs | 3 +++ relay-pattern/src/lib.rs | 47 ++++++++++++++++++++++++++++++++++ relay-pattern/src/wildmatch.rs | 1 + 3 files changed, 51 insertions(+) diff --git a/relay-cabi/src/glob.rs b/relay-cabi/src/glob.rs index a3e9b14c069..e2f1c678bb6 100644 --- a/relay-cabi/src/glob.rs +++ b/relay-cabi/src/glob.rs @@ -139,6 +139,9 @@ mod tests { }} } + test_glob!("foo@1", "!{foo@*,bar@*}", false, {}); + test_glob!("bar@1", "!{foo@*,bar@*}", false, {}); + test_glob!("baz@1", "!{foo@*,bar@*}", true, {}); test_glob!("hello.py", "*.py", true, {}); test_glob!("hello.py", "*.js", false, {}); test_glob!("foo/hello.py", "*.py", true, {}); diff --git a/relay-pattern/src/lib.rs b/relay-pattern/src/lib.rs index 43476cdb74d..05fd975a118 100644 --- a/relay-pattern/src/lib.rs +++ b/relay-pattern/src/lib.rs @@ -25,6 +25,8 @@ //! * `[!a-z]` matches one character that is not in the given range. //! * `{a,b}` matches any pattern within the alternation group. //! * `\` escapes any of the above special characters and treats it as a literal. +//! * `(a)` matches any pattern within the group. +//! * `(!a)` matches the inverted pattern within the group. //! //! # Complexity //! @@ -59,6 +61,8 @@ enum ErrorKind { InvalidRange(char, char), /// Unbalanced character class. The pattern contains unbalanced `[`, `]` characters. UnbalancedCharacterClass, + /// Unbalanced group. The pattern contains unbalanced `(`, `)` characters. + UnbalancedGroup, /// Character class is invalid and cannot be parsed. InvalidCharacterClass, /// Nested alternates are not valid. @@ -84,6 +88,7 @@ impl fmt::Display for Error { write!(f, "Invalid character range `{start}-{end}`") } ErrorKind::UnbalancedCharacterClass => write!(f, "Unbalanced character class"), + ErrorKind::UnbalancedGroup => write!(f, "Unbalanced group"), ErrorKind::InvalidCharacterClass => write!(f, "Invalid character class"), ErrorKind::NestedAlternates => write!(f, "Nested alternates"), ErrorKind::UnbalancedAlternates => write!(f, "Unbalanced alternates"), @@ -504,6 +509,8 @@ impl<'a> Parser<'a> { match c { '?' => self.push_token(Token::Any(NonZeroUsize::MIN)), '*' => self.push_token(Token::Wildcard), + '(' => self.parse_group()?, + ')' => return Err(ErrorKind::UnbalancedGroup), '[' => self.parse_class()?, ']' => return Err(ErrorKind::UnbalancedCharacterClass), '{' => self.start_alternates()?, @@ -555,6 +562,33 @@ impl<'a> Parser<'a> { Ok(()) } + fn parse_group(&mut self) -> Result<(), ErrorKind> { + let negated = self.advance_if(|c| c == '!'); + let mut literal = String::new(); + + loop { + let Some(c) = self.advance() else { + return Err(ErrorKind::UnbalancedCharacterClass); + }; + + match c { + '(' => return Err(ErrorKind::InvalidCharacterClass), + ')' => break, + c => literal.push(match c { + '\\' => self.advance().ok_or(ErrorKind::DanglingEscape)?, + c => c, + }), + } + } + + self.push_token(Token::Group { + negated, + literal: Literal::new(literal, self.options), + }); + + Ok(()) + } + fn parse_class(&mut self) -> Result<(), ErrorKind> { let negated = self.advance_if(|c| c == '!'); @@ -763,6 +797,8 @@ enum Token { Wildcard, /// A class token `[abc]` or its negated variant `[!abc]`. Class { negated: bool, ranges: Ranges }, + /// A group token `(abc)` or its negated variant `(!abc)`. + Group { negated: bool, literal: Literal }, /// A list of nested alternate tokens `{a,b}`. Alternates(Vec), /// A list of optional tokens. @@ -1936,4 +1972,15 @@ mod tests { assert!(!patterns.is_match("foo")); assert!(patterns.is_match("bar")); } + + #[test] + fn test_patterns_inverted() { + let mut builder = Patterns::builder().add("!abc").unwrap(); + + let patterns = builder.take(); + assert!(patterns.is_match("a")); + assert!(patterns.is_match("b")); + assert!(patterns.is_match("c")); + assert!(!patterns.is_match("abc")); + } } diff --git a/relay-pattern/src/wildmatch.rs b/relay-pattern/src/wildmatch.rs index 6f3482db780..466cd6bcc93 100644 --- a/relay-pattern/src/wildmatch.rs +++ b/relay-pattern/src/wildmatch.rs @@ -61,6 +61,7 @@ where // no match here. None => false, }, + Token::Group { negated, literal } => {} Token::Any(n) => { advance!(match n_chars_to_bytes(*n, h_current) { Some(n) => n, From 4ab0be0c3ff04a032ce524a3aec96aef640dfa61 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 7 Aug 2025 11:18:19 -0500 Subject: [PATCH 2/4] Add inverted handling and coverage --- relay-cabi/src/glob.rs | 3 --- relay-pattern/src/lib.rs | 15 +++++++++------ relay-pattern/src/wildmatch.rs | 24 +++++++++++++++++++++++- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/relay-cabi/src/glob.rs b/relay-cabi/src/glob.rs index e2f1c678bb6..a3e9b14c069 100644 --- a/relay-cabi/src/glob.rs +++ b/relay-cabi/src/glob.rs @@ -139,9 +139,6 @@ mod tests { }} } - test_glob!("foo@1", "!{foo@*,bar@*}", false, {}); - test_glob!("bar@1", "!{foo@*,bar@*}", false, {}); - test_glob!("baz@1", "!{foo@*,bar@*}", true, {}); test_glob!("hello.py", "*.py", true, {}); test_glob!("hello.py", "*.js", false, {}); test_glob!("foo/hello.py", "*.py", true, {}); diff --git a/relay-pattern/src/lib.rs b/relay-pattern/src/lib.rs index 05fd975a118..a4e60d99bd1 100644 --- a/relay-pattern/src/lib.rs +++ b/relay-pattern/src/lib.rs @@ -568,7 +568,7 @@ impl<'a> Parser<'a> { loop { let Some(c) = self.advance() else { - return Err(ErrorKind::UnbalancedCharacterClass); + return Err(ErrorKind::UnbalancedGroup); }; match c { @@ -1975,12 +1975,15 @@ mod tests { #[test] fn test_patterns_inverted() { - let mut builder = Patterns::builder().add("!abc").unwrap(); + // We want to match anything that is not prefixed with foo@ or bar@ + let mut builder = Patterns::builder().add("(!foo@)(!bar@)*").unwrap(); let patterns = builder.take(); - assert!(patterns.is_match("a")); - assert!(patterns.is_match("b")); - assert!(patterns.is_match("c")); - assert!(!patterns.is_match("abc")); + assert!(!patterns.is_match("foo@1.1")); + assert!(!patterns.is_match("bar@1.1")); + assert!(patterns.is_match("baz@1.1")); + assert!(patterns.is_match("foobar@1.1")); + assert!(patterns.is_match("barbaz@1.1")); + assert!(patterns.is_match("barbaz@1.1")); } } diff --git a/relay-pattern/src/wildmatch.rs b/relay-pattern/src/wildmatch.rs index 466cd6bcc93..9197ee7d0b5 100644 --- a/relay-pattern/src/wildmatch.rs +++ b/relay-pattern/src/wildmatch.rs @@ -61,7 +61,29 @@ where // no match here. None => false, }, - Token::Group { negated, literal } => {} + // Token::Group never advances the cursor position. It can only tell you if the + // prefix match failed or succeeded. If it failed execution is stopped. If it + // succeeded then we iterate the token cursor and reparse the haystack from the + // exact position we started from in the Token::Group step. + Token::Group { negated, literal } => match M::is_prefix(h_current, literal) { + Some(n) => { + if *negated { + // We matched the literal but the negated operator was specified. The + // match fails and we do not advance the pointer. + false + } else { + // The haystack cursor is advanced because we matched the prefix. This + // is identical behavior to normal literal matching behavior. + advance!(n) + } + } + // We did not match the prefix literal. If the negated operator was + // specified we return true indicating a match but we do not increment + // the haystack pointer. We've only determined that the haystack is not + // prefixed with some value. We haven't made a definitive conclusion about + // _what_ the prefix actually is. The next token will perform that operation. + None => *negated, + }, Token::Any(n) => { advance!(match n_chars_to_bytes(*n, h_current) { Some(n) => n, From 42d8520a4d1fd785a8104efd8c8c4754fa8af386 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 7 Aug 2025 11:30:39 -0500 Subject: [PATCH 3/4] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c0064a41af..93db9878c84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +**Features**: + +- Add negated prefix to glob matching. ([#5040](https://github.com/getsentry/relay/pull/5040)) + **Bug Fixes**: - Normalize OS and Browser names in contexts when missing a version. ([#4957](https://github.com/getsentry/relay/pull/4957)) From 96635eed25ddeda8fa914d7c09e6d50ad98a3672 Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Thu, 7 Aug 2025 11:33:01 -0500 Subject: [PATCH 4/4] Use appropriate error class --- relay-pattern/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/relay-pattern/src/lib.rs b/relay-pattern/src/lib.rs index a4e60d99bd1..f512f8ff0ad 100644 --- a/relay-pattern/src/lib.rs +++ b/relay-pattern/src/lib.rs @@ -63,6 +63,8 @@ enum ErrorKind { UnbalancedCharacterClass, /// Unbalanced group. The pattern contains unbalanced `(`, `)` characters. UnbalancedGroup, + /// Groups may not be nested. + InvalidNestedGroup, /// Character class is invalid and cannot be parsed. InvalidCharacterClass, /// Nested alternates are not valid. @@ -89,6 +91,7 @@ impl fmt::Display for Error { } ErrorKind::UnbalancedCharacterClass => write!(f, "Unbalanced character class"), ErrorKind::UnbalancedGroup => write!(f, "Unbalanced group"), + ErrorKind::InvalidNestedGroup => write!(f, "Nested grouping is not permitted"), ErrorKind::InvalidCharacterClass => write!(f, "Invalid character class"), ErrorKind::NestedAlternates => write!(f, "Nested alternates"), ErrorKind::UnbalancedAlternates => write!(f, "Unbalanced alternates"), @@ -572,7 +575,7 @@ impl<'a> Parser<'a> { }; match c { - '(' => return Err(ErrorKind::InvalidCharacterClass), + '(' => return Err(ErrorKind::InvalidNestedGroup), ')' => break, c => literal.push(match c { '\\' => self.advance().ok_or(ErrorKind::DanglingEscape)?,