@@ -65,3 +65,132 @@ async def test_keywords_does_not_trigger_on_benign_text() -> None:
6565 result = await keywords (ctx = None , data = "Safe content" , config = config )
6666
6767 assert result .tripwire_triggered is False # noqa: S101
68+
69+
def test_match_keywords_does_not_match_partial_words() -> None:
    """A keyword that only occurs inside a longer word must not trip the guardrail."""
    fragment = "orld"
    cfg = KeywordCfg(keywords=[fragment])

    outcome = match_keywords("Hello, world!", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is False  # noqa: S101
76+
77+
def test_match_keywords_handles_numeric_tokens() -> None:
    """A keyword with trailing digits is reported when it appears as a whole token."""
    keyword = "world123"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("Hello, world123", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
85+
86+
def test_match_keywords_rejects_partial_numeric_tokens() -> None:
    """A digit-bearing keyword must not match a token that carries extra digits."""
    cfg = KeywordCfg(keywords=["world123"])
    text = "Hello, world12345"

    outcome = match_keywords(text, cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is False  # noqa: S101
93+
94+
def test_match_keywords_handles_underscored_tokens() -> None:
    """A keyword made of underscore-joined letters is matched once as a single token."""
    keyword = "w_o_r_l_d"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("Hello, w_o_r_l_d", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
102+
103+
def test_match_keywords_rejects_words_embedded_in_underscores() -> None:
    """A keyword flanked by underscores inside a longer identifier must not match."""
    cfg = KeywordCfg(keywords=["world"])
    text = "Hello, test_world_test"

    outcome = match_keywords(text, cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is False  # noqa: S101
110+
111+
def test_match_keywords_handles_chinese_characters() -> None:
    """A keyword written in Chinese characters is matched against identical text."""
    keyword = "你好"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("你好", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
119+
120+
def test_match_keywords_handles_chinese_tokens_with_digits() -> None:
    """A Chinese keyword followed by digits is matched when the whole token is present."""
    keyword = "你好123"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("你好123", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
128+
129+
def test_match_keywords_rejects_partial_chinese_tokens_with_digits() -> None:
    """A Chinese keyword ending in digits must not match a token with more digits."""
    cfg = KeywordCfg(keywords=["你好123"])
    text = "你好12345"

    outcome = match_keywords(text, cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is False  # noqa: S101
136+
137+
def test_match_keywords_applies_boundaries_to_all_keywords() -> None:
    """With several keywords configured, each one is subject to boundary checks."""
    cfg = KeywordCfg(keywords=["test", "hello", "world"])
    text = "testing hello world"

    outcome = match_keywords(text, cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    # "test" is only a prefix of "testing", so it is expected to be absent.
    assert outcome.info["matched"] == ["hello", "world"]  # noqa: S101
145+
146+
def test_match_keywords_detects_email_like_patterns() -> None:
    """Email-like keywords starting with punctuation should match after word chars."""
    config = KeywordCfg(keywords=["@corp.com"])
    # The input must actually contain the keyword, preceded by word characters.
    # The literal previously here was an e-mail-obfuscation placeholder that did
    # not contain "@corp.com", so the assertions below could never hold.
    result = match_keywords("foo@corp.com", config, guardrail_name="Test Guardrail")

    assert result.tripwire_triggered is True  # noqa: S101
    assert result.info["matched"] == ["@corp.com"]  # noqa: S101
154+
155+
def test_match_keywords_detects_hashtag_patterns() -> None:
    """A keyword beginning with '#' is matched even directly after word characters."""
    keyword = "#leak"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("abc#leak", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
163+
164+
def test_match_keywords_respects_end_boundary_for_punctuation_prefixed() -> None:
    """A keyword that starts with punctuation and ends in letters still needs an end boundary."""
    keyword = "@leak"
    cfg = KeywordCfg(keywords=[keyword])

    # Word characters continue right after the keyword: no match expected.
    continued = match_keywords("foo@leakmore", cfg, guardrail_name="Test Guardrail")
    assert continued.tripwire_triggered is False  # noqa: S101

    # A non-word character follows the keyword: match expected.
    terminated = match_keywords("foo@leak bar", cfg, guardrail_name="Test Guardrail")
    assert terminated.tripwire_triggered is True  # noqa: S101
    assert terminated.info["matched"] == [keyword]  # noqa: S101
176+
177+
def test_match_keywords_handles_full_punctuation_keywords() -> None:
    """A keyword made up solely of punctuation is matched even between word characters."""
    keyword = "@#$"
    cfg = KeywordCfg(keywords=[keyword])

    outcome = match_keywords("test@#$test", cfg, guardrail_name="Test Guardrail")

    assert outcome.tripwire_triggered is True  # noqa: S101
    assert outcome.info["matched"] == [keyword]  # noqa: S101
185+
186+
def test_match_keywords_mixed_punctuation_and_word_chars() -> None:
    """A keyword that both starts and ends with punctuation is matched when embedded."""
    cfg = KeywordCfg(keywords=["@user@"])

    # First input: keyword embedded between word characters.
    # Second input: keyword followed by more text (no boundaries applied to
    # punctuation edges), so it should match as well.
    for text in ("test@user@test", "test@user@more"):
        outcome = match_keywords(text, cfg, guardrail_name="Test Guardrail")
        assert outcome.tripwire_triggered is True  # noqa: S101
0 commit comments