Skip to content

Commit e835c5f

Browse files
committed
Copilot fixes
1 parent 033f9b1 commit e835c5f

File tree

2 files changed: 52 additions and 14 deletions.

2 files changed

+52
-14
lines changed

src/__tests__/unit/checks/keywords-urls.test.ts

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -149,6 +149,29 @@ describe('keywords guardrail', () => {
149149
expect(result.info?.matchedKeywords).toEqual(['#foo']);
150150
});
151151

152+
it('ignores keywords that become empty after sanitization', () => {
153+
const result = keywordsCheck(
154+
{},
155+
'Totally benign text',
156+
KeywordsConfig.parse({ keywords: ['!!!'] })
157+
) as GuardrailResult;
158+
159+
expect(result.tripwireTriggered).toBe(false);
160+
expect(result.info?.matchedKeywords).toEqual([]);
161+
expect(result.info?.sanitizedKeywords).toEqual(['']);
162+
});
163+
164+
it('still matches other keywords when some sanitize to empty strings', () => {
165+
const result = keywordsCheck(
166+
{},
167+
'Please keep this secret!',
168+
KeywordsConfig.parse({ keywords: ['...', 'secret!!!'] })
169+
) as GuardrailResult;
170+
171+
expect(result.tripwireTriggered).toBe(true);
172+
expect(result.info?.matchedKeywords).toEqual(['secret']);
173+
});
174+
152175
it('matches keywords ending with special characters', () => {
153176
const result = keywordsCheck(
154177
{},

src/checks/keywords.ts

Lines changed: 29 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,13 @@ export type KeywordsContext = z.infer<typeof KeywordsContext>;
4040
* @param config Configuration specifying keywords and behavior
4141
* @returns GuardrailResult indicating if tripwire was triggered
4242
*/
43+
const unicodeWordCharRegex = /[\p{L}\p{N}]/u;
44+
const isWordChar = (char: string | undefined): boolean => {
45+
if (!char) return false;
46+
if (char === '_') return true;
47+
return unicodeWordCharRegex.test(char);
48+
};
49+
4350
export const keywordsCheck: CheckFn<KeywordsContext, string, KeywordsConfig> = (
4451
ctx,
4552
text,
@@ -52,28 +59,36 @@ export const keywordsCheck: CheckFn<KeywordsContext, string, KeywordsConfig> = (
5259
// Sanitize keywords by stripping trailing punctuation
5360
const sanitizedKeywords = keywords.map((k: string) => k.replace(/[.,!?;:]+$/, ''));
5461

55-
// Escape special regex characters so keywords are treated literally
56-
const escapedKeywords = sanitizedKeywords.map((k: string) =>
57-
k.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
58-
);
59-
60-
const isWordChar = (char: string | undefined) => {
61-
if (!char) return false;
62-
if (char === '_') return true;
63-
return /[\p{L}\p{N}]/u.test(char);
64-
};
62+
const keywordEntries = sanitizedKeywords
63+
.map((sanitized) => ({
64+
sanitized,
65+
escaped: sanitized.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
66+
}))
67+
.filter(({ sanitized }) => sanitized.length > 0);
68+
69+
if (keywordEntries.length === 0) {
70+
return {
71+
tripwireTriggered: false,
72+
info: {
73+
matchedKeywords: [],
74+
originalKeywords: keywords,
75+
sanitizedKeywords,
76+
totalKeywords: keywords.length,
77+
textLength: text.length,
78+
},
79+
};
80+
}
6581

6682
// Apply unicode-aware word boundaries per keyword so tokens that start/end with punctuation still match.
67-
const keywordPatterns = escapedKeywords.map((keyword, index) => {
68-
const originalKeyword = sanitizedKeywords[index];
69-
const keywordChars = Array.from(originalKeyword);
83+
const keywordPatterns = keywordEntries.map(({ sanitized, escaped }) => {
84+
const keywordChars = Array.from(sanitized);
7085
const firstChar = keywordChars[0];
7186
const lastChar = keywordChars[keywordChars.length - 1];
7287
const needsLeftBoundary = isWordChar(firstChar);
7388
const needsRightBoundary = isWordChar(lastChar);
7489
const leftBoundary = needsLeftBoundary ? '(?<![\\p{L}\\p{N}_])' : '';
7590
const rightBoundary = needsRightBoundary ? '(?![\\p{L}\\p{N}_])' : '';
76-
return `${leftBoundary}${keyword}${rightBoundary}`;
91+
return `${leftBoundary}${escaped}${rightBoundary}`;
7792
});
7893

7994
const patternText = `(?:${keywordPatterns.join('|')})`;

0 commit comments

Comments
 (0)