@@ -40,6 +40,13 @@ export type KeywordsContext = z.infer<typeof KeywordsContext>;
4040 * @param config Configuration specifying keywords and behavior
4141 * @returns GuardrailResult indicating if tripwire was triggered
4242 */
/**
 * Matches any single Unicode letter (`\p{L}`) or number (`\p{N}`).
 *
 * Declared once at module scope rather than rebuilt on every call. It carries
 * no `g`/`y` flag, so `test()` keeps no `lastIndex` state between calls.
 */
const unicodeWordCharRegex = /[\p{L}\p{N}]/u;

/**
 * Reports whether `char` should be treated as a word character.
 *
 * A word character is an underscore, a Unicode letter, or a Unicode number.
 * This is the same character set as the `[\p{L}\p{N}_]` class used for the
 * keyword boundary lookarounds in `keywordsCheck`.
 *
 * @param char Character to classify; `undefined` or an empty string is
 *   accepted and is never a word character.
 * @returns `true` when `char` is `_` or contains a Unicode letter or number,
 *   otherwise `false`.
 */
const isWordChar = (char: string | undefined): boolean => {
  // Covers both `undefined` (e.g. indexing into an empty array) and ''.
  if (!char) return false;
  if (char === '_') return true;
  return unicodeWordCharRegex.test(char);
};
49+
4350export const keywordsCheck : CheckFn < KeywordsContext , string , KeywordsConfig > = (
4451 ctx ,
4552 text ,
@@ -52,28 +59,36 @@ export const keywordsCheck: CheckFn<KeywordsContext, string, KeywordsConfig> = (
5259 // Sanitize keywords by stripping trailing punctuation
5360 const sanitizedKeywords = keywords . map ( ( k : string ) => k . replace ( / [ . , ! ? ; : ] + $ / , '' ) ) ;
5461
55- // Escape special regex characters so keywords are treated literally
56- const escapedKeywords = sanitizedKeywords . map ( ( k : string ) =>
57- k . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' )
58- ) ;
59-
60- const isWordChar = ( char : string | undefined ) => {
61- if ( ! char ) return false ;
62- if ( char === '_' ) return true ;
63- return / [ \p{ L} \p{ N} ] / u. test ( char ) ;
64- } ;
62+ const keywordEntries = sanitizedKeywords
63+ . map ( ( sanitized ) => ( {
64+ sanitized,
65+ escaped : sanitized . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, '\\$&' ) ,
66+ } ) )
67+ . filter ( ( { sanitized } ) => sanitized . length > 0 ) ;
68+
69+ if ( keywordEntries . length === 0 ) {
70+ return {
71+ tripwireTriggered : false ,
72+ info : {
73+ matchedKeywords : [ ] ,
74+ originalKeywords : keywords ,
75+ sanitizedKeywords,
76+ totalKeywords : keywords . length ,
77+ textLength : text . length ,
78+ } ,
79+ } ;
80+ }
6581
6682 // Apply unicode-aware word boundaries per keyword so tokens that start/end with punctuation still match.
67- const keywordPatterns = escapedKeywords . map ( ( keyword , index ) => {
68- const originalKeyword = sanitizedKeywords [ index ] ;
69- const keywordChars = Array . from ( originalKeyword ) ;
83+ const keywordPatterns = keywordEntries . map ( ( { sanitized, escaped } ) => {
84+ const keywordChars = Array . from ( sanitized ) ;
7085 const firstChar = keywordChars [ 0 ] ;
7186 const lastChar = keywordChars [ keywordChars . length - 1 ] ;
7287 const needsLeftBoundary = isWordChar ( firstChar ) ;
7388 const needsRightBoundary = isWordChar ( lastChar ) ;
7489 const leftBoundary = needsLeftBoundary ? '(?<![\\p{L}\\p{N}_])' : '' ;
7590 const rightBoundary = needsRightBoundary ? '(?![\\p{L}\\p{N}_])' : '' ;
76- return `${ leftBoundary } ${ keyword } ${ rightBoundary } ` ;
91+ return `${ leftBoundary } ${ escaped } ${ rightBoundary } ` ;
7792 } ) ;
7893
7994 const patternText = `(?:${ keywordPatterns . join ( '|' ) } )` ;
0 commit comments