Skip to content

Commit 4cf1569

Browse files
committed
More robust allow list matching
1 parent 0e95a1c commit 4cf1569

File tree

2 files changed

+458
-71
lines changed

2 files changed

+458
-71
lines changed

src/__tests__/unit/checks/keywords-urls.test.ts

Lines changed: 220 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import { describe, it, expect } from 'vitest';
66
import { keywordsCheck, KeywordsConfig } from '../../../checks/keywords';
7-
import { urls } from '../../../checks/urls';
7+
import { urls, UrlsConfig } from '../../../checks/urls';
88
import { competitorsCheck } from '../../../checks/competitors';
99
import { GuardrailResult } from '../../../types';
1010

@@ -34,6 +34,16 @@ describe('keywords guardrail', () => {
3434
});
3535
});
3636

37+
describe('UrlsConfig', () => {
38+
it('normalizes allowed scheme inputs', () => {
39+
const config = UrlsConfig.parse({
40+
allowed_schemes: ['HTTPS://', 'http:', ' https '],
41+
});
42+
43+
expect(Array.from(config.allowed_schemes).sort()).toEqual(['http', 'https']);
44+
});
45+
});
46+
3747
describe('urls guardrail', () => {
3848
it('allows https URLs listed in the allow list', async () => {
3949
const result = await urls(
@@ -92,6 +102,215 @@ describe('urls guardrail', () => {
92102
expect(result.info?.blocked).toContain('https://other.com');
93103
expect(result.tripwireTriggered).toBe(true);
94104
});
105+
106+
it('allows full URLs with explicit paths in the allow list', async () => {
107+
const text = [
108+
'https://suntropy.es',
109+
'https://api.example.com/v1/tools?id=2',
110+
'https://api.example.com/v2',
111+
].join(' ');
112+
113+
const result = await urls(
114+
{},
115+
text,
116+
{
117+
url_allow_list: ['https://suntropy.es', 'https://api.example.com/v1'],
118+
allowed_schemes: new Set(['https']),
119+
allow_subdomains: false,
120+
block_userinfo: true,
121+
}
122+
);
123+
124+
expect(result.info?.allowed).toEqual(
125+
expect.arrayContaining([
126+
'https://suntropy.es',
127+
'https://api.example.com/v1/tools?id=2',
128+
])
129+
);
130+
expect(result.info?.blocked).toContain('https://api.example.com/v2');
131+
});
132+
133+
it('respects path segment boundaries to avoid prefix bypasses', async () => {
134+
const text = [
135+
'https://example.com/api',
136+
'https://example.com/api/users',
137+
'https://example.com/api2',
138+
'https://example.com/api-v2',
139+
].join(' ');
140+
141+
const result = await urls(
142+
{},
143+
text,
144+
{
145+
url_allow_list: ['https://example.com/api'],
146+
allowed_schemes: new Set(['https']),
147+
allow_subdomains: false,
148+
block_userinfo: true,
149+
}
150+
);
151+
152+
expect(result.info?.allowed).toEqual(
153+
expect.arrayContaining([
154+
'https://example.com/api',
155+
'https://example.com/api/users',
156+
])
157+
);
158+
expect(result.info?.blocked).toEqual(
159+
expect.arrayContaining([
160+
'https://example.com/api2',
161+
'https://example.com/api-v2',
162+
])
163+
);
164+
});
165+
166+
it('matches scheme-less allow list entries across configured schemes', async () => {
167+
const text = ['https://example.com', 'http://example.com'].join(' ');
168+
169+
const result = await urls(
170+
{},
171+
text,
172+
{
173+
url_allow_list: ['example.com'],
174+
allowed_schemes: new Set(['https', 'http']),
175+
allow_subdomains: false,
176+
block_userinfo: true,
177+
}
178+
);
179+
180+
expect(result.info?.allowed).toEqual(
181+
expect.arrayContaining(['https://example.com', 'http://example.com'])
182+
);
183+
expect(result.info?.blocked).toEqual([]);
184+
});
185+
186+
it('enforces explicit scheme matches when allow list entries include schemes', async () => {
187+
const text = ['https://bank.example.com', 'http://bank.example.com'].join(' ');
188+
189+
const result = await urls(
190+
{},
191+
text,
192+
{
193+
url_allow_list: ['https://bank.example.com'],
194+
allowed_schemes: new Set(['https', 'http']),
195+
allow_subdomains: false,
196+
block_userinfo: true,
197+
}
198+
);
199+
200+
expect(result.info?.allowed).toEqual(expect.arrayContaining(['https://bank.example.com']));
201+
expect(result.info?.blocked).toContain('http://bank.example.com');
202+
});
203+
204+
it('supports CIDR ranges and explicit port matching', async () => {
205+
const text = [
206+
'https://10.5.5.5',
207+
'https://192.168.1.100',
208+
'https://192.168.2.1',
209+
'https://example.com:8443',
210+
'https://example.com',
211+
'https://api.internal.com:9000',
212+
].join(' ');
213+
214+
const result = await urls(
215+
{},
216+
text,
217+
{
218+
url_allow_list: ['10.0.0.0/8', '192.168.1.0/24', 'https://example.com:8443', 'api.internal.com'],
219+
allowed_schemes: new Set(['https']),
220+
allow_subdomains: false,
221+
block_userinfo: true,
222+
}
223+
);
224+
225+
expect(result.info?.allowed).toEqual(
226+
expect.arrayContaining([
227+
'https://10.5.5.5',
228+
'https://192.168.1.100',
229+
'https://example.com:8443',
230+
'https://api.internal.com:9000',
231+
])
232+
);
233+
expect(result.info?.blocked).toEqual(
234+
expect.arrayContaining(['https://192.168.2.1', 'https://example.com'])
235+
);
236+
});
237+
238+
it('requires query strings and fragments to match exactly when configured', async () => {
239+
const text = [
240+
'https://example.com/search?q=test',
241+
'https://example.com/search?q=other',
242+
'https://example.com/docs#intro',
243+
'https://example.com/docs#outro',
244+
].join(' ');
245+
246+
const result = await urls(
247+
{},
248+
text,
249+
{
250+
url_allow_list: [
251+
'https://example.com/search?q=test',
252+
'https://example.com/docs#intro',
253+
],
254+
allowed_schemes: new Set(['https']),
255+
allow_subdomains: false,
256+
block_userinfo: true,
257+
}
258+
);
259+
260+
expect(result.info?.allowed).toEqual(
261+
expect.arrayContaining([
262+
'https://example.com/search?q=test',
263+
'https://example.com/docs#intro',
264+
])
265+
);
266+
expect(result.info?.blocked).toEqual(
267+
expect.arrayContaining([
268+
'https://example.com/search?q=other',
269+
'https://example.com/docs#outro',
270+
])
271+
);
272+
});
273+
274+
it('blocks URLs containing only a password in userinfo when configured', async () => {
275+
const result = await urls(
276+
{},
277+
'https://:password@example.com',
278+
{
279+
url_allow_list: ['example.com'],
280+
allowed_schemes: new Set(['https']),
281+
allow_subdomains: false,
282+
block_userinfo: true,
283+
}
284+
);
285+
286+
expect(result.info?.blocked).toContain('https://:password@example.com');
287+
expect(
288+
(result.info?.blocked_reasons as string[]).some((reason) => reason.includes('userinfo'))
289+
).toBe(true);
290+
});
291+
292+
it('handles malformed ports gracefully without crashing', async () => {
293+
const text = [
294+
'https://example.com:99999',
295+
'https://example.com:abc',
296+
'https://example.com:-1',
297+
].join(' ');
298+
299+
const result = await urls(
300+
{},
301+
text,
302+
{
303+
url_allow_list: ['example.com'],
304+
allowed_schemes: new Set(['https']),
305+
allow_subdomains: false,
306+
block_userinfo: true,
307+
}
308+
);
309+
310+
expect(result.tripwireTriggered).toBe(true);
311+
expect(result.info?.blocked).toHaveLength(3);
312+
expect(result.info?.blocked_reasons).toHaveLength(3);
313+
});
95314
});
96315

97316
describe('competitors guardrail', () => {

0 commit comments

Comments
 (0)