@@ -243,58 +243,63 @@ function detectUrls(text: string): string[] {
243243function validateUrlSecurity (
244244 urlString : string ,
245245 config : UrlsConfig
246- ) : { parsedUrl : URL | null ; reason : string } {
246+ ) : { parsedUrl : URL | null ; reason : string ; hadScheme : boolean } {
247247 try {
248248 let parsedUrl : URL ;
249249 let originalScheme : string ;
250+ let hadScheme : boolean ;
250251
251252 // Parse URL - preserve original scheme for validation
252253 if ( urlString . includes ( '://' ) ) {
253254 // Standard URL with double-slash scheme (http://, https://, ftp://, etc.)
254255 parsedUrl = new URL ( urlString ) ;
255256 originalScheme = parsedUrl . protocol . replace ( / : $ / , '' ) ;
257+ hadScheme = true ;
256258 } else if (
257259 urlString . includes ( ':' ) &&
258260 urlString . split ( ':' , 1 ) [ 0 ] . match ( / ^ ( d a t a | j a v a s c r i p t | v b s c r i p t | m a i l t o ) $ / )
259261 ) {
260262 // Special single-colon schemes
261263 parsedUrl = new URL ( urlString ) ;
262264 originalScheme = parsedUrl . protocol . replace ( / : $ / , '' ) ;
265+ hadScheme = true ;
263266 } else {
264267 // Add http scheme for parsing, but remember this is a default
265268 parsedUrl = new URL ( `http://${ urlString } ` ) ;
266269 originalScheme = 'http' ; // Default scheme for scheme-less URLs
270+ hadScheme = false ;
267271 }
268272
269273 // Basic validation: must have scheme and hostname (except for special schemes)
270274 if ( ! parsedUrl . protocol ) {
271- return { parsedUrl : null , reason : 'Invalid URL format' } ;
275+ return { parsedUrl : null , reason : 'Invalid URL format' , hadScheme : false } ;
272276 }
273277
274278 // Special schemes like data: and javascript: don't need hostname
275279 const parsedScheme = parsedUrl . protocol . replace ( / : $ / , '' ) . toLowerCase ( ) ;
276280 if ( ! HOSTLESS_SCHEMES . has ( parsedScheme ) && ! parsedUrl . hostname ) {
277- return { parsedUrl : null , reason : 'Invalid URL format' } ;
281+ return { parsedUrl : null , reason : 'Invalid URL format' , hadScheme } ;
278282 }
279283
280284 // Security validations - use original scheme
285+ // Only check allowed_schemes if the URL explicitly had a scheme
281286 const normalizedScheme = originalScheme . toLowerCase ( ) ;
282287
283- if ( ! config . allowed_schemes . has ( normalizedScheme ) ) {
284- return { parsedUrl : null , reason : `Blocked scheme: ${ normalizedScheme } ` } ;
288+ if ( hadScheme && ! config . allowed_schemes . has ( normalizedScheme ) ) {
289+ return { parsedUrl : null , reason : `Blocked scheme: ${ normalizedScheme } ` , hadScheme } ;
285290 }
286291
287292 if ( config . block_userinfo && ( parsedUrl . username || parsedUrl . password ) ) {
288- return { parsedUrl : null , reason : 'Contains userinfo (potential credential injection)' } ;
293+ return { parsedUrl : null , reason : 'Contains userinfo (potential credential injection)' , hadScheme } ;
289294 }
290295
291296 // Everything else (IPs, localhost, private IPs) goes through allow list logic
292- return { parsedUrl, reason : '' } ;
297+ return { parsedUrl, reason : '' , hadScheme } ;
293298 } catch ( error ) {
294299 // Provide specific error information for debugging
295300 const errorName = error instanceof Error ? error . name : 'Error' ;
296301 const errorMessage = error instanceof Error ? error . message : String ( error ) ;
297- return { parsedUrl : null , reason : `URL parsing error: ${ errorName } : ${ errorMessage } ` } ;
302+ return { parsedUrl : null , reason : `URL parsing error: ${ errorName } : ${ errorMessage } ` , hadScheme : false } ;
298303 }
299304}
300305
@@ -328,8 +333,13 @@ function isIpv4Address(value: string): boolean {
328333
329334/**
330335 * Check if URL is allowed based on the allow list configuration.
336+ *
337+ * @param parsedUrl - The parsed URL to check
338+ * @param allowList - List of allowed URL patterns
339+ * @param allowSubdomains - Whether to allow subdomains
340+ * @param hadScheme - Whether the original URL had an explicit scheme
331341 */
332- function isUrlAllowed ( parsedUrl : URL , allowList : string [ ] , allowSubdomains : boolean ) : boolean {
342+ function isUrlAllowed ( parsedUrl : URL , allowList : string [ ] , allowSubdomains : boolean , hadScheme : boolean ) : boolean {
333343 if ( allowList . length === 0 ) {
334344 return false ;
335345 }
@@ -395,11 +405,14 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool
395405 continue ;
396406 }
397407
398- if ( hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower ) {
408+ // Scheme matching for IPs: only enforce when BOTH allow list entry AND URL have explicit schemes
409+ if ( hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower ) {
399410 continue ;
400411 }
401412
402- if ( allowedPort !== null ) {
413+ // Port matching: only enforce when allow list entry explicitly specifies a port
414+ // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default)
415+ if ( parsedAllowed . port ) {
403416 if ( urlPort === null || allowedPort !== urlPort ) {
404417 continue ;
405418 }
@@ -435,7 +448,9 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool
435448
436449 const allowedDomain = allowedHost . replace ( / ^ w w w \. / , '' ) ;
437450
438- if ( allowedPort !== null ) {
451+ // Port matching: only enforce when allow list entry explicitly specifies a port
452+ // Check parsedAllowed.port (empty string when no port specified) not allowedPort (always has default)
453+ if ( parsedAllowed . port ) {
439454 if ( urlPort === null || allowedPort !== urlPort ) {
440455 continue ;
441456 }
@@ -447,7 +462,8 @@ function isUrlAllowed(parsedUrl: URL, allowList: string[], allowSubdomains: bool
447462 continue ;
448463 }
449464
450- if ( hasExplicitScheme && allowedScheme && allowedScheme !== schemeLower ) {
465+ // Scheme matching for domains: only enforce when BOTH allow list entry AND URL have explicit schemes
466+ if ( hasExplicitScheme && hadScheme && allowedScheme && allowedScheme !== schemeLower ) {
451467 continue ;
452468 }
453469
@@ -492,7 +508,7 @@ export const urls: CheckFn<UrlsContext, string, UrlsConfig> = async (ctx, data,
492508
493509 for ( const urlString of detectedUrls ) {
494510 // Validate URL with security checks
495- const { parsedUrl, reason } = validateUrlSecurity ( urlString , actualConfig ) ;
511+ const { parsedUrl, reason, hadScheme } = validateUrlSecurity ( urlString , actualConfig ) ;
496512
497513 if ( parsedUrl === null ) {
498514 blocked . push ( urlString ) ;
@@ -509,7 +525,7 @@ export const urls: CheckFn<UrlsContext, string, UrlsConfig> = async (ctx, data,
509525 // They were already validated for scheme permission in validateUrlSecurity
510526 allowed . push ( urlString ) ;
511527 } else if (
512- isUrlAllowed ( parsedUrl , actualConfig . url_allow_list , actualConfig . allow_subdomains )
528+ isUrlAllowed ( parsedUrl , actualConfig . url_allow_list , actualConfig . allow_subdomains , hadScheme )
513529 ) {
514530 allowed . push ( urlString ) ;
515531 } else {
0 commit comments