|
| 1 | +import re |
| 2 | +import gixy |
| 3 | +from gixy.plugins.plugin import Plugin |
| 4 | +from gixy.core.regexp import Regexp |
| 5 | + |
| 6 | + |
class invalid_regex(Plugin):
    """
    Detect references to positional regex captures ($1..$9) that the
    associated regex pattern does not define.

    Two directives are audited:

    * ``rewrite``: the replacement (second argument) is checked against the
      pattern (first argument).
    * ``set``: the value (second argument) is checked against the pattern of
      the nearest enclosing ``if`` block, provided that block uses the ``~``
      or ``~*`` operator.

    Insecure examples:
        rewrite "(?i)/" $1 break;            # (?i) is a flag, not a group
        rewrite "^/path" $1 redirect;        # no capturing groups in pattern
        if ($uri ~ "^/test") { set $x $1; }  # no capturing groups in pattern
    """

    summary = 'Using a nonexistent regex capture group.'
    severity = gixy.severity.MEDIUM
    description = 'Referencing a capture group (like $1, $2) that does not exist in the regex pattern will result in an empty value.'
    help_url = 'https://nginx.org/en/docs/http/ngx_http_rewrite_module.html'
    directives = ['rewrite', 'set']

    # nginx treats exactly one digit after "$" as a positional capture
    # ($1..$9); "$10" is "$1" followed by a literal "0". Matching a single
    # digit avoids reporting a bogus "$10" reference.
    CAPTURE_GROUP_REF = re.compile(r'\$([1-9])')

    # `if` operators that perform a regex match.
    REGEX_OPERATORS = ('~', '~*')

    def audit(self, directive):
        """Dispatch *directive* to the check that matches its name."""
        if directive.name == 'rewrite':
            self._audit_rewrite(directive)
        elif directive.name == 'set':
            self._audit_set(directive)

    def _audit_rewrite(self, directive):
        """Audit a ``rewrite`` directive for invalid group references."""
        if len(directive.args) < 2:
            return

        pattern, replacement = directive.args[0], directive.args[1]

        referenced = self._referenced_groups(replacement)
        if not referenced:
            return

        available = self._available_groups(pattern, case_sensitive=True)
        if available is None:
            # The pattern could not be parsed; skip rather than guess.
            return

        invalid = referenced - available
        if invalid:
            self.add_issue(
                directive=directive,
                reason=self._build_reason(
                    'The replacement string', 'the pattern',
                    pattern, invalid, available,
                ),
            )

    def _audit_set(self, directive):
        """Audit a ``set`` directive against the regex of its enclosing ``if``."""
        if len(directive.args) < 2:
            return

        referenced = self._referenced_groups(directive.args[1])
        if not referenced:
            return

        if_directive = self._enclosing_if(directive)
        if not if_directive:
            # Not inside an `if` block: there is no regex context to check.
            return

        if not hasattr(if_directive, 'args') or len(if_directive.args) < 3:
            return

        operator = if_directive.args[1]
        if operator not in self.REGEX_OPERATORS:
            return

        pattern = if_directive.args[2]
        available = self._available_groups(
            pattern, case_sensitive=(operator == '~'))
        if available is None:
            return

        invalid = referenced - available
        if invalid:
            self.add_issue(
                directive=[directive, if_directive],
                reason=self._build_reason(
                    'The set directive', 'the if condition pattern',
                    pattern, invalid, available,
                ),
            )

    def _referenced_groups(self, text):
        """Return the set of capture numbers referenced as ``$N`` in *text*."""
        return {int(m.group(1)) for m in self.CAPTURE_GROUP_REF.finditer(text)}

    @staticmethod
    def _available_groups(pattern, case_sensitive):
        """
        Return the set of positional group numbers defined by *pattern*,
        or ``None`` when the pattern cannot be parsed.
        """
        try:
            keys = list(Regexp(pattern, case_sensitive=case_sensitive).groups.keys())
        except Exception:
            # Deliberate best-effort: the exception types raised by Regexp
            # are not visible here, and an unparseable pattern must not
            # abort the audit.
            return None
        # Keep only positive integer keys: this drops group 0 (the full
        # match) and any non-numeric keys the parser may expose (presumably
        # named groups), which would otherwise break sorting of the result.
        return {key for key in keys if isinstance(key, int) and key > 0}

    @staticmethod
    def _enclosing_if(directive):
        """Return the nearest ancestor of *directive* named ``if``, or ``None``."""
        node = directive.parent
        while node:
            if getattr(node, 'name', None) == 'if':
                return node
            node = getattr(node, 'parent', None)
        return None

    @staticmethod
    def _build_reason(subject, pattern_label, pattern, invalid, available):
        """Build the issue message for *invalid* references against *pattern*."""
        invalid_list = ', '.join(f'${g}' for g in sorted(invalid))
        if not available:
            return (
                f'{subject} references capture group(s) {invalid_list}, '
                f'but {pattern_label} "{pattern}" has no capturing groups.'
            )
        available_list = ', '.join(f'${g}' for g in sorted(available))
        return (
            f'{subject} references capture group(s) {invalid_list}, '
            f'but {pattern_label} "{pattern}" only has {available_list}.'
        )
0 commit comments