3232final class RegexHelper
3333{
3434 // Partial regular expressions (wrap with `/` on each side and add the case-insensitive `i` flag before use)
// NOTE(review): this text is a diff rendering. A `NN-` prefix marks a removed line and `NN+` its
// replacement; the spaces inside the quoted patterns look like rendering artifacts, so confirm every
// pattern against the real file before reusing it.
//
// PARTIAL_ENTITY: `&`, then one of: `#x` plus 1-6 hex digits, `#` plus 1-7 decimal digits, or a letter
// followed by 1-31 letters/digits; then `;`. Only lowercase ranges are listed, so matching uppercase
// relies on the `i` flag mentioned above. The replacement line swaps the non-capturing group `(?:` for
// an atomic group `(?>`, which does not backtrack into the alternation once it has matched.
35- public const PARTIAL_ENTITY = '&(?: #x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31}); ' ;
35+ public const PARTIAL_ENTITY = '&(?> #x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31}); ' ;
// PARTIAL_ESCAPABLE: a character class of ASCII punctuation characters.
3636 public const PARTIAL_ESCAPABLE = '[!"#$%& \'()*+,.\/:;<=>?@[ \\\\\]^_`{|}~-] ' ;
// PARTIAL_ESCAPED_CHAR: a literal backslash (the four backslashes in the single-quoted PHP string become
// two in the pattern, i.e. one escaped backslash) followed by one escapable character.
3737 public const PARTIAL_ESCAPED_CHAR = '\\\\' . self ::PARTIAL_ESCAPABLE ;
// PARTIAL_IN_DOUBLE_QUOTES: an opening `"`, then any run of escaped characters or characters other than
// `"` and NUL, then a closing `"`. The group is a capturing one.
3838 public const PARTIAL_IN_DOUBLE_QUOTES = '"( ' . self ::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*" ' ;
@@ -49,11 +49,11 @@ final class RegexHelper
// PARTIAL_ATTRIBUTEVALUE: an unquoted, single-quoted, or double-quoted attribute value. The three
// referenced partials are defined outside the lines shown here.
4949 public const PARTIAL_ATTRIBUTEVALUE = '(?: ' . self ::PARTIAL_UNQUOTEDVALUE . '| ' . self ::PARTIAL_SINGLEQUOTEDVALUE . '| ' . self ::PARTIAL_DOUBLEQUOTEDVALUE . ') ' ;
// PARTIAL_ATTRIBUTEVALUESPEC: optional whitespace, `=`, optional whitespace, then an attribute value.
5050 public const PARTIAL_ATTRIBUTEVALUESPEC = '(?: ' . '\s*= ' . '\s* ' . self ::PARTIAL_ATTRIBUTEVALUE . ') ' ;
// PARTIAL_ATTRIBUTE: mandatory leading whitespace, an attribute name, and an optional value spec.
5151 public const PARTIAL_ATTRIBUTE = '(?: ' . '\s+ ' . self ::PARTIAL_ATTRIBUTENAME . self ::PARTIAL_ATTRIBUTEVALUESPEC . '?) ' ;
// Removed side of the diff: open/close tag partials using ordinary greedy quantifiers (`*`, `?`), and an
// HTML-comment partial whose three alternatives are not wrapped in a group.
52- public const PARTIAL_OPENTAG = '< ' . self ::PARTIAL_TAGNAME . self ::PARTIAL_ATTRIBUTE . '* ' . '\s*\/?> ' ;
53- public const PARTIAL_CLOSETAG = '<\/ ' . self ::PARTIAL_TAGNAME . '\s*[>] ' ;
54- public const PARTIAL_OPENBLOCKTAG = '< ' . self ::PARTIAL_BLOCKTAGNAME . self ::PARTIAL_ATTRIBUTE . '* ' . '\s*\/?> ' ;
55- public const PARTIAL_CLOSEBLOCKTAG = '<\/ ' . self ::PARTIAL_BLOCKTAGNAME . '\s*[>] ' ;
56- public const PARTIAL_HTMLCOMMENT = '<!-->|<!--->|<!--[\s\S]*?--> ' ;
// Added side of the diff: the same partials with possessive quantifiers (`*+`, `?+`), which never give
// back what they consumed. Open tags are `<`, a tag name, any number of attributes, optional whitespace,
// an optional `/`, and `>`; close tags are `</`, a tag name, optional whitespace, and `>`.
// PARTIAL_HTMLCOMMENT is now wrapped in `(?: ... )`; without the group, its top-level `|` would split
// any larger pattern it is concatenated into (presumably the reason for the change — confirm).
52+ public const PARTIAL_OPENTAG = '< ' . self ::PARTIAL_TAGNAME . self ::PARTIAL_ATTRIBUTE . '*+ ' . '\s*+ \/?+ > ' ;
53+ public const PARTIAL_CLOSETAG = '<\/ ' . self ::PARTIAL_TAGNAME . '\s*+ [>] ' ;
54+ public const PARTIAL_OPENBLOCKTAG = '< ' . self ::PARTIAL_BLOCKTAGNAME . self ::PARTIAL_ATTRIBUTE . '*+ ' . '\s*+ \/?+ > ' ;
55+ public const PARTIAL_CLOSEBLOCKTAG = '<\/ ' . self ::PARTIAL_BLOCKTAGNAME . '\s*+ [>] ' ;
56+ public const PARTIAL_HTMLCOMMENT = '(?: <!-->|<!--->|<!--[\s\S]*?-->) ' ;
// PARTIAL_PROCESSINGINSTRUCTION: `<` and `?`, the shortest possible run of any characters (newlines
// included, via `[\s\S]`), then `?` and `>`.
5757 public const PARTIAL_PROCESSINGINSTRUCTION = '[<][?][\s\S]*?[?][>] ' ;
// PARTIAL_DECLARATION: `<!`, one or more ASCII letters, any characters other than `>`, then `>`.
5858 public const PARTIAL_DECLARATION = '<![A-Za-z]+ ' . '[^>]*> ' ;
// PARTIAL_CDATA: `<![CDATA[`, the shortest possible run of any characters, then `]]>`.
5959 public const PARTIAL_CDATA = '<!\[CDATA\[[\s\S]*?]\]> ' ;
@@ -65,14 +65,14 @@ final class RegexHelper
6565 '| ' . '\'( ' . self ::PARTIAL_ESCAPED_CHAR . '|[^ \'\x00])*+ \'' .
6666 '| ' . '\(( ' . self ::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*+\)) ' ;
6767
// REGEX_PUNCTUATION: one leading punctuation/symbol character. The removed line listed the ASCII
// punctuation characters explicitly in addition to the Unicode properties; the added line keeps only
// `\p{P}` and `\p{S}` under the `u` flag.
68- public const REGEX_PUNCTUATION = '/^[!"#$%& \' ()*+,\-. \\ /:;<=>?@ \\ [ \\ ] \\\\ ^_`{|}~ \p{P}\p{S}]/u ' ;
68+ public const REGEX_PUNCTUATION = '/^[\p{P}\p{S}]/u ' ;
// REGEX_UNSAFE_PROTOCOL: case-insensitive match for `javascript:`, `vbscript:`, `file:` or `data:`.
// NOTE(review): the alternation is not grouped, so `^` anchors only `javascript:`; the other three
// alternatives match anywhere in the subject. Confirm whether callers rely on that.
6969 public const REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i ' ;
// REGEX_SAFE_DATA_PROTOCOL: subject starts with `data:image/` followed by png, gif, jpeg or webp
// (case-insensitive).
7070 public const REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i ' ;
// REGEX_NON_SPACE: any character that is not a space, tab, form feed, vertical tab, CR or LF.
7171 public const REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/ ' ;
7272
// REGEX_WHITESPACE_CHAR: subject starts with a space, tab, LF, vertical tab, form feed or CR.
7373 public const REGEX_WHITESPACE_CHAR = '/^[ \t\n\x0b\x0c\x0d]/ ' ;
// REGEX_UNICODE_WHITESPACE_CHAR: a Unicode separator (`\pZ`) at the start of the subject, or `\s`.
// NOTE(review): as written, `^` applies only to the `\pZ` branch; `\s` can match at any position.
7474 public const REGEX_UNICODE_WHITESPACE_CHAR = '/^\pZ|\s/u ' ;
// REGEX_THEMATIC_BREAK: a whole line made of three or more `*`, or three or more `_`, or three or more
// `-`, each optionally followed by spaces/tabs (the marker characters cannot be mixed). The removed line
// repeated the `^...$` anchors in each alternative; the added line factors them out around one group.
75- public const REGEX_THEMATIC_BREAK = '/^(?:\*[ \t]*){3,}$|^ (?:_[ \t]*){3,}$|^ (?:-[ \t]*){3,}$/ ' ;
75+ public const REGEX_THEMATIC_BREAK = '/^(?:(?: \*[ \t]*){3,}| (?:_[ \t]*){3,}| (?:-[ \t]*){3,}) $/ ' ;
// REGEX_LINK_DESTINATION_BRACES: a leading `<...>` destination whose content is any run of
// backslash-escaped characters or characters other than `<`, `>`, newline, backslash and NUL.
7676 public const REGEX_LINK_DESTINATION_BRACES = '/^(?:<(?:[^<> \\n \\\\\\x00]| \\\\.)*>)/ ' ;
7777
7878 /**
@@ -192,7 +192,7 @@ public static function getHtmlBlockOpenRegex(int $type): string
192192 case HtmlBlock::TYPE_5_CDATA :
193193 return '/^<!\[CDATA\[/i ' ;
194194 case HtmlBlock::TYPE_6_BLOCK_ELEMENT :
195- return '%^<[/]? (?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s|[/]?[>]|$)%i ' ;
195+ return '%^</?+ (?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h[123456]|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)(?:\s++ |[/]?+ [>]|$)%i ' ;
196196 case HtmlBlock::TYPE_7_MISC_ELEMENT :
197197 return '/^(?: ' . self ::PARTIAL_OPENTAG . '| ' . self ::PARTIAL_CLOSETAG . ') \\s*$/i ' ;
198198 default :
0 commit comments