RegexHelper
in package
Final: Yes
Provides regular expressions and utilities for parsing Markdown
All of the PARTIAL_ regex constants assume that they'll be used in case-insensitive searches. All other complete regexes provided by this class (either via constants or methods) will have case-insensitivity enabled.
Tags
Table of Contents
Constants
- PARTIAL_ATTRIBUTE = '(?:' . '\s+' . self::PARTIAL_ATTRIBUTENAME . self::PARTIAL_ATTRIBUTEVALUESPEC . '?)'
- PARTIAL_ATTRIBUTENAME = '[a-z_:][a-z0-9:._-]*'
- PARTIAL_ATTRIBUTEVALUE = '(?:' . self::PARTIAL_UNQUOTEDVALUE . '|' . self::PARTIAL_SINGLEQUOTEDVALUE . '|' . self::PARTIAL_DOUBLEQUOTEDVALUE . ')'
- PARTIAL_ATTRIBUTEVALUESPEC = '(?:' . '\s*=' . '\s*' . self::PARTIAL_ATTRIBUTEVALUE . ')'
- PARTIAL_BLOCKTAGNAME = '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
- PARTIAL_CDATA = '<!\[CDATA\[[\s\S]*?]\]>'
- PARTIAL_CLOSEBLOCKTAG = '<\/' . self::PARTIAL_BLOCKTAGNAME . '\s*[>]'
- PARTIAL_CLOSETAG = '<\/' . self::PARTIAL_TAGNAME . '\s*[>]'
- PARTIAL_DECLARATION = '<![A-Za-z]+' . '[^>]*>'
- PARTIAL_DOUBLEQUOTEDVALUE = '"[^"]*"'
- PARTIAL_ENTITY = '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'
- PARTIAL_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\\]^_`{|}~-]'
- PARTIAL_ESCAPED_CHAR = '\\\\' . self::PARTIAL_ESCAPABLE
- PARTIAL_HTMLBLOCKOPEN = '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' . '\/' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s>]|$)' . '|' . '[?!])'
- PARTIAL_HTMLCOMMENT = '<!-->|<!--->|<!--[\s\S]*?-->'
- PARTIAL_HTMLTAG = '(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . '|' . self::PARTIAL_HTMLCOMMENT . '|' . self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')'
- PARTIAL_IN_DOUBLE_QUOTES = '"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"'
- PARTIAL_IN_PARENS = '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^)\x00])*\)'
- PARTIAL_IN_PARENS_NOSP = '\((' . self::PARTIAL_REG_CHAR . '|' . self::PARTIAL_ESCAPED_CHAR . '|\\\\)*\)'
- PARTIAL_IN_SINGLE_QUOTES = '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\''
- PARTIAL_LINK_TITLE = '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*+"' . '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*+\'' . '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*+\))'
- PARTIAL_OPENBLOCKTAG = '<' . self::PARTIAL_BLOCKTAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>'
- PARTIAL_OPENTAG = '<' . self::PARTIAL_TAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>'
- PARTIAL_PROCESSINGINSTRUCTION = '[<][?][\s\S]*?[?][>]'
- PARTIAL_REG_CHAR = '[^\\\\()\x00-\x20]'
- PARTIAL_SINGLEQUOTEDVALUE = '\'[^\']*\''
- PARTIAL_TAGNAME = '[a-z][a-z0-9-]*'
- PARTIAL_UNQUOTEDVALUE = '[^"\'=<>`\x00-\x20]+'
- REGEX_LINK_DESTINATION_BRACES = '/^(?:<(?:[^<>\n\\\\\\x00]|\\\\.)*>)/'
- REGEX_NON_SPACE = '/[^ \t\f\v\r\n]/'
- REGEX_PUNCTUATION = '/^[!"#$%&\'()*+,\-.\/:;<=>?@\[\]\\\\^_`{|}~\p{P}\p{S}]/u'
- REGEX_SAFE_DATA_PROTOCOL = '/^data:image\/(?:png|gif|jpeg|webp)/i'
- REGEX_THEMATIC_BREAK = '/^(?:\*[ \t]*){3,}$|^(?:_[ \t]*){3,}$|^(?:-[ \t]*){3,}$/'
- REGEX_UNICODE_WHITESPACE_CHAR = '/^\pZ|\s/u'
- REGEX_UNSAFE_PROTOCOL = '/^javascript:|vbscript:|file:|data:/i'
- REGEX_WHITESPACE_CHAR = '/^[ \t\n\x0b\x0c\x0d]/'
Methods
- isEscapable() : bool
- isLetter() : bool
- isLinkPotentiallyUnsafe() : bool
- isWhitespace() : bool
- matchAt() : int|null
- Attempt to match a regex in the string $string at offset $offset
- matchFirst() : array<string|int, string>|null
- Functional wrapper around preg_match_all which only returns the first set of matches
- unescape() : string
- Replace backslash escapes with literal characters
Constants
PARTIAL_ATTRIBUTE
public
mixed
PARTIAL_ATTRIBUTE
= '(?:' . '\s+' . self::PARTIAL_ATTRIBUTENAME . self::PARTIAL_ATTRIBUTEVALUESPEC . '?)'
PARTIAL_ATTRIBUTENAME
public
mixed
PARTIAL_ATTRIBUTENAME
= '[a-z_:][a-z0-9:._-]*'
PARTIAL_ATTRIBUTEVALUE
public
mixed
PARTIAL_ATTRIBUTEVALUE
= '(?:' . self::PARTIAL_UNQUOTEDVALUE . '|' . self::PARTIAL_SINGLEQUOTEDVALUE . '|' . self::PARTIAL_DOUBLEQUOTEDVALUE . ')'
PARTIAL_ATTRIBUTEVALUESPEC
public
mixed
PARTIAL_ATTRIBUTEVALUESPEC
= '(?:' . '\s*=' . '\s*' . self::PARTIAL_ATTRIBUTEVALUE . ')'
PARTIAL_BLOCKTAGNAME
public
mixed
PARTIAL_BLOCKTAGNAME
= '(?:address|article|aside|base|basefont|blockquote|body|caption|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption|figure|footer|form|frame|frameset|h1|head|header|hr|html|iframe|legend|li|link|main|menu|menuitem|nav|noframes|ol|optgroup|option|p|param|search|section|summary|table|tbody|td|tfoot|th|thead|title|tr|track|ul)'
PARTIAL_CDATA
public
mixed
PARTIAL_CDATA
= '<!\[CDATA\[[\s\S]*?]\]>'
PARTIAL_CLOSEBLOCKTAG
public
mixed
PARTIAL_CLOSEBLOCKTAG
= '<\/' . self::PARTIAL_BLOCKTAGNAME . '\s*[>]'
PARTIAL_CLOSETAG
public
mixed
PARTIAL_CLOSETAG
= '<\/' . self::PARTIAL_TAGNAME . '\s*[>]'
PARTIAL_DECLARATION
public
mixed
PARTIAL_DECLARATION
= '<![A-Za-z]+' . '[^>]*>'
PARTIAL_DOUBLEQUOTEDVALUE
public
mixed
PARTIAL_DOUBLEQUOTEDVALUE
= '"[^"]*"'
PARTIAL_ENTITY
public
mixed
PARTIAL_ENTITY
= '&(?:#x[a-f0-9]{1,6}|#[0-9]{1,7}|[a-z][a-z0-9]{1,31});'
PARTIAL_ESCAPABLE
public
mixed
PARTIAL_ESCAPABLE
= '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\\]^_`{|}~-]'
PARTIAL_ESCAPED_CHAR
public
mixed
PARTIAL_ESCAPED_CHAR
= '\\\\' . self::PARTIAL_ESCAPABLE
PARTIAL_HTMLBLOCKOPEN
public
mixed
PARTIAL_HTMLBLOCKOPEN
= '<(?:' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s\/>]|$)' . '|' . '\/' . self::PARTIAL_BLOCKTAGNAME . '(?:[\s>]|$)' . '|' . '[?!])'
PARTIAL_HTMLCOMMENT
public
mixed
PARTIAL_HTMLCOMMENT
= '<!-->|<!--->|<!--[\s\S]*?-->'
PARTIAL_HTMLTAG
public
mixed
PARTIAL_HTMLTAG
= '(?:' . self::PARTIAL_OPENTAG . '|' . self::PARTIAL_CLOSETAG . '|' . self::PARTIAL_HTMLCOMMENT . '|' . self::PARTIAL_PROCESSINGINSTRUCTION . '|' . self::PARTIAL_DECLARATION . '|' . self::PARTIAL_CDATA . ')'
PARTIAL_IN_DOUBLE_QUOTES
public
mixed
PARTIAL_IN_DOUBLE_QUOTES
= '"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*"'
PARTIAL_IN_PARENS
public
mixed
PARTIAL_IN_PARENS
= '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^)\x00])*\)'
PARTIAL_IN_PARENS_NOSP
public
mixed
PARTIAL_IN_PARENS_NOSP
= '\((' . self::PARTIAL_REG_CHAR . '|' . self::PARTIAL_ESCAPED_CHAR . '|\\\\)*\)'
PARTIAL_IN_SINGLE_QUOTES
public
mixed
PARTIAL_IN_SINGLE_QUOTES
= '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*\''
PARTIAL_LINK_TITLE
public
mixed
PARTIAL_LINK_TITLE
= '^(?:"(' . self::PARTIAL_ESCAPED_CHAR . '|[^"\x00])*+"' . '|' . '\'(' . self::PARTIAL_ESCAPED_CHAR . '|[^\'\x00])*+\'' . '|' . '\((' . self::PARTIAL_ESCAPED_CHAR . '|[^()\x00])*+\))'
PARTIAL_OPENBLOCKTAG
public
mixed
PARTIAL_OPENBLOCKTAG
= '<' . self::PARTIAL_BLOCKTAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>'
PARTIAL_OPENTAG
public
mixed
PARTIAL_OPENTAG
= '<' . self::PARTIAL_TAGNAME . self::PARTIAL_ATTRIBUTE . '*' . '\s*\/?>'
PARTIAL_PROCESSINGINSTRUCTION
public
mixed
PARTIAL_PROCESSINGINSTRUCTION
= '[<][?][\s\S]*?[?][>]'
PARTIAL_REG_CHAR
public
mixed
PARTIAL_REG_CHAR
= '[^\\\\()\x00-\x20]'
PARTIAL_SINGLEQUOTEDVALUE
public
mixed
PARTIAL_SINGLEQUOTEDVALUE
= '\'[^\']*\''
PARTIAL_TAGNAME
public
mixed
PARTIAL_TAGNAME
= '[a-z][a-z0-9-]*'
PARTIAL_UNQUOTEDVALUE
public
mixed
PARTIAL_UNQUOTEDVALUE
= '[^"\'=<>`\x00-\x20]+'
REGEX_LINK_DESTINATION_BRACES
public
mixed
REGEX_LINK_DESTINATION_BRACES
= '/^(?:<(?:[^<>\n\\\\\\x00]|\\\\.)*>)/'
REGEX_NON_SPACE
public
mixed
REGEX_NON_SPACE
= '/[^ \t\f\v\r\n]/'
REGEX_PUNCTUATION
public
mixed
REGEX_PUNCTUATION
= '/^[!"#$%&\'()*+,\-.\/:;<=>?@\[\]\\\\^_`{|}~\p{P}\p{S}]/u'
REGEX_SAFE_DATA_PROTOCOL
public
mixed
REGEX_SAFE_DATA_PROTOCOL
= '/^data:image\/(?:png|gif|jpeg|webp)/i'
REGEX_THEMATIC_BREAK
public
mixed
REGEX_THEMATIC_BREAK
= '/^(?:\*[ \t]*){3,}$|^(?:_[ \t]*){3,}$|^(?:-[ \t]*){3,}$/'
REGEX_UNICODE_WHITESPACE_CHAR
public
mixed
REGEX_UNICODE_WHITESPACE_CHAR
= '/^\pZ|\s/u'
REGEX_UNSAFE_PROTOCOL
public
mixed
REGEX_UNSAFE_PROTOCOL
= '/^javascript:|vbscript:|file:|data:/i'
REGEX_WHITESPACE_CHAR
public
mixed
REGEX_WHITESPACE_CHAR
= '/^[ \t\n\x0b\x0c\x0d]/'
Methods
isEscapable()
public
static isEscapable(string $character) : bool
Parameters
- $character : string
Tags
Return values
bool
isLetter()
public
static isLetter(string|null $character) : bool
Parameters
- $character : string|null
Tags
Return values
bool
isLinkPotentiallyUnsafe()
public
static isLinkPotentiallyUnsafe(string $url) : bool
Parameters
- $url : string
Tags
Return values
bool
isWhitespace()
public
static isWhitespace(string $character) : bool
Parameters
- $character : string
Return values
bool
matchAt()
Attempt to match a regex in the string $string at offset $offset
public
static matchAt(string $regex, string $string[, int $offset = 0 ]) : int|null
Parameters
- $regex : string
- $string : string
- $offset : int = 0
Tags
Return values
int|null — Index of match, or null
matchFirst()
Functional wrapper around preg_match_all which only returns the first set of matches
public
static matchFirst(string $pattern, string $subject[, int $offset = 0 ]) : array<string|int, string>|null
Parameters
- $pattern : string
- $subject : string
- $offset : int = 0
Tags
Return values
array<string|int, string>|null
unescape()
Replace backslash escapes with literal characters
public
static unescape(string $string) : string
Parameters
- $string : string