File: //proc/thread-self/root/usr/local/modsecurity-crs/regex-assembly/934120.ra
##! Please refer to the documentation at
##! https://coreruleset.org/docs/development/regex_assembly/.
##! Bypasses and techniques here come from:
##! - https://github.com/swisskyrepo/PayloadsAllTheThings/tree/master/Server%20Side%20Request%20Forgery
##! - https://github.com/cujanovic/SSRF-Testing
##!+ i
##! This regex starts with a list of all the schemes that can be used to make a request
##! Outer block. The expression is built as: <scheme> "://" <host alternative>.
##! The `##!=>` markers concatenate what precedes them with what follows.
##!> assemble
##! Alternation of URL schemes, taken from the `url-schemes` include (not shown in this file).
##!> include url-schemes
##!=>
##! Literal separator between scheme and host.
://
##!=>
##! From here to the end of the outer block, every pattern line (and every nested
##! assemble block) is one alternative for the host part.
##! http://425.510.425.510/ Dotted decimal with overflow (already covered by RFI rule 931100)
##! http://2852039166/ Dotless decimal
##! http://7147006462/ Dotless decimal with overflow
##! Both dotless examples above are exactly ten digits long, which is what is matched here.
\d{10}
##! http://0xA9.0xFE.0xA9.0xFE/ Dotted hexadecimal
##! (upper-case hex digits match through the `i` flag set at the top of the file)
(?:0x[a-f0-9]{2}\.){3}0x[a-f0-9]{2}
##! http://0xA9FEA9FE/ Dotless hexadecimal
0x[a-f0-9]{8}
##! http://0x41414141A9FEA9FE/ Dotless hexadecimal with overflow
0x[a-f0-9]{16}
##! http://0251.0376.0251.0376/ Dotted octal
##! http://0251.00376.000251.0000376/ Dotted octal with padding
##! Each of the four parts: one to four leading zeros, then one to three digits.
(?:0{1,4}\d{1,3}\.){3}0{1,4}\d{1,3}
##! http://169.254.43518/
##! Three-part form: the last two octets are merged into one number
##! (43518 = 169 * 256 + 254), as accepted by inet_aton()-style parsers.
##! NOTE(review): only a five-digit last part matches; shorter merged values are not covered by this line.
\d{1,3}\.\d{1,3}\.\d{5}
##! http://169.16689662/
##! Two-part form: the last three octets are merged into one number
##! (16689662 = 254 * 65536 + 169 * 256 + 254).
##! NOTE(review): only an eight-digit last part matches; shorter merged values are not covered by this line.
\d{1,3}\.\d{8}
##! glibc Name Service Switch abuse
##! http://\\l\\o\\c\\a\\l\\h\\o\\s\\t (while underscore is not valid RFC syntax, it is allowed and might be used)
##! One or more repetitions of: two literal backslashes (\x5c), one hostname
##! character, then an optional dot and an optional underscore.
(?:\x5c\x5c[a-z\d-]\.?_?)+
##! http://[::ffff:a9fe:a9fe] IPV6 Compressed - IPv6 (base regex from https://ihateregex.io/expr/ipv6/, with [0-9] converted to \d and with non-capturing groups (below))
##! http://[0:0:0:0:0:ffff:a9fe:a9fe] IPV6 Expanded
##! http://[fe80::%zone1] link-local unicast with zone ID
##! http://[0:0:0:0:0:ffff:169.254.169.254] IPV6/IPV4
##! http://[::] the unspecified address
##! Something that looks like IPv6 in a URL.
##! Matches full and compressed IPv6, link-local IPv6 with
##! zone ID, and embedded IPv4.
##! We could match the IPv6 specification here but that would
##! decrease performance of the regular expression and would
##! actually increase the possibility for bypasses.
##! Shape: "[" + hex digits and colons + optional tail (dotted digits for an
##! embedded IPv4, or "%" followed by a zone ID) + "]".
\[[a-f\d:]+(?:[\d.]+|%\w+)?\]
##! These come from https://github.com/cujanovic/SSRF-Testing
##! These bypasses work by confusing URL parsers in different languages (e.g., PHP, Python, Ruby, Perl)
##! and libraries (e.g. cURL). The bypasses are parser specific but will often be combined to break
##! multiple parsers with one try. The goal is often to get the application to call another library
##! with the malicious URL, e.g. libcurl or glibc (name resolution via gethostbyname(), see also
##! Name Service Switch abuse above).
##! http://127.88.23.245:22/+&@google.com:80#+@google.com:80/ (already covered by RFI rule 931100)
##! http://127.88.23.245:22/?@google.com:80/ (already covered by RFI rule 931100)
##! http://127.88.23.245:22/#@www.google.com:80/ (already covered by RFI rule 931100)
##! http://google.com:80\\@127.88.23.245:22/ (already covered by RFI rule 931100)
##! http://google.com:80+&@127.88.23.245:22/#+@google.com:80/
##! http://google.com:80+&@google.com:80#+@127.88.23.245:22/
##! create ip-or-domain for later use
##! Builds the "<host>:<port>" fragment once and stores it under the name
##! `ip-or-domain`; the parser-confusion techniques further down pull it in
##! with `##!=> ip-or-domain`.
##!> assemble
##! host alternative 1: dotted-decimal IPv4
##! (quantifier written as {3}, matching the other dotted forms in this file; same match set as {3,3})
(?:\d{1,3}\.){3}\d{1,3}
##! host alternative 2: domain name - a letter, then 1 to 255 word characters, hyphens or dots
[a-z][\w\-\.]{1,255}
##!=>
##! mandatory port: colon plus one to five digits
:\d{1,5}
##!=< ip-or-domain
##!<
##! URL-parser confusion: a harmless-looking "domain:port" followed by one or
##! more fragments that smuggle in a second host (see the sample URLs listed
##! above this block).
##!> assemble
##! domain + port
[a-z][\w\-\.]{1,255}:\d{1,5}
##!=>
##! at least one of the evasion techniques (the `+` after this nested block
##! lets techniques repeat and be mixed)
##!> assemble
##! technique 1: user-info confusion without a fragment
##!> assemble
##! possible white spaces to fool safety checks in URL parsers
\s*
##!=>
##! &@ to confuse URL parsers (& can indicate query parameter, @ indicates user info)
&?@
##!=>
##! IPv4 + port or domain + port
##!=> ip-or-domain
##! optional forward slash (written unescaped, same as in technique 2)
/?
##!=>
##!<
##! technique 2: same as technique 1, but introduced by a fragment marker
##!> assemble
##! fragment to confuse URL parsers
#
##!=>
##! possible white spaces to fool safety checks in URL parsers
\s*
##!=>
##! &@ to confuse URL parsers (& can indicate query parameter, @ indicates user info)
&?@
##!=>
##! IPv4 + port or domain + port
##!=> ip-or-domain
##! optional forward slash
/?
##!=>
##!<
##!<
##!=>
##! quantifier applied to the technique alternation above: one or more
+
##!=>
##!<
##! Enclosed alphanumerics are used for evasion (https://en.wikipedia.org/wiki/Enclosed_Alphanumerics).
##! See also https://github.com/cujanovic/SSRF-Testing.
##! These will normally sound many alarms, but having them flagged as ssrf attempt makes sense
##! Alternation of the UTF-8 byte sequences of the Unicode "Enclosed
##! Alphanumerics" block (U+2460-U+24FF) plus the ideographic full stop.
##! The result is stored under the name `enclosed-alnums`.
##! Each line fixes the first two bytes and lists the allowed third bytes.
##!> assemble
##! ①,②,③,④,⑤,⑥,⑦,⑧,⑨,⑩,⑪,⑫,⑬,⑭,⑮,⑯,⑰,⑱,⑲,⑳ (U+2460-U+2473)
\xe2\x91[\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3]
##! ⓪ (U+24EA). Its encoding is e2 93 aa, so it does not share the e2 91 prefix
##! of ①-⑳; it sits between ⓩ (e2 93 a9) and ⓫ (e2 93 ab) and needs its own line.
\xe2\x93\xaa
##! ⑴,⑵,⑶,⑷,⑸,⑹,⑺,⑻,⑼,⑽,⑾,⑿ (U+2474-U+247F)
\xe2\x91[\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf]
##! ⒀,⒁,⒂,⒃,⒄,⒅,⒆,⒇ (U+2480-U+2487)
\xe2\x92[\x80\x81\x82\x83\x84\x85\x86\x87]
##! ⒈,⒉,⒊,⒋,⒌,⒍,⒎,⒏,⒐,⒑,⒒,⒓,⒔,⒕,⒖,⒗,⒘,⒙,⒚,⒛ (U+2488-U+249B)
\xe2\x92[\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b]
##! ⒜,⒝,⒞,⒟,⒠,⒡,⒢,⒣,⒤,⒥,⒦,⒧,⒨,⒩,⒪,⒫,⒬,⒭,⒮,⒯,⒰,⒱,⒲,⒳,⒴,⒵ (U+249C-U+24B5)
\xe2\x92[\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5]
##! Ⓐ,Ⓑ,Ⓒ,Ⓓ,Ⓔ,Ⓕ,Ⓖ,Ⓗ,Ⓘ,Ⓙ (U+24B6-U+24BF)
\xe2\x92[\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf]
##! Ⓚ,Ⓛ,Ⓜ,Ⓝ,Ⓞ,Ⓟ,Ⓠ,Ⓡ,Ⓢ,Ⓣ,Ⓤ,Ⓥ,Ⓦ,Ⓧ,Ⓨ,Ⓩ (U+24C0-U+24CF)
\xe2\x93[\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f]
##! ⓐ,ⓑ,ⓒ,ⓓ,ⓔ,ⓕ,ⓖ,ⓗ,ⓘ,ⓙ,ⓚ,ⓛ (U+24D0-U+24DB)
\xe2\x93[\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b]
##! ⓜ,ⓝ,ⓞ,ⓟ,ⓠ,ⓡ,ⓢ,ⓣ,ⓤ,ⓥ,ⓦ,ⓧ,ⓨ,ⓩ (U+24DC-U+24E9)
\xe2\x93[\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9]
##! ⓫,⓬,⓭,⓮,⓯,⓰,⓱,⓲,⓳,⓴ (U+24EB-U+24F4)
\xe2\x93[\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4]
##! ⓿,⓵,⓶,⓷,⓸,⓹,⓺,⓻,⓼,⓽,⓾ (U+24FF, U+24F5-U+24FE)
\xe2\x93[\xbf\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe]
##! ideographic full stop: 。 (U+3002)
\xe3\x80\x82
##!=< enclosed-alnums
##!<
##! an IP could start with digits and dots
##! Host alternative: optional plain digits/dots followed by one or more
##! enclosed-alphanumeric characters.
##!> assemble
##! Up to eleven ordinary digits or dots; may be empty, so a host consisting
##! only of enclosed characters matches too.
[\d.]{0,11}
##!=>
##! Nested block whose only content is the stored `enclosed-alnums` expression;
##! the quantifier below is concatenated directly after it.
##!> assemble
##!=> enclosed-alnums
##!<
##!=>
##! match all for capture: one or more enclosed characters in a row
+
##!=>
##!<
##!<