# Reopens Twitter::Regex and replaces several URL-related entries of its
# REGEXEN table. REGEXEN is only indexed here, never created, so it must
# already be defined (presumably by the twitter-text gem -- confirm) before
# this file is loaded. The same applies to the :valid_url_preceding_chars,
# :valid_domain, :valid_port_number, :valid_url_query_chars and
# :valid_url_query_ending_chars entries that are interpolated below.
module Twitter
  class Regex
    # A single URL path character: anything except Unicode whitespace,
    # parentheses and "?".
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou

    # A character (or group) a URL path may end with: either a character
    # outside the excluded set (whitespace, parentheses, "?" and the
    # punctuation ! * ' ; : = , . $ % [ ] ~ & | @), or a balanced
    # parenthesised group.
    #
    # NOTE(review): this is built *before* :valid_url_balanced_parens is
    # reassigned just below, so it embeds whatever value REGEXEN already held
    # for that key rather than the override -- confirm this is intended.
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

    # A parenthesised run of path characters, optionally containing one
    # further nested parenthesised run.
    REGEXEN[:valid_url_balanced_parens] = /
      \(
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}+
          |
          # allow one nested level of balanced parentheses
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}*
            \(
              #{REGEXEN[:valid_general_url_path_chars]}+
            \)
            #{REGEXEN[:valid_general_url_path_chars]}*
          )
        )
      \)
    /iox

    # One path chunk: either path characters and balanced-paren groups
    # terminated by a valid ending character, or path characters followed by
    # a slash.
    REGEXEN[:valid_url_path] = /(?:
      (?:
        #{REGEXEN[:valid_general_url_path_chars]}*
        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
        #{REGEXEN[:valid_url_path_ending_chars]}
      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
    )/iox

    # Full URL matcher. Capture groups are numbered in the trailing comments.
    # The protocol group is optional and accepts http(s), dat, dweb, ipfs,
    # ipns, ssb and gopher.
    REGEXEN[:valid_url] = %r{
      (                                                                                     # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
        (                                                                                   # $3 URL
          ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)?                                    # $4 Protocol (optional)
          (#{REGEXEN[:valid_domain]})                                                       # $5 Domain(s)
          (?::(#{REGEXEN[:valid_port_number]}))?                                            # $6 Port number (optional)
          (/#{REGEXEN[:valid_url_path]}*)?                                                  # $7 URL Path and anchor
          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
        )
      )
    }iox
  end
end