When sanitizing a URL to redirect to, UTF-8 characters can be URL encoded, instead of being removed.
While RFC 3986 does not specify which character sets are allowed in URIs, Section 2.5 states that octets matching the UTF-8 character encoding should be percent-encoded, and that unreserved octets outside of the UTF-8 range should then be percent-encoded as well. As browsers tend to only implement support for UTF-8 in URLs, this change only implements the UTF-8 encoding part. We may revisit the second part if it becomes an issue. Fixes #31486. git-svn-id: https://develop.svn.wordpress.org/trunk@31587 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
eb28f78cd3
commit
d258490653
|
@ -1201,6 +1201,19 @@ if ( !function_exists('wp_sanitize_redirect') ) :
|
|||
* @return string redirect-sanitized URL
|
||||
**/
|
||||
function wp_sanitize_redirect($location) {
|
||||
$regex = '/
|
||||
(
|
||||
(?: [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||
| \xED[\x80-\x9F][\x80-\xBF]
|
||||
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||
){1,50} # ...one or more times
|
||||
)/x';
|
||||
$location = preg_replace_callback( $regex, '_wp_sanitize_utf8_in_redirect', $location );
|
||||
$location = preg_replace('|[^a-z0-9-~+_.?#=&;,/:%!*\[\]()]|i', '', $location);
|
||||
$location = wp_kses_no_null($location);
|
||||
|
||||
|
@ -1209,6 +1222,19 @@ function wp_sanitize_redirect($location) {
|
|||
$location = _deep_replace($strip, $location);
|
||||
return $location;
|
||||
}
|
||||
|
||||
/**
 * Percent-encodes a run of UTF-8 bytes found in a redirect URL.
 *
 * Registered as the preg_replace_callback() callback in wp_sanitize_redirect().
 *
 * @ignore
 * @since 4.2.0
 * @access private
 *
 * @see wp_sanitize_redirect()
 *
 * @param array $matches Match data from preg_replace_callback(); index 0 holds the full matched text.
 * @return string The matched text after being passed through urlencode().
 */
function _wp_sanitize_utf8_in_redirect( $matches ) {
	// Only the full match is needed; the capture groups repeat the same bytes.
	$utf8_run = $matches[0];

	return urlencode( $utf8_run );
}
|
||||
endif;
|
||||
|
||||
if ( !function_exists('wp_safe_redirect') ) :
|
||||
|
|
|
@ -11,6 +11,7 @@ class Tests_Formatting_Redirect extends WP_UnitTestCase {
|
|||
$this->assertEquals('http://example.com/watchthecarriagereturngo', wp_sanitize_redirect('http://example.com/watchthecarriagereturn%0Dgo'));
|
||||
$this->assertEquals('http://example.com/watchthecarriagereturngo', wp_sanitize_redirect('http://example.com/watchthecarriagereturn%0dgo'));
|
||||
$this->assertEquals('http://example.com/watchtheallowedcharacters-~+_.?#=&;,/:%!*stay', wp_sanitize_redirect('http://example.com/watchtheallowedcharacters-~+_.?#=&;,/:%!*stay'));
|
||||
$this->assertEquals('http://example.com/watchtheutf8convert%F0%9D%8C%86', wp_sanitize_redirect("http://example.com/watchtheutf8convert\xf0\x9d\x8c\x86"));
|
||||
//Nesting checks
|
||||
$this->assertEquals('http://example.com/watchthecarriagereturngo', wp_sanitize_redirect('http://example.com/watchthecarriagereturn%0%0ddgo'));
|
||||
$this->assertEquals('http://example.com/watchthecarriagereturngo', wp_sanitize_redirect('http://example.com/watchthecarriagereturn%0%0DDgo'));
|
||||
|
|
Loading…
Reference in New Issue