HTTP API: Simplify wp_parse_url() to ensure consistent results.

[38694] revealed some URL formats were being parsed incorrectly, including those used by Google Fonts. This change simplifies the function to use placeholder values which cause PHP's parsing to behave consistently.

Props jrf, peterwilsoncc.
Fixes #36356.


git-svn-id: https://develop.svn.wordpress.org/trunk@38726 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Peter Wilson 2016-10-04 20:32:40 +00:00
parent 5ba47b2119
commit 3d2f8ba836
2 changed files with 183 additions and 55 deletions

View File

@ -623,11 +623,16 @@ function ms_allowed_http_request_hosts( $is_external, $host ) {
}
/**
* A wrapper for PHP's parse_url() function that handles edgecases in < PHP 5.4.7
* A wrapper for PHP's parse_url() function that handles consistency in the return
* values across PHP versions.
*
* PHP 5.4.7 expanded parse_url()'s ability to handle non-absolute URLs, including
* schemeless and relative URLs with :// in the path, this works around those
* limitations providing a standard output on PHP 5.2~5.4+.
* schemeless and relative URLs with :// in the path. This function works around
* those limitations providing a standard output on PHP 5.2~5.4+.
*
* Secondly, across various PHP versions, schemeless URLs containing a ":"
* in the query are being handled inconsistently. This function works around those
* differences as well.
*
* Error suppression is used as prior to PHP 5.3.3, an E_WARNING would be generated
* when URL parsing failed.
@ -640,63 +645,96 @@ function ms_allowed_http_request_hosts( $is_external, $host ) {
* predefined constants to specify which one.
* Defaults to -1 (= return all parts as an array).
* @see http://php.net/manual/en/function.parse-url.php
* @return mixed False on failure; Array of URL components on success;
* When a specific component has been requested: null if the component doesn't
* exist in the given URL; a string or - in the case of PHP_URL_PORT - integer
* when it does; See parse_url()'s return values.
* @return mixed False on parse failure; Array of URL components on success;
* When a specific component has been requested: null if the component
* doesn't exist in the given URL; a string or - in the case of
* PHP_URL_PORT - integer when it does. See parse_url()'s return values.
*/
function wp_parse_url( $url, $component = -1 ) {
$parts = @parse_url( $url, $component );
$to_unset = array();
$url = strval( $url );
if ( version_compare( PHP_VERSION, '5.4.7', '>=' ) ) {
if ( '//' === substr( $url, 0, 2 ) ) {
$to_unset[] = 'scheme';
$url = 'placeholder:' . $url;
} elseif ( '/' === substr( $url, 0, 1 ) ) {
$to_unset[] = 'scheme';
$to_unset[] = 'host';
$url = 'placeholder://placeholder' . $url;
}
$parts = @parse_url( $url );
if ( false === $parts ) {
// Parsing failure.
return $parts;
}
if ( false === $parts ) {
// < PHP 5.4.7 compat, trouble with relative paths including a scheme break in the path.
if ( '/' == $url[0] && false !== strpos( $url, '://' ) ) {
if ( in_array( $component, array( PHP_URL_SCHEME, PHP_URL_HOST ), true ) ) {
return null;
}
// Since we know it's a relative path, prefix with a scheme/host placeholder and try again.
if ( ! $parts = @parse_url( 'placeholder://placeholder' . $url, $component ) ) {
return $parts;
}
// Remove the placeholder values.
if ( -1 === $component ) {
unset( $parts['scheme'], $parts['host'] );
}
} else {
return $parts;
}
// Remove the placeholder values.
foreach ( $to_unset as $key ) {
unset( $parts[ $key ] );
}
// < PHP 5.4.7 compat, doesn't detect a schemeless URL's host field.
if ( '//' == substr( $url, 0, 2 ) ) {
if ( -1 === $component && ! isset( $parts['host'] ) ) {
$path_parts = explode( '/', substr( $parts['path'], 2 ), 2 );
$parts['host'] = $path_parts[0];
if ( isset( $path_parts[1] ) ) {
$parts['path'] = '/' . $path_parts[1];
} else {
unset( $parts['path'] );
}
} elseif ( PHP_URL_HOST === $component || PHP_URL_PATH === $component ) {
$all_parts = @parse_url( $url );
if ( ! isset( $all_parts['host'] ) ) {
$path_parts = explode( '/', substr( $all_parts['path'], 2 ), 2 );
if ( PHP_URL_PATH === $component ) {
if ( isset( $path_parts[1] ) ) {
$parts = '/' . $path_parts[1];
} else {
$parts = null;
}
} elseif ( PHP_URL_HOST === $component ) {
$parts = $path_parts[0];
}
}
}
}
return $parts;
return _get_component_from_parsed_url_array( $parts, $component );
}
/**
 * Pick one component out of an already-parsed URL.
 *
 * When no specific component is requested (-1), the parse result is handed
 * back untouched, including `false` for a URL that failed to parse. Otherwise
 * the PHP_URL_* constant is translated to its array key and the matching
 * value is returned.
 *
 * @internal
 *
 * @since 4.7.0
 *
 * @param array|false $url_parts The parsed URL. Can be false if the URL failed to parse.
 * @param int         $component The specific component to retrieve. Use one of the PHP
 *                               predefined constants to specify which one.
 *                               Defaults to -1 (= return all parts as an array).
 * @see http://php.net/manual/en/function.parse-url.php
 * @return mixed False on parse failure; Array of URL components on success;
 *               When a specific component has been requested: null if the component
 *               doesn't exist in the given URL; a string or - in the case of
 *               PHP_URL_PORT - integer when it does. See parse_url()'s return values.
 */
function _get_component_from_parsed_url_array( $url_parts, $component = -1 ) {
	// No single component requested: pass the parse result straight through.
	if ( -1 === $component ) {
		return $url_parts;
	}

	$key = _wp_translate_php_url_constant_to_key( $component );

	// Unknown constant, failed parse, or component absent from this URL.
	if ( false === $key || ! is_array( $url_parts ) || ! isset( $url_parts[ $key ] ) ) {
		return null;
	}

	return $url_parts[ $key ];
}
/**
 * Map a PHP_URL_* constant onto the array key parse_url() uses for that component.
 *
 * @internal
 *
 * @since 4.7.0
 *
 * @see http://php.net/manual/en/url.constants.php
 *
 * @param int $constant PHP_URL_* constant.
 * @return string|bool The named key, or false when the value is not a known PHP_URL_* constant.
 */
function _wp_translate_php_url_constant_to_key( $constant ) {
	$keys_by_constant = array(
		PHP_URL_SCHEME   => 'scheme',
		PHP_URL_HOST     => 'host',
		PHP_URL_PORT     => 'port',
		PHP_URL_USER     => 'user',
		PHP_URL_PASS     => 'pass',
		PHP_URL_PATH     => 'path',
		PHP_URL_QUERY    => 'query',
		PHP_URL_FRAGMENT => 'fragment',
	);

	// isset() keeps the lookup silent for values that are not in the table.
	return isset( $keys_by_constant[ $constant ] ) ? $keys_by_constant[ $constant ] : false;
}

View File

@ -107,6 +107,29 @@ class Tests_HTTP_HTTP extends WP_UnitTestCase {
// PHP's parse_url() calls this an invalid url, we handle it as a path
array( '/://example.com/', array( 'path' => '/://example.com/' ) ),
// Schemeless URLs containing colons cause parse errors in PHP 7+.
array(
'//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin',
array(
'host' => 'fonts.googleapis.com',
'path' => '/css',
'query' => 'family=Open+Sans:400&subset=latin',
),
),
array(
'//fonts.googleapis.com/css?family=Open+Sans:400',
array(
'host' => 'fonts.googleapis.com',
'path' => '/css',
'query' => 'family=Open+Sans:400',
),
),
array( 'filenamefound', array( 'path' => 'filenamefound' ) ),
// Empty string or non-string passed in.
array( '', array( 'path' => '' ) ),
array( 123, array( 'path' => '123' ) ),
);
/*
Untestable edge cases in various PHP:
@ -117,7 +140,7 @@ class Tests_HTTP_HTTP extends WP_UnitTestCase {
/**
* @ticket 36356
*/
*/
function test_wp_parse_url_with_default_component() {
$actual = wp_parse_url( self::FULL_TEST_URL, -1 );
$this->assertEquals( array(
@ -175,6 +198,21 @@ class Tests_HTTP_HTTP extends WP_UnitTestCase {
// PHP's parse_url() calls this an invalid URL, we handle it as a path.
array( '/://example.com/', PHP_URL_PATH, '/://example.com/' ),
// Schemeless URLs containing colons cause parse errors in PHP 7+.
array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_HOST, 'fonts.googleapis.com' ),
array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PORT, null ),
array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_PATH, '/css' ),
array( '//fonts.googleapis.com/css?family=Open+Sans:400&subset=latin', PHP_URL_QUERY, 'family=Open+Sans:400&subset=latin' ),
array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_HOST, 'fonts.googleapis.com' ), // 25
array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PORT, null ),
array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_PATH, '/css' ), //27
array( '//fonts.googleapis.com/css?family=Open+Sans:400', PHP_URL_QUERY, 'family=Open+Sans:400' ), //28
// Empty string or non-string passed in.
array( '', PHP_URL_PATH, '' ),
array( '', PHP_URL_QUERY, null ),
array( 123, PHP_URL_PORT, null ),
array( 123, PHP_URL_PATH, '123' ),
);
}
@ -224,4 +262,56 @@ class Tests_HTTP_HTTP extends WP_UnitTestCase {
}
}
}
/**
* @ticket 36356
*
* @dataProvider get_component_from_parsed_url_array_testcases
*/
function test_get_component_from_parsed_url_array( $url, $component, $expected ) {
	// Parse the full URL first, then extract the requested component from the result.
	$this->assertSame(
		$expected,
		_get_component_from_parsed_url_array( wp_parse_url( $url ), $component )
	);
}
/**
 * Data provider for test_get_component_from_parsed_url_array().
 *
 * @return array[] Datasets of: 0: A URL, 1: PHP URL constant (or -1 for all
 *                 parts), 2: The expected result.
 */
function get_component_from_parsed_url_array_testcases() {
	// 0: A URL, 1: PHP URL constant, 2: The expected result.
	return array(
		// -1 returns every parsed part as an array.
		array( 'http://example.com/', -1, array( 'scheme' => 'http', 'host' => 'example.com', 'path' => '/' ) ),
		// Components that are present in the URL.
		array( 'http://example.com/', PHP_URL_HOST, 'example.com' ),
		array( 'http://example.com/', PHP_URL_PATH, '/' ),
		// Component that is absent from the URL.
		array( 'http://example.com/', PHP_URL_USER, null ),
		array( 'http:///example.com', -1, false ), // Malformed.
		array( 'http:///example.com', PHP_URL_HOST, null ), // Malformed.
	);
}
/**
* @ticket 36356
*
* @dataProvider wp_translate_php_url_constant_to_key_testcases
*/
function test_wp_translate_php_url_constant_to_key( $input, $expected ) {
	// assertSame() so that a `false` result is never confused with a falsy key.
	$this->assertSame( $expected, _wp_translate_php_url_constant_to_key( $input ) );
}
/**
 * Data provider for test_wp_translate_php_url_constant_to_key().
 *
 * @return array[] Datasets of: 0: PHP URL constant, 1: The expected result.
 */
function wp_translate_php_url_constant_to_key_testcases() {
	// Every PHP_URL_* constant paired with the array key it should translate to.
	$expected_keys = array(
		PHP_URL_SCHEME   => 'scheme',
		PHP_URL_HOST     => 'host',
		PHP_URL_PORT     => 'port',
		PHP_URL_USER     => 'user',
		PHP_URL_PASS     => 'pass',
		PHP_URL_PATH     => 'path',
		PHP_URL_QUERY    => 'query',
		PHP_URL_FRAGMENT => 'fragment',
	);

	// 0: PHP URL constant, 1: The expected result.
	$datasets = array();
	foreach ( $expected_keys as $constant => $key ) {
		$datasets[] = array( $constant, $key );
	}

	// Test with non-PHP_URL_CONSTANT parameter.
	$datasets[] = array( 'something', false );
	$datasets[] = array( ABSPATH, false );

	return $datasets;
}
}