Sitemaps: Prevent incorrect redirection of paged sitemap requests.

Update `redirect_canonical()` to account for custom pagination and URL format used by sitemaps in order to follow standard practices.

Introduce the function `get_sitemap_url()` to simplify getting the index and provider URLs as needed.

Props jonathanstegall, pbiron, GamerZ, salvoaranzulla, peterwilsoncc.
Fixes #50910.



git-svn-id: https://develop.svn.wordpress.org/trunk@48872 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Peter Wilson 2020-08-27 01:28:24 +00:00
parent 066882a4a8
commit bae635f1a0
4 changed files with 200 additions and 7 deletions

View File

@ -410,8 +410,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
$redirect['query'] = remove_query_arg( 'page', $redirect['query'] );
}
// Paging and feeds.
if ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) {
if ( get_query_var( 'sitemap' ) ) {
$redirect_url = get_sitemap_url( get_query_var( 'sitemap' ), get_query_var( 'sitemap-subtype' ), get_query_var( 'paged' ) );
$redirect['query'] = remove_query_arg( array( 'sitemap', 'sitemap-subtype', 'paged' ), $redirect['query'] );
} elseif ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) {
// Paging and feeds.
$paged = get_query_var( 'paged' );
$feed = get_query_var( 'feed' );
$cpage = get_query_var( 'cpage' );
@ -508,11 +511,6 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
$redirect['path'] = trailingslashit( $redirect['path'] ) . $addl_path;
}
// Remove trailing slash for sitemaps requests.
if ( ! empty( get_query_var( 'sitemap' ) ) ) {
$redirect['path'] = untrailingslashit( $redirect['path'] );
}
$redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path'];
}

View File

@ -87,3 +87,39 @@ function wp_sitemaps_get_max_urls( $object_type ) {
*/
return apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type );
}
/**
* Retrieves the full URL for a sitemap.
*
* @since 5.5.1
*
* @param string $name The sitemap name.
* @param string $subtype_name The sitemap subtype name. Default empty string.
* @param int $page The page of the sitemap. Default 1.
* @return string|false The sitemap URL or false if the sitemap doesn't exist.
*/
function get_sitemap_url( $name, $subtype_name = '', $page = 1 ) {
$sitemaps = wp_sitemaps_get_server();
if ( ! $sitemaps ) {
return false;
}
if ( 'index' === $name ) {
return $sitemaps->index->get_index_url();
}
$provider = $sitemaps->registry->get_provider( $name );
if ( ! $provider ) {
return false;
}
if ( $subtype_name && ! in_array( $subtype_name, array_keys( $provider->get_object_subtypes() ), true ) ) {
return false;
}
$page = absint( $page );
if ( 0 >= $page ) {
$page = 1;
}
return $provider->get_sitemap_url( $subtype_name, $page );
}

View File

@ -40,4 +40,75 @@ class Tests_Canonical_Sitemaps extends WP_Canonical_UnitTestCase {
$this->assertCanonical( '/wp-sitemap.xsl/', '/wp-sitemap.xsl' );
}
/**
* Ensure sitemaps redirects work as expected with pretty permalinks.
*
* @dataProvider data_sitemaps_canonical_pretty_redirects
* @ticket 50910
*/
public function test_sitemaps_canonical_pretty_redirects( $test_url, $expected ) {
$this->set_permalink_structure( '/%postname%/' );
$this->assertCanonical( $test_url, $expected, 50910 );
}
/**
* Data provider for test_sitemaps_canonical_pretty_redirects.
*
* @return array[] {
* Data to test with.
*
* @type string $0 The test URL.
* @type string $1 The expected canonical URL.
* }
*/
public function data_sitemaps_canonical_pretty_redirects() {
return array(
// Ugly/incorrect versions redirect correctly.
array( '/?sitemap=index', '/wp-sitemap.xml' ),
array( '/wp-sitemap.xml/', '/wp-sitemap.xml' ),
array( '/?sitemap=posts&sitemap-subtype=post', '/wp-sitemap-posts-post-1.xml' ),
array( '/?sitemap=posts&sitemap-subtype=post&paged=2', '/wp-sitemap-posts-post-2.xml' ),
array( '/?sitemap=taxonomies&sitemap-subtype=category', '/wp-sitemap-taxonomies-category-1.xml' ),
array( '/?sitemap=taxonomies&sitemap-subtype=category&paged=2', '/wp-sitemap-taxonomies-category-2.xml' ),
// Pretty versions don't redirect incorrectly.
array( '/wp-sitemap.xml', '/wp-sitemap.xml' ),
array( '/wp-sitemap-posts-post-1.xml', '/wp-sitemap-posts-post-1.xml' ),
array( '/wp-sitemap-posts-post-2.xml', '/wp-sitemap-posts-post-2.xml' ),
array( '/wp-sitemap-taxonomies-category-1.xml', '/wp-sitemap-taxonomies-category-1.xml' ),
array( '/wp-sitemap-taxonomies-category-2.xml', '/wp-sitemap-taxonomies-category-2.xml' ),
);
}
/**
* Ensure sitemaps redirects work as expected with ugly permalinks.
*
* @dataProvider data_sitemaps_canonical_ugly_redirects
* @ticket 50910
*/
public function test_sitemaps_canonical_ugly_redirects( $test_url, $expected ) {
$this->set_permalink_structure( '' );
$this->assertCanonical( $test_url, $expected, 50910 );
}
/**
* Data provider for test_sitemaps_canonical_ugly_redirects.
*
* @return array[] {
* Data to test with.
*
* @type string $0 The test URL.
* @type string $1 The expected canonical URL.
* }
*/
public function data_sitemaps_canonical_ugly_redirects() {
return array(
// Ugly permalinks remain ugly.
array( '/?sitemap=index', '/?sitemap=index' ),
array( '/?sitemap=posts&sitemap-subtype=post', '/?sitemap=posts&sitemap-subtype=post' ),
array( '/?sitemap=posts&sitemap-subtype=post&paged=2', '/?sitemap=posts&sitemap-subtype=post&paged=2' ),
array( '/?sitemap=taxonomies&sitemap-subtype=category', '/?sitemap=taxonomies&sitemap-subtype=category' ),
array( '/?sitemap=taxonomies&sitemap-subtype=category&paged=2', '/?sitemap=taxonomies&sitemap-subtype=category&paged=2' ),
);
}
}

View File

@ -58,4 +58,92 @@ class Test_Sitemaps_Functions extends WP_UnitTestCase {
$this->assertTrue( is_a( $sitemaps[ $name ], $provider ), "Default $name sitemap is not a $provider object." );
}
}
/**
* Test get_sitemap_url() with ugly permalinks.
*
* @dataProvider ugly_permalinks_provider
*/
public function test_get_sitemap_url_ugly_permalinks( $name, $subtype_name, $page, $expected ) {
$actual = get_sitemap_url( $name, $subtype_name, $page );
$this->assertSame( $expected, $actual );
}
/**
* Test get_sitemap_url() with pretty permalinks.
*
* @dataProvider pretty_permalinks_provider
*/
public function test_get_sitemap_url_pretty_permalinks( $name, $subtype_name, $page, $expected ) {
$this->set_permalink_structure( '/%postname%/' );
$actual = get_sitemap_url( $name, $subtype_name, $page );
$this->assertSame( $expected, $actual );
}
/**
* Data provider for test_get_sitemap_url_ugly_permalinks.
*
* @return array[] {
* Data to test with.
*
* @type string $0 Sitemap name.
* @type string $1 Sitemap subtype name.
* @type int $3 Sitemap page.
* @type string|false $4 Sitemap URL.
* }
*/
function ugly_permalinks_provider() {
return array(
array( 'posts', 'post', 1, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ),
array( 'posts', 'post', 0, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ),
array( 'posts', 'page', 1, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=1' ) ),
array( 'posts', 'page', 5, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=5' ) ),
// post_type doesn't exist.
array( 'posts', 'foo', 5, false ),
array( 'taxonomies', 'category', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=category&paged=1' ) ),
array( 'taxonomies', 'post_tag', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ),
array( 'taxonomies', 'post_tag', -1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ),
// negative paged, gets converted to it's absolute value.
array( 'users', '', 4, home_url( '/?sitemap=users&paged=4' ) ),
// users provider doesn't allow subtypes.
array( 'users', 'foo', 4, false ),
// provider doesn't exist.
array( 'foo', '', 4, false ),
);
}
/**
* Data provider for test_get_sitemap_url_pretty_permalinks
*
* @return array[] {
* Data to test with.
*
* @type string $0 Sitemap name.
* @type string $1 Sitemap subtype name.
* @type int $3 Sitemap page.
* @type string|false $4 Sitemap URL.
* }
*/
function pretty_permalinks_provider() {
return array(
array( 'posts', 'post', 1, home_url( '/wp-sitemap-posts-post-1.xml' ) ),
array( 'posts', 'post', 0, home_url( '/wp-sitemap-posts-post-1.xml' ) ),
array( 'posts', 'page', 1, home_url( '/wp-sitemap-posts-page-1.xml' ) ),
array( 'posts', 'page', 5, home_url( '/wp-sitemap-posts-page-5.xml' ) ),
// post_type doesn't exist.
array( 'posts', 'foo', 5, false ),
array( 'taxonomies', 'category', 1, home_url( '/wp-sitemap-taxonomies-category-1.xml' ) ),
array( 'taxonomies', 'post_tag', 1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ),
// negative paged, gets converted to it's absolute value.
array( 'taxonomies', 'post_tag', -1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ),
array( 'users', '', 4, home_url( '/wp-sitemap-users-4.xml' ) ),
// users provider doesn't allow subtypes.
array( 'users', 'foo', 4, false ),
// provider doesn't exist.
array( 'foo', '', 4, false ),
);
}
}