From bae635f1a0853fad4ad13e4f557a7b04464008c8 Mon Sep 17 00:00:00 2001 From: Peter Wilson Date: Thu, 27 Aug 2020 01:28:24 +0000 Subject: [PATCH] Sitemaps: Prevent incorrect redirection of paged sitemap requests. Update `redirect_canonical()` to account for custom pagination and URL format used by sitemaps in order to follow standard practices. Introduce the function `get_sitemap_url()` to simplify getting the index and provider URLs as needed. Props jonathanstegall, pbiron, GamerZ, salvoaranzulla, peterwilsoncc. Fixes #50910. git-svn-id: https://develop.svn.wordpress.org/trunk@48872 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/canonical.php | 12 ++- src/wp-includes/sitemaps.php | 36 +++++++++ tests/phpunit/tests/canonical/sitemaps.php | 71 +++++++++++++++++ tests/phpunit/tests/sitemaps/functions.php | 88 ++++++++++++++++++++++ 4 files changed, 200 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 5a63de3481..6a74ac2d43 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -410,8 +410,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['query'] = remove_query_arg( 'page', $redirect['query'] ); } - // Paging and feeds. - if ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) { + if ( get_query_var( 'sitemap' ) ) { + $redirect_url = get_sitemap_url( get_query_var( 'sitemap' ), get_query_var( 'sitemap-subtype' ), get_query_var( 'paged' ) ); + $redirect['query'] = remove_query_arg( array( 'sitemap', 'sitemap-subtype', 'paged' ), $redirect['query'] ); + } elseif ( get_query_var( 'paged' ) || is_feed() || get_query_var( 'cpage' ) ) { + // Paging and feeds. $paged = get_query_var( 'paged' ); $feed = get_query_var( 'feed' ); $cpage = get_query_var( 'cpage' ); @@ -508,11 +511,6 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['path'] = trailingslashit( $redirect['path'] ) . $addl_path; } - // Remove trailing slash for sitemaps requests. - if ( ! empty( get_query_var( 'sitemap' ) ) ) { - $redirect['path'] = untrailingslashit( $redirect['path'] ); - } - $redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path']; } diff --git a/src/wp-includes/sitemaps.php b/src/wp-includes/sitemaps.php index 62752dada9..1dfcd1d69c 100644 --- a/src/wp-includes/sitemaps.php +++ b/src/wp-includes/sitemaps.php @@ -87,3 +87,39 @@ function wp_sitemaps_get_max_urls( $object_type ) { */ return apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type ); } + +/** + * Retrieves the full URL for a sitemap. + * + * @since 5.5.1 + * + * @param string $name The sitemap name. + * @param string $subtype_name The sitemap subtype name. Default empty string. + * @param int $page The page of the sitemap. Default 1. + * @return string|false The sitemap URL or false if the sitemap doesn't exist. + */ +function get_sitemap_url( $name, $subtype_name = '', $page = 1 ) { + $sitemaps = wp_sitemaps_get_server(); + if ( ! $sitemaps ) { + return false; + } + + if ( 'index' === $name ) { + return $sitemaps->index->get_index_url(); + } + + $provider = $sitemaps->registry->get_provider( $name ); + if ( ! $provider ) { + return false; + } + + if ( $subtype_name && ! in_array( $subtype_name, array_keys( $provider->get_object_subtypes() ), true ) ) { + return false; + } + + $page = absint( $page ); + if ( 0 >= $page ) { + $page = 1; + } + return $provider->get_sitemap_url( $subtype_name, $page ); +} diff --git a/tests/phpunit/tests/canonical/sitemaps.php b/tests/phpunit/tests/canonical/sitemaps.php index dd0bdfbdb6..d46028068d 100644 --- a/tests/phpunit/tests/canonical/sitemaps.php +++ b/tests/phpunit/tests/canonical/sitemaps.php @@ -40,4 +40,75 @@ class Tests_Canonical_Sitemaps extends WP_Canonical_UnitTestCase { $this->assertCanonical( '/wp-sitemap.xsl/', '/wp-sitemap.xsl' ); } + /** + * Ensure sitemaps redirects work as expected with pretty permalinks. + * + * @dataProvider data_sitemaps_canonical_pretty_redirects + * @ticket 50910 + */ + public function test_sitemaps_canonical_pretty_redirects( $test_url, $expected ) { + $this->set_permalink_structure( '/%postname%/' ); + $this->assertCanonical( $test_url, $expected, 50910 ); + } + + /** + * Data provider for test_sitemaps_canonical_pretty_redirects. + * + * @return array[] { + * Data to test with. + * + * @type string $0 The test URL. + * @type string $1 The expected canonical URL. + * } + */ + public function data_sitemaps_canonical_pretty_redirects() { + return array( + // Ugly/incorrect versions redirect correctly. + array( '/?sitemap=index', '/wp-sitemap.xml' ), + array( '/wp-sitemap.xml/', '/wp-sitemap.xml' ), + array( '/?sitemap=posts&sitemap-subtype=post', '/wp-sitemap-posts-post-1.xml' ), + array( '/?sitemap=posts&sitemap-subtype=post&paged=2', '/wp-sitemap-posts-post-2.xml' ), + array( '/?sitemap=taxonomies&sitemap-subtype=category', '/wp-sitemap-taxonomies-category-1.xml' ), + array( '/?sitemap=taxonomies&sitemap-subtype=category&paged=2', '/wp-sitemap-taxonomies-category-2.xml' ), + + // Pretty versions don't redirect incorrectly. + array( '/wp-sitemap.xml', '/wp-sitemap.xml' ), + array( '/wp-sitemap-posts-post-1.xml', '/wp-sitemap-posts-post-1.xml' ), + array( '/wp-sitemap-posts-post-2.xml', '/wp-sitemap-posts-post-2.xml' ), + array( '/wp-sitemap-taxonomies-category-1.xml', '/wp-sitemap-taxonomies-category-1.xml' ), + array( '/wp-sitemap-taxonomies-category-2.xml', '/wp-sitemap-taxonomies-category-2.xml' ), + ); + } + + /** + * Ensure sitemaps redirects work as expected with ugly permalinks. + * + * @dataProvider data_sitemaps_canonical_ugly_redirects + * @ticket 50910 + */ + public function test_sitemaps_canonical_ugly_redirects( $test_url, $expected ) { + $this->set_permalink_structure( '' ); + $this->assertCanonical( $test_url, $expected, 50910 ); + } + + /** + * Data provider for test_sitemaps_canonical_ugly_redirects. + * + * @return array[] { + * Data to test with. + * + * @type string $0 The test URL. + * @type string $1 The expected canonical URL. + * } + */ + public function data_sitemaps_canonical_ugly_redirects() { + return array( + // Ugly permalinks remain ugly. + array( '/?sitemap=index', '/?sitemap=index' ), + array( '/?sitemap=posts&sitemap-subtype=post', '/?sitemap=posts&sitemap-subtype=post' ), + array( '/?sitemap=posts&sitemap-subtype=post&paged=2', '/?sitemap=posts&sitemap-subtype=post&paged=2' ), + array( '/?sitemap=taxonomies&sitemap-subtype=category', '/?sitemap=taxonomies&sitemap-subtype=category' ), + array( '/?sitemap=taxonomies&sitemap-subtype=category&paged=2', '/?sitemap=taxonomies&sitemap-subtype=category&paged=2' ), + ); + } } diff --git a/tests/phpunit/tests/sitemaps/functions.php b/tests/phpunit/tests/sitemaps/functions.php index 3fe904a6ae..7ab5c5490f 100644 --- a/tests/phpunit/tests/sitemaps/functions.php +++ b/tests/phpunit/tests/sitemaps/functions.php @@ -58,4 +58,92 @@ class Test_Sitemaps_Functions extends WP_UnitTestCase { $this->assertTrue( is_a( $sitemaps[ $name ], $provider ), "Default $name sitemap is not a $provider object." ); } } + + /** + * Test get_sitemap_url() with ugly permalinks. + * + * @dataProvider ugly_permalinks_provider + */ + public function test_get_sitemap_url_ugly_permalinks( $name, $subtype_name, $page, $expected ) { + $actual = get_sitemap_url( $name, $subtype_name, $page ); + + $this->assertSame( $expected, $actual ); + } + + /** + * Test get_sitemap_url() with pretty permalinks. + * + * @dataProvider pretty_permalinks_provider + */ + public function test_get_sitemap_url_pretty_permalinks( $name, $subtype_name, $page, $expected ) { + $this->set_permalink_structure( '/%postname%/' ); + + $actual = get_sitemap_url( $name, $subtype_name, $page ); + + $this->assertSame( $expected, $actual ); + } + + /** + * Data provider for test_get_sitemap_url_ugly_permalinks. + * + * @return array[] { + * Data to test with. + * + * @type string $0 Sitemap name. + * @type string $1 Sitemap subtype name. + * @type int $3 Sitemap page. + * @type string|false $4 Sitemap URL. + * } + */ + function ugly_permalinks_provider() { + return array( + array( 'posts', 'post', 1, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ), + array( 'posts', 'post', 0, home_url( '/?sitemap=posts&sitemap-subtype=post&paged=1' ) ), + array( 'posts', 'page', 1, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=1' ) ), + array( 'posts', 'page', 5, home_url( '/?sitemap=posts&sitemap-subtype=page&paged=5' ) ), + // post_type doesn't exist. + array( 'posts', 'foo', 5, false ), + array( 'taxonomies', 'category', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=category&paged=1' ) ), + array( 'taxonomies', 'post_tag', 1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ), + array( 'taxonomies', 'post_tag', -1, home_url( '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1' ) ), + // negative paged, gets converted to it's absolute value. + array( 'users', '', 4, home_url( '/?sitemap=users&paged=4' ) ), + // users provider doesn't allow subtypes. + array( 'users', 'foo', 4, false ), + // provider doesn't exist. + array( 'foo', '', 4, false ), + ); + } + + /** + * Data provider for test_get_sitemap_url_pretty_permalinks + * + * @return array[] { + * Data to test with. + * + * @type string $0 Sitemap name. + * @type string $1 Sitemap subtype name. + * @type int $3 Sitemap page. + * @type string|false $4 Sitemap URL. + * } + */ + function pretty_permalinks_provider() { + return array( + array( 'posts', 'post', 1, home_url( '/wp-sitemap-posts-post-1.xml' ) ), + array( 'posts', 'post', 0, home_url( '/wp-sitemap-posts-post-1.xml' ) ), + array( 'posts', 'page', 1, home_url( '/wp-sitemap-posts-page-1.xml' ) ), + array( 'posts', 'page', 5, home_url( '/wp-sitemap-posts-page-5.xml' ) ), + // post_type doesn't exist. + array( 'posts', 'foo', 5, false ), + array( 'taxonomies', 'category', 1, home_url( '/wp-sitemap-taxonomies-category-1.xml' ) ), + array( 'taxonomies', 'post_tag', 1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ), + // negative paged, gets converted to it's absolute value. + array( 'taxonomies', 'post_tag', -1, home_url( '/wp-sitemap-taxonomies-post_tag-1.xml' ) ), + array( 'users', '', 4, home_url( '/wp-sitemap-users-4.xml' ) ), + // users provider doesn't allow subtypes. + array( 'users', 'foo', 4, false ), + // provider doesn't exist. + array( 'foo', '', 4, false ), + ); + } }