From 0111ecef55d192462ddfb941bf8b183e0755f71b Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Thu, 24 Sep 2015 14:03:05 +0000 Subject: [PATCH] Canonical/Rewrite: sanity check posts that are paged with `<!--nextpage-->`. Page numbers past the max number of pages are returning the last page of content and causing infinite duplicate content. Awesome rewrite bug: the `page` query var was being set to `'/4'` in `$wp`. When cast to `int`, it returns `0` (Bless you, PHP). `WP_Query` calls `trim( $page, '/' )` when setting its own query var. The few places that were checking `page` before posts were queried now have sanity checks, so that these changes work without flushing rewrites. Adds/updates unit tests. Props wonderboymusic, dd32. See #11694. git-svn-id: https://develop.svn.wordpress.org/trunk@34492 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/canonical.php | 9 ++++++++- src/wp-includes/class-wp-rewrite.php | 6 ++++-- src/wp-includes/class-wp.php | 18 ++++++++++++++---- src/wp-includes/rewrite-functions.php | 2 ++ tests/phpunit/tests/canonical.php | 2 +- tests/phpunit/tests/canonical/paged.php | 25 +++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 tests/phpunit/tests/canonical/paged.php diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 30b6f6b8b4..a88b3a8e22 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -148,6 +148,13 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { } } + if ( get_query_var( 'page' ) && $wp_query->post && + false !== strpos( $wp_query->post->post_content, '' ) ) { + $redirect['path'] = rtrim( $redirect['path'], (int) get_query_var( 'page' ) . 
'/' ); + $redirect['query'] = remove_query_arg( 'page', $redirect['query'] ); + $redirect_url = get_permalink( $wp_query->post->ID ); + } + } elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) { // rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101 if ( is_attachment() && ! $redirect_url ) { @@ -560,7 +567,7 @@ function redirect_guess_404_permalink() { return false; if ( get_query_var( 'feed' ) ) return get_post_comments_feed_link( $post_id, get_query_var( 'feed' ) ); - elseif ( get_query_var( 'page' ) ) + elseif ( get_query_var( 'page' ) && 1 < get_query_var( 'page' ) ) return trailingslashit( get_permalink( $post_id ) ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' ); else return get_permalink( $post_id ); diff --git a/src/wp-includes/class-wp-rewrite.php b/src/wp-includes/class-wp-rewrite.php index e828f8d1cf..5f7bbb2340 100644 --- a/src/wp-includes/class-wp-rewrite.php +++ b/src/wp-includes/class-wp-rewrite.php @@ -1071,8 +1071,10 @@ class WP_Rewrite { $sub1 .= '?$'; $sub2 .= '?$'; - //post pagination, e.g. /2/ - $match = $match . '(/[0-9]+)?/?$'; + // Post pagination, e.g. /2/ + // Previously: '(/[0-9]+)?/?$', which produced '/2' for page. + // When cast to int, returned 0. + $match = $match . '(?:/([0-9]+))?/?$'; $query = $index . '?' . $query . '&page=' . $this->preg_index($num_toks + 1); } else { //not matching a permalink so this is a lot simpler //close the match and finalise the query diff --git a/src/wp-includes/class-wp.php b/src/wp-includes/class-wp.php index 60eb6aff83..4e859ee9e1 100644 --- a/src/wp-includes/class-wp.php +++ b/src/wp-includes/class-wp.php @@ -587,7 +587,7 @@ class WP { * @global WP_Query $wp_query */ public function handle_404() { - global $wp_query; + global $wp_query, $wp; // If we've already issued a 404, bail. if ( is_404() ) @@ -596,16 +596,26 @@ class WP { // Never 404 for the admin, robots, or if we found posts. 
if ( is_admin() || is_robots() || $wp_query->posts ) { - // Only set X-Pingback for single posts. + $success = true; if ( is_singular() ) { $p = clone $wp_query->post; + // Only set X-Pingback for single posts that allow pings. if ( $p && pings_open( $p ) ) { @header( 'X-Pingback: ' . get_bloginfo( 'pingback_url' ) ); } + + // check for paged content that exceeds the max number of pages + $next = ''; + if ( $p && false !== strpos( $p->post_content, $next ) && ! empty( $wp->query_vars['page'] ) ) { + $page = trim( $wp->query_vars['page'], '/' ); + $success = (int) $page <= ( substr_count( $p->post_content, $next ) + 1 ); + } } - status_header( 200 ); - return; + if ( $success ) { + status_header( 200 ); + return; + } } // We will 404 for paged queries, as no posts were found. diff --git a/src/wp-includes/rewrite-functions.php b/src/wp-includes/rewrite-functions.php index 96739a8da3..840e5d6425 100644 --- a/src/wp-includes/rewrite-functions.php +++ b/src/wp-includes/rewrite-functions.php @@ -256,6 +256,8 @@ function wp_resolve_numeric_slug_conflicts( $query_vars = array() ) { } elseif ( 'monthnum' === $compare && isset( $query_vars['day'] ) ) { $maybe_page = $query_vars['day']; } + // Bug found in #11694 - 'page' was returning '/4' + $maybe_page = (int) trim( $maybe_page, '/' ); $post_page_count = substr_count( $post->post_content, '' ) + 1; diff --git a/tests/phpunit/tests/canonical.php b/tests/phpunit/tests/canonical.php index aebe88bf2a..d0e635e033 100644 --- a/tests/phpunit/tests/canonical.php +++ b/tests/phpunit/tests/canonical.php @@ -96,7 +96,7 @@ class Tests_Canonical extends WP_Canonical_UnitTestCase { array( '/2010/post-format-test-au/', '/2008/06/02/post-format-test-audio/'), // A Year the post is not in array( '/post-format-test-au/', '/2008/06/02/post-format-test-audio/'), - array( '/2008/09/03/images-test/3/', array( 'url' => '/2008/09/03/images-test/3/', 'qv' => array( 'name' => 'images-test', 'year' => '2008', 'monthnum' => '09', 'day' => '03', 'page' 
=> '/3' ) ) ), // page = /3 ?! + array( '/2008/09/03/images-test/3/', array( 'url' => '/2008/09/03/images-test/3/', 'qv' => array( 'name' => 'images-test', 'year' => '2008', 'monthnum' => '09', 'day' => '03', 'page' => '3' ) ) ), array( '/2008/09/03/images-test/?page=3', '/2008/09/03/images-test/3/' ), array( '/2008/09/03/images-te?page=3', '/2008/09/03/images-test/3/' ), diff --git a/tests/phpunit/tests/canonical/paged.php b/tests/phpunit/tests/canonical/paged.php new file mode 100644 index 0000000000..1239f4c883 --- /dev/null +++ b/tests/phpunit/tests/canonical/paged.php @@ -0,0 +1,25 @@ +'; + + $post_id = $this->factory->post->create( array( + 'post_status' => 'publish', + 'post_content' => "{$para}{$next}{$para}{$next}{$para}" + ) ); + + $link = parse_url( get_permalink( $post_id ), PHP_URL_PATH ); + $paged = $link . '4/'; + + $this->assertCanonical( $paged, $link ); + } +} \ No newline at end of file