Canonical/Rewrite: sanity check posts that are paged with <!--nextpage-->. Page numbers past the max number of pages are returning the last page of content and causing infinite duplicate content.

Awesome rewrite bug: the `page` query var was being set to `'/4'` in `$wp`. When cast to `int`, it returns `0` (Bless you, PHP). `WP_Query` calls `trim( $page, '/' )` when setting its own query var. The few places that were checking `page` before posts were queried now have sanity checks, so that these changes work without flushing rewrites.

Adds/updates unit tests.

Props wonderboymusic, dd32.
See #11694.


git-svn-id: https://develop.svn.wordpress.org/trunk@34492 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2015-09-24 14:03:05 +00:00
parent c9093207df
commit 0111ecef55
6 changed files with 54 additions and 8 deletions

View File

@ -148,6 +148,13 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) {
} }
} }
if ( get_query_var( 'page' ) && $wp_query->post &&
false !== strpos( $wp_query->post->post_content, '<!--nextpage-->' ) ) {
$redirect['path'] = rtrim( $redirect['path'], (int) get_query_var( 'page' ) . '/' );
$redirect['query'] = remove_query_arg( 'page', $redirect['query'] );
$redirect_url = get_permalink( $wp_query->post->ID );
}
} elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) { } elseif ( is_object($wp_rewrite) && $wp_rewrite->using_permalinks() ) {
// rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101 // rewriting of old ?p=X, ?m=2004, ?m=200401, ?m=20040101
if ( is_attachment() && ! $redirect_url ) { if ( is_attachment() && ! $redirect_url ) {
@ -560,7 +567,7 @@ function redirect_guess_404_permalink() {
return false; return false;
if ( get_query_var( 'feed' ) ) if ( get_query_var( 'feed' ) )
return get_post_comments_feed_link( $post_id, get_query_var( 'feed' ) ); return get_post_comments_feed_link( $post_id, get_query_var( 'feed' ) );
elseif ( get_query_var( 'page' ) ) elseif ( get_query_var( 'page' ) && 1 < get_query_var( 'page' ) )
return trailingslashit( get_permalink( $post_id ) ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' ); return trailingslashit( get_permalink( $post_id ) ) . user_trailingslashit( get_query_var( 'page' ), 'single_paged' );
else else
return get_permalink( $post_id ); return get_permalink( $post_id );

View File

@ -1071,8 +1071,10 @@ class WP_Rewrite {
$sub1 .= '?$'; $sub1 .= '?$';
$sub2 .= '?$'; $sub2 .= '?$';
//post pagination, e.g. <permalink>/2/ // Post pagination, e.g. <permalink>/2/
$match = $match . '(/[0-9]+)?/?$'; // Previously: '(/[0-9]+)?/?$', which produced '/2' for page.
// When cast to int, returned 0.
$match = $match . '(?:/([0-9]+))?/?$';
$query = $index . '?' . $query . '&page=' . $this->preg_index($num_toks + 1); $query = $index . '?' . $query . '&page=' . $this->preg_index($num_toks + 1);
} else { //not matching a permalink so this is a lot simpler } else { //not matching a permalink so this is a lot simpler
//close the match and finalise the query //close the match and finalise the query

View File

@ -587,7 +587,7 @@ class WP {
* @global WP_Query $wp_query * @global WP_Query $wp_query
*/ */
public function handle_404() { public function handle_404() {
global $wp_query; global $wp_query, $wp;
// If we've already issued a 404, bail. // If we've already issued a 404, bail.
if ( is_404() ) if ( is_404() )
@ -596,17 +596,27 @@ class WP {
// Never 404 for the admin, robots, or if we found posts. // Never 404 for the admin, robots, or if we found posts.
if ( is_admin() || is_robots() || $wp_query->posts ) { if ( is_admin() || is_robots() || $wp_query->posts ) {
// Only set X-Pingback for single posts. $success = true;
if ( is_singular() ) { if ( is_singular() ) {
$p = clone $wp_query->post; $p = clone $wp_query->post;
// Only set X-Pingback for single posts that allow pings.
if ( $p && pings_open( $p ) ) { if ( $p && pings_open( $p ) ) {
@header( 'X-Pingback: ' . get_bloginfo( 'pingback_url' ) ); @header( 'X-Pingback: ' . get_bloginfo( 'pingback_url' ) );
} }
// check for paged content that exceeds the max number of pages
$next = '<!--nextpage-->';
if ( $p && false !== strpos( $p->post_content, $next ) && ! empty( $wp->query_vars['page'] ) ) {
$page = trim( $wp->query_vars['page'], '/' );
$success = (int) $page <= ( substr_count( $p->post_content, $next ) + 1 );
}
} }
if ( $success ) {
status_header( 200 ); status_header( 200 );
return; return;
} }
}
// We will 404 for paged queries, as no posts were found. // We will 404 for paged queries, as no posts were found.
if ( ! is_paged() ) { if ( ! is_paged() ) {

View File

@ -256,6 +256,8 @@ function wp_resolve_numeric_slug_conflicts( $query_vars = array() ) {
} elseif ( 'monthnum' === $compare && isset( $query_vars['day'] ) ) { } elseif ( 'monthnum' === $compare && isset( $query_vars['day'] ) ) {
$maybe_page = $query_vars['day']; $maybe_page = $query_vars['day'];
} }
// Bug found in #11694 - 'page' was returning '/4'
$maybe_page = (int) trim( $maybe_page, '/' );
$post_page_count = substr_count( $post->post_content, '<!--nextpage-->' ) + 1; $post_page_count = substr_count( $post->post_content, '<!--nextpage-->' ) + 1;

View File

@ -96,7 +96,7 @@ class Tests_Canonical extends WP_Canonical_UnitTestCase {
array( '/2010/post-format-test-au/', '/2008/06/02/post-format-test-audio/'), // A Year the post is not in array( '/2010/post-format-test-au/', '/2008/06/02/post-format-test-audio/'), // A Year the post is not in
array( '/post-format-test-au/', '/2008/06/02/post-format-test-audio/'), array( '/post-format-test-au/', '/2008/06/02/post-format-test-audio/'),
array( '/2008/09/03/images-test/3/', array( 'url' => '/2008/09/03/images-test/3/', 'qv' => array( 'name' => 'images-test', 'year' => '2008', 'monthnum' => '09', 'day' => '03', 'page' => '/3' ) ) ), // page = /3 ?! array( '/2008/09/03/images-test/3/', array( 'url' => '/2008/09/03/images-test/3/', 'qv' => array( 'name' => 'images-test', 'year' => '2008', 'monthnum' => '09', 'day' => '03', 'page' => '3' ) ) ),
array( '/2008/09/03/images-test/?page=3', '/2008/09/03/images-test/3/' ), array( '/2008/09/03/images-test/?page=3', '/2008/09/03/images-test/3/' ),
array( '/2008/09/03/images-te?page=3', '/2008/09/03/images-test/3/' ), array( '/2008/09/03/images-te?page=3', '/2008/09/03/images-test/3/' ),

View File

@ -0,0 +1,25 @@
<?php
/**
 * Canonical-redirect coverage for posts whose content is split into pages
 * with the `<!--nextpage-->` marker.
 *
 * @group canonical
 * @group rewrite
 * @group query
 */
class Tests_Canonical_Paged extends WP_Canonical_UnitTestCase {

	/**
	 * Creates a published post whose content holds two `<!--nextpage-->`
	 * markers, then hands assertCanonical() the post's permalink path with
	 * `4/` appended (a page number beyond the content) together with the
	 * bare permalink path as the expected result.
	 */
	function test_nextpage() {
		$separator = '<!--nextpage-->';
		$paragraphs = 'This is a paragraph.
This is a paragraph.
This is a paragraph.';

		// Three copies of the same text joined by two page separators.
		$content = implode( $separator, array( $paragraphs, $paragraphs, $paragraphs ) );

		$post_args = array(
			'post_status'  => 'publish',
			'post_content' => $content,
		);
		$post_id = $this->factory->post->create( $post_args );

		// Only the path component of the permalink is compared.
		$permalink_path = parse_url( get_permalink( $post_id ), PHP_URL_PATH );

		$this->assertCanonical( $permalink_path . '4/', $permalink_path );
	}
}