From 8060823ff316fa52ff2bd30b7046092422ebfb49 Mon Sep 17 00:00:00 2001 From: Sergey Biryukov Date: Thu, 9 Mar 2017 20:04:17 +0000 Subject: [PATCH] Canonical: Strip trailing punctuation from permalinks. Props joostdevalk, lancewillett, SergeyBiryukov. Fixes #20383. git-svn-id: https://develop.svn.wordpress.org/trunk@40256 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/canonical.php | 24 +++++++-- tests/phpunit/tests/canonical/noRewrite.php | 58 +++++++++++++++++++-- 2 files changed, 75 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 11c513d31a..2de5486430 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -391,12 +391,28 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { // trailing /index.php $redirect['path'] = preg_replace('|/' . preg_quote( $wp_rewrite->index, '|' ) . '/*?$|', '/', $redirect['path']); - // Remove trailing spaces from the path - $redirect['path'] = preg_replace( '#(%20| )+$#', '', $redirect['path'] ); + $punctuation_pattern = implode( '|', array_map( 'preg_quote', array( + ' ', '%20', // space + '!', '%21', // exclamation mark + '"', '%22', // double quote + "'", '%27', // single quote + '(', '%28', // opening bracket + ')', '%29', // closing bracket + ',', '%2C', // comma + '.', '%2E', // period + ';', '%3B', // semicolon + '{', '%7B', // opening curly bracket + '}', '%7D', // closing curly bracket + '“', '%E2%80%9C', // opening curly quote + '”', '%E2%80%9D', // closing curly quote + ) ) ); + + // Remove trailing spaces and end punctuation from the path. + $redirect['path'] = preg_replace( "#($punctuation_pattern)+$#", '', $redirect['path'] ); if ( !empty( $redirect['query'] ) ) { - // Remove trailing spaces from certain terminating query string args - $redirect['query'] = preg_replace( '#((p|page_id|cat|tag)=[^&]*?)(%20| )+$#', '$1', $redirect['query'] ); + // Remove trailing spaces and end punctuation from certain terminating query string args. + $redirect['query'] = preg_replace( "#((p|page_id|cat|tag)=[^&]*?)($punctuation_pattern)+$#", '$1', $redirect['query'] ); // Clean up empty query strings $redirect['query'] = trim(preg_replace( '#(^|&)(p|page_id|cat|tag)=?(&|$)#', '&', $redirect['query']), '&'); diff --git a/tests/phpunit/tests/canonical/noRewrite.php b/tests/phpunit/tests/canonical/noRewrite.php index d3b0d63949..a99979b593 100644 --- a/tests/phpunit/tests/canonical/noRewrite.php +++ b/tests/phpunit/tests/canonical/noRewrite.php @@ -46,15 +46,67 @@ class Tests_Canonical_NoRewrite extends WP_Canonical_UnitTestCase { // Strip an existing but incorrect post_type arg array( '/?post_type=page&page_id=1', '/?p=1' ), - array( '/?p=358 ', array('url' => '/?p=358', 'qv' => array('p' => '358') ) ), // Trailing spaces - array( '/?p=358%20', array('url' => '/?p=358', 'qv' => array('p' => '358') ) ), + // Trailing spaces and punctuation in query string args. + array( '/?p=358 ', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // space + array( '/?p=358%20', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded space + array( '/?p=358!', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // exclamation mark + array( '/?p=358%21', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded exclamation mark + array( '/?p=358"', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // double quote + array( '/?p=358%22', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded double quote + array( '/?p=358\'', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // single quote + array( '/?p=358%27', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded single quote + array( '/?p=358(', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // opening bracket + array( '/?p=358%28', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded opening bracket + array( '/?p=358)', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // closing bracket + array( '/?p=358%29', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded closing bracket + array( '/?p=358,', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // comma + array( '/?p=358%2C', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded comma + array( '/?p=358.', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // period + array( '/?p=358%2E', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded period + array( '/?p=358;', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // semicolon + array( '/?p=358%3B', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded semicolon + array( '/?p=358{', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // opening curly bracket + array( '/?p=358%7B', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded opening curly bracket + array( '/?p=358}', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // closing curly bracket + array( '/?p=358%7D', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded closing curly bracket + array( '/?p=358“', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // opening curly quote + array( '/?p=358%E2%80%9C', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded opening curly quote + array( '/?p=358”', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // closing curly quote + array( '/?p=358%E2%80%9D', array( 'url' => '/?p=358', 'qv' => array( 'p' => '358' ) ), 20383 ), // encoded closing curly quote + + // Trailing spaces and punctuation in permalinks. + array( '/page/2/ ', '/page/2/', 20383 ), // space + array( '/page/2/%20', '/page/2/', 20383 ), // encoded space + array( '/page/2/!', '/page/2/', 20383 ), // exclamation mark + array( '/page/2/%21', '/page/2/', 20383 ), // encoded exclamation mark + array( '/page/2/"', '/page/2/', 20383 ), // double quote + array( '/page/2/%22', '/page/2/', 20383 ), // encoded double quote + array( '/page/2/\'', '/page/2/', 20383 ), // single quote + array( '/page/2/%27', '/page/2/', 20383 ), // encoded single quote + array( '/page/2/(', '/page/2/', 20383 ), // opening bracket + array( '/page/2/%28', '/page/2/', 20383 ), // encoded opening bracket + array( '/page/2/)', '/page/2/', 20383 ), // closing bracket + array( '/page/2/%29', '/page/2/', 20383 ), // encoded closing bracket + array( '/page/2/,', '/page/2/', 20383 ), // comma + array( '/page/2/%2C', '/page/2/', 20383 ), // encoded comma + array( '/page/2/.', '/page/2/', 20383 ), // period + array( '/page/2/%2E', '/page/2/', 20383 ), // encoded period + array( '/page/2/;', '/page/2/', 20383 ), // semicolon + array( '/page/2/%3B', '/page/2/', 20383 ), // encoded semicolon + array( '/page/2/{', '/page/2/', 20383 ), // opening curly bracket + array( '/page/2/%7B', '/page/2/', 20383 ), // encoded opening curly bracket + array( '/page/2/}', '/page/2/', 20383 ), // closing curly bracket + array( '/page/2/%7D', '/page/2/', 20383 ), // encoded closing curly bracket + array( '/page/2/“', '/page/2/', 20383 ), // opening curly quote + array( '/page/2/%E2%80%9C', '/page/2/', 20383 ), // encoded opening curly quote + array( '/page/2/”', '/page/2/', 20383 ), // closing curly quote + array( '/page/2/%E2%80%9D', '/page/2/', 20383 ), // encoded closing curly quote array( '/?page_id=1', '/?p=1' ), // redirect page_id to p (should cover page_id|p|attachment_id to one another array( '/?page_id=1&post_type=revision', '/?p=1' ), array( '/?feed=rss2&p=1', '/?feed=rss2&p=1', 21841 ), array( '/?feed=rss&p=1', '/?feed=rss2&p=1', 24623 ), - ); } }