From b58973554da40b4965458d993a4703ec81e7ad28 Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Wed, 17 Jun 2020 15:22:49 +0000 Subject: [PATCH] Sitemaps: Add XML sitemaps functionality to WordPress. While web crawlers are able to discover pages from links within the site and from other sites, XML sitemaps supplement this approach by allowing crawlers to quickly and comprehensively identify all URLs included in the sitemap and learn other signals about those URLs using the associated metadata. See https://make.wordpress.org/core/2020/06/10/merge-announcement-extensible-core-sitemaps/ for more details. This feature exposes the sitemap index via `/wp-sitemap.xml` and exposes a variety of new filters and hooks for developers to modify the behavior. Users can disable sitemaps completely by turning off search engine visibility in WordPress admin. This change also introduces a new `esc_xml()` function to escape strings for output in XML, as well as XML support to `wp_kses_normalize_entities()`. Props Adrian McShane, afragen, adamsilverstein, casiepa, flixos90, garrett-eclipse, joemcgill, kburgoine, kraftbj, milana_cap, pacifika, pbiron, pfefferle, Ruxandra Gradina, swissspidy, szepeviktor, tangrufus, tweetythierry. Fixes #50117. See #3670. See #19998. 
git-svn-id: https://develop.svn.wordpress.org/trunk@48072 602fd350-edb4-49c9-b593-d223f7449a82 --- phpcs.xml.dist | 1 + src/wp-includes/canonical.php | 10 + src/wp-includes/default-filters.php | 3 + src/wp-includes/formatting.php | 69 ++- src/wp-includes/kses.php | 65 ++- src/wp-includes/sitemaps.php | 119 ++++++ .../sitemaps/class-wp-sitemaps-index.php | 82 ++++ .../sitemaps/class-wp-sitemaps-provider.php | 190 +++++++++ .../sitemaps/class-wp-sitemaps-registry.php | 87 ++++ .../sitemaps/class-wp-sitemaps-renderer.php | 269 ++++++++++++ .../sitemaps/class-wp-sitemaps-stylesheet.php | 288 +++++++++++++ .../sitemaps/class-wp-sitemaps.php | 235 +++++++++++ .../providers/class-wp-sitemaps-posts.php | 221 ++++++++++ .../class-wp-sitemaps-taxonomies.php | 193 +++++++++ .../providers/class-wp-sitemaps-users.php | 163 +++++++ src/wp-settings.php | 10 + tests/phpunit/includes/bootstrap.php | 3 + .../class-wp-sitemaps-empty-test-provider.php | 38 ++ .../class-wp-sitemaps-test-provider.php | 52 +++ tests/phpunit/includes/normalize-xml.xsl | 76 ++++ tests/phpunit/includes/testcase-xml.php | 92 ++++ tests/phpunit/tests/canonical/sitemaps.php | 41 ++ tests/phpunit/tests/formatting/EscXml.php | 135 ++++++ tests/phpunit/tests/sitemaps/functions.php | 61 +++ .../phpunit/tests/sitemaps/sitemaps-index.php | 51 +++ .../phpunit/tests/sitemaps/sitemaps-posts.php | 49 +++ .../tests/sitemaps/sitemaps-registry.php | 33 ++ .../tests/sitemaps/sitemaps-renderer.php | 283 +++++++++++++ .../tests/sitemaps/sitemaps-stylesheet.php | 42 ++ .../tests/sitemaps/sitemaps-taxonomies.php | 192 +++++++++ .../phpunit/tests/sitemaps/sitemaps-users.php | 57 +++ tests/phpunit/tests/sitemaps/sitemaps.php | 396 ++++++++++++++++++ 32 files changed, 3598 insertions(+), 8 deletions(-) create mode 100644 src/wp-includes/sitemaps.php create mode 100644 src/wp-includes/sitemaps/class-wp-sitemaps-index.php create mode 100644 src/wp-includes/sitemaps/class-wp-sitemaps-provider.php create mode 100644 
src/wp-includes/sitemaps/class-wp-sitemaps-registry.php create mode 100644 src/wp-includes/sitemaps/class-wp-sitemaps-renderer.php create mode 100644 src/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php create mode 100644 src/wp-includes/sitemaps/class-wp-sitemaps.php create mode 100644 src/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php create mode 100644 src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php create mode 100644 src/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php create mode 100644 tests/phpunit/includes/class-wp-sitemaps-empty-test-provider.php create mode 100644 tests/phpunit/includes/class-wp-sitemaps-test-provider.php create mode 100644 tests/phpunit/includes/normalize-xml.xsl create mode 100644 tests/phpunit/includes/testcase-xml.php create mode 100644 tests/phpunit/tests/canonical/sitemaps.php create mode 100644 tests/phpunit/tests/formatting/EscXml.php create mode 100644 tests/phpunit/tests/sitemaps/functions.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-index.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-posts.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-registry.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-renderer.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-stylesheet.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-taxonomies.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps-users.php create mode 100644 tests/phpunit/tests/sitemaps/sitemaps.php diff --git a/phpcs.xml.dist b/phpcs.xml.dist index 55924b04cd..7b804d3adb 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -247,6 +247,7 @@ + diff --git a/src/wp-includes/canonical.php b/src/wp-includes/canonical.php index 42d6dcf44a..befe58e684 100644 --- a/src/wp-includes/canonical.php +++ b/src/wp-includes/canonical.php @@ -509,6 +509,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['path'] = trailingslashit( 
$redirect['path'] ) . $addl_path; } + // Remove trailing slash for sitemaps requests. + if ( ! empty( get_query_var( 'sitemap' ) ) ) { + $redirect['path'] = untrailingslashit( $redirect['path'] ); + } + $redirect_url = $redirect['scheme'] . '://' . $redirect['host'] . $redirect['path']; } @@ -651,6 +656,11 @@ function redirect_canonical( $requested_url = null, $do_redirect = true ) { $redirect['path'] = trailingslashit( $redirect['path'] ); } + // Remove trailing slash for sitemaps requests. + if ( ! empty( get_query_var( 'sitemap' ) ) || ! empty( get_query_var( 'sitemap-stylesheet' ) ) ) { + $redirect['path'] = untrailingslashit( $redirect['path'] ); + } + // Strip multiple slashes out of the URL. if ( strpos( $redirect['path'], '//' ) > -1 ) { $redirect['path'] = preg_replace( '|/+|', '/', $redirect['path'] ); diff --git a/src/wp-includes/default-filters.php b/src/wp-includes/default-filters.php index 43faaf6507..c9024e7ae0 100644 --- a/src/wp-includes/default-filters.php +++ b/src/wp-includes/default-filters.php @@ -456,6 +456,9 @@ add_action( 'rest_api_init', 'register_initial_settings', 10 ); add_action( 'rest_api_init', 'create_initial_rest_routes', 99 ); add_action( 'parse_request', 'rest_api_loaded' ); +// Sitemaps actions. +add_action( 'init', 'wp_sitemaps_get_server' ); + /** * Filters formerly mixed into wp-includes. */ diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index a5c52b0e6c..843dc897b9 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -935,6 +935,7 @@ function seems_utf8( $str ) { * ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. * * @since 1.2.2 + * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`. * @access private * * @staticvar string $_charset @@ -942,7 +943,10 @@ function seems_utf8( $str ) { * @param string $string The text which is to be encoded. * @param int|string $quote_style Optional. 
Converts double quotes if set to ENT_COMPAT, * both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. - * Also compatible with old values; converting single quotes if set to 'single', + * Converts single and double quotes, as well as converting HTML + * named entities (that are not also XML named entities) to their + * code points if set to ENT_XML1. Also compatible with old values; + * converting single quotes if set to 'single', * double if set to 'double' or both if otherwise set. * Default is ENT_NOQUOTES. * @param false|string $charset Optional. The character encoding of the string. Default is false. @@ -964,7 +968,9 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals // Account for the previous behaviour of the function when the $quote_style is not an accepted value. if ( empty( $quote_style ) ) { $quote_style = ENT_NOQUOTES; - } elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { + } elseif ( ENT_XML1 === $quote_style ) { + $quote_style = ENT_QUOTES | ENT_XML1; + } elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) { $quote_style = ENT_QUOTES; } @@ -994,7 +1000,7 @@ function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = fals if ( ! $double_encode ) { // Guarantee every &entity; is valid, convert &garbage; into &garbage; // This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable. - $string = wp_kses_normalize_entities( $string ); + $string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' ); } $string = htmlspecialchars( $string, $quote_style, $charset, $double_encode ); @@ -4536,6 +4542,63 @@ function esc_textarea( $text ) { return apply_filters( 'esc_textarea', $safe_text, $text ); } +/** + * Escaping for XML blocks. + * + * @since 5.5.0 + * + * @param string $text Text to escape. + * @return string Escaped text. 
+ */ +function esc_xml( $text ) { + $safe_text = wp_check_invalid_utf8( $text ); + + $cdata_regex = '\<\!\[CDATA\[.*?\]\]\>'; + $regex = <<(.*?)) # the "anything" matched by the lookahead + (?({$cdata_regex})) # the CDATA Section matched by the lookahead + +| # alternative + + (?(.*)) # non-CDATA Section +/sx +EOF; + + $safe_text = (string) preg_replace_callback( + $regex, + static function( $matches ) { + if ( ! $matches[0] ) { + return ''; + } + + if ( ! empty( $matches['non_cdata'] ) ) { + // escape HTML entities in the non-CDATA Section. + return _wp_specialchars( $matches['non_cdata'], ENT_XML1 ); + } + + // Return the CDATA Section unchanged, escape HTML entities in the rest. + return _wp_specialchars( $matches['non_cdata_followed_by_cdata'], ENT_XML1 ) . $matches['cdata']; + }, + $safe_text + ); + + /** + * Filters a string cleaned and escaped for output in XML. + * + * Text passed to esc_xml() is stripped of invalid or special characters + * before output. HTML named character references are converted to their + * equivalent code points. + * + * @since 5.5.0 + * + * @param string $safe_text The text after it has been escaped. + * @param string $text The text prior to being escaped. + */ + return apply_filters( 'esc_xml', $safe_text, $text ); +} + /** * Escape an HTML tag name. * diff --git a/src/wp-includes/kses.php b/src/wp-includes/kses.php index 9c9d094d46..703d7c0002 100644 --- a/src/wp-includes/kses.php +++ b/src/wp-includes/kses.php @@ -47,7 +47,7 @@ if ( ! defined( 'CUSTOM_TAGS' ) ) { // Ensure that these variables are added to the global namespace // (e.g. if using namespaces / autoload in the current PHP environment). -global $allowedposttags, $allowedtags, $allowedentitynames; +global $allowedposttags, $allowedtags, $allowedentitynames, $allowedxmlentitynames; if ( ! CUSTOM_TAGS ) { /** @@ -704,6 +704,18 @@ if ( ! CUSTOM_TAGS ) { 'there4', ); + /** + * @var string[] $allowedxmlentitynames Array of KSES allowed XML entity names. 
+ * @since 5.5.0 + */ + $allowedxmlnamedentities = array( + 'amp', + 'lt', + 'gt', + 'apos', + 'quot', + ); + $allowedposttags = array_map( '_wp_add_global_attributes', $allowedposttags ); } else { $allowedtags = wp_kses_array_lc( $allowedtags ); @@ -1745,17 +1757,27 @@ function wp_kses_bad_protocol_once2( $string, $allowed_protocols ) { * This function normalizes HTML entities. It will convert `AT&T` to the correct * `AT&T`, `:` to `:`, `&#XYZZY;` to `&#XYZZY;` and so on. * - * @since 1.0.0 + * When `$context` is set to 'xml', HTML entities are converted to their code points. For + * example, `AT&T…&#XYZZY;` is converted to `AT&T…&#XYZZY;`. * - * @param string $string Content to normalize entities. + * @since 1.0.0 + * @since 5.5.0 Added `$context` parameter. + * + * @param string $string Content to normalize entities. + * @param string $context Context for normalization. Can be either 'html' or 'xml'. + * Default 'html'. * @return string Content with normalized entities. */ -function wp_kses_normalize_entities( $string ) { +function wp_kses_normalize_entities( $string, $context = 'html' ) { // Disarm all entities by converting & to & $string = str_replace( '&', '&', $string ); // Change back the allowed entities in our entity whitelist. - $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string ); + if ( 'xml' === $context ) { + $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_xml_named_entities', $string ); + } else { + $string = preg_replace_callback( '/&([A-Za-z]{2,8}[0-9]{0,2});/', 'wp_kses_named_entities', $string ); + } $string = preg_replace_callback( '/&#(0*[0-9]{1,7});/', 'wp_kses_normalize_entities2', $string ); $string = preg_replace_callback( '/&#[Xx](0*[0-9A-Fa-f]{1,6});/', 'wp_kses_normalize_entities3', $string ); @@ -1786,6 +1808,39 @@ function wp_kses_named_entities( $matches ) { return ( ! in_array( $i, $allowedentitynames, true ) ) ? 
"&$i;" : "&$i;"; } +/** + * Callback for `wp_kses_normalize_entities()` regular expression. + * + * This function only accepts valid named entity references, which are finite, + * case-sensitive, and highly scrutinized by XML validators. HTML named entity + * references are converted to their code points. + * + * @since 5.5.0 + * + * @global array $allowedentitynames + * @global array $allowedxmlnamedentities + * + * @param array $matches preg_replace_callback() matches array. + * @return string Correctly encoded entity. + */ +function wp_kses_xml_named_entities( $matches ) { + global $allowedentitynames, $allowedxmlnamedentities; + + if ( empty( $matches[1] ) ) { + return ''; + } + + $i = $matches[1]; + + if ( in_array( $i, $allowedxmlnamedentities, true ) ) { + return "&$i;"; + } elseif ( in_array( $i, $allowedentitynames, true ) ) { + return html_entity_decode( "&$i;", ENT_HTML5 ); + } + + return "&$i;"; +} + /** * Callback for `wp_kses_normalize_entities()` regular expression. * diff --git a/src/wp-includes/sitemaps.php b/src/wp-includes/sitemaps.php new file mode 100644 index 0000000000..6c92763270 --- /dev/null +++ b/src/wp-includes/sitemaps.php @@ -0,0 +1,119 @@ +init(); + + /** + * Fires when initializing the Sitemaps object. + * + * Additional sitemaps should be registered on this hook. + * + * @since 5.5.0 + * + * @param WP_Sitemaps $sitemaps Server object. + */ + do_action( 'wp_sitemaps_init', $wp_sitemaps ); + } + + return $wp_sitemaps; +} + +/** + * Gets a list of sitemap providers. + * + * @since 5.5.0 + * + * @return array $sitemaps A list of registered sitemap providers. + */ +function wp_get_sitemaps() { + $sitemaps = wp_sitemaps_get_server(); + + if ( ! $sitemaps ) { + return array(); + } + + return $sitemaps->registry->get_sitemaps(); +} + +/** + * Registers a new sitemap provider. + * + * @since 5.5.0 + * + * @param string $name Unique name for the sitemap provider. 
+ * @param WP_Sitemaps_Provider $provider The `Sitemaps_Provider` instance implementing the sitemap. + * @return bool Returns true if the sitemap was added. False on failure. + */ +function wp_register_sitemap( $name, WP_Sitemaps_Provider $provider ) { + $sitemaps = wp_sitemaps_get_server(); + + if ( ! $sitemaps ) { + return false; + } + + return $sitemaps->registry->add_sitemap( $name, $provider ); +} + +/** + * Gets the maximum number of URLs for a sitemap. + * + * @since 5.5.0 + * + * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user'). + * @return int The maximum number of URLs. + */ +function wp_sitemaps_get_max_urls( $object_type ) { + /** + * Filters the maximum number of URLs displayed on a sitemap. + * + * @since 5.5.0 + * + * @param int $max_urls The maximum number of URLs included in a sitemap. Default 2000. + * @param string $object_type Object type for sitemap to be filtered (e.g. 'post', 'term', 'user'). + */ + return apply_filters( 'wp_sitemaps_max_urls', 2000, $object_type ); +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps-index.php b/src/wp-includes/sitemaps/class-wp-sitemaps-index.php new file mode 100644 index 0000000000..abaf9e539c --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps-index.php @@ -0,0 +1,82 @@ +registry = $registry; + } + + /** + * Gets a sitemap list for the index. + * + * @since 5.5.0 + * + * @return array List of all sitemaps. + */ + public function get_sitemap_list() { + $sitemaps = array(); + + $providers = $this->registry->get_sitemaps(); + /* @var WP_Sitemaps_Provider $provider */ + foreach ( $providers as $provider ) { + $sitemap_entries = $provider->get_sitemap_entries(); + + // Prevent issues with array_push and empty arrays on PHP < 7.3. + if ( ! $sitemap_entries ) { + continue; + } + + // Using array_push is more efficient than array_merge in a loop. 
+ array_push( $sitemaps, ...$sitemap_entries ); + } + + return $sitemaps; + } + + /** + * Builds the URL for the sitemap index. + * + * @since 5.5.0 + * + * @return string The sitemap index url. + */ + public function get_index_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + if ( ! $wp_rewrite->using_permalinks() ) { + return add_query_arg( 'sitemap', 'index', home_url( '/' ) ); + } + + return home_url( '/wp-sitemap.xml' ); + } +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps-provider.php b/src/wp-includes/sitemaps/class-wp-sitemaps-provider.php new file mode 100644 index 0000000000..f89d9c9d33 --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps-provider.php @@ -0,0 +1,190 @@ +get_object_subtypes(); + + // If there are no object subtypes, include a single sitemap for the + // entire object type. + if ( empty( $object_subtypes ) ) { + $sitemap_data[] = array( + 'name' => '', + 'pages' => $this->get_max_num_pages(), + ); + return $sitemap_data; + } + + // Otherwise, include individual sitemaps for every object subtype. + foreach ( $object_subtypes as $object_subtype_name => $data ) { + $object_subtype_name = (string) $object_subtype_name; + + $sitemap_data[] = array( + 'name' => $object_subtype_name, + 'pages' => $this->get_max_num_pages( $object_subtype_name ), + ); + } + + return $sitemap_data; + } + + /** + * Lists sitemap pages exposed by this provider. + * + * The returned data is used to populate the sitemap entries of the index. + * + * @since 5.5.0 + * + * @return array List of sitemaps. + */ + public function get_sitemap_entries() { + $sitemaps = array(); + + $sitemap_types = $this->get_sitemap_type_data(); + + foreach ( $sitemap_types as $type ) { + for ( $page = 1; $page <= $type['pages']; $page ++ ) { + $loc = $this->get_sitemap_url( $type['name'], $page ); + $sitemap_entry = array( + 'loc' => $this->get_sitemap_url( $type['name'], $page ), + ); + + /** + * Filters the sitemap entry for the sitemap index. 
+ * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the post. + * @param string $object_type Object type name. + * @param string $object_subtype Object subtype name. + * Empty string if the object type does not support subtypes. + * @param int $page Page of results. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_index_entry', $sitemap_entry, $this->object_type, $type['name'], $page ); + + $sitemaps[] = $sitemap_entry; + } + } + + return $sitemaps; + } + + /** + * Gets the URL of a sitemap entry. + * + * @since 5.5.0 + * + * @param string $name The name of the sitemap. + * @param int $page The page of the sitemap. + * @return string The composed URL for a sitemap entry. + */ + public function get_sitemap_url( $name, $page ) { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + if ( ! $wp_rewrite->using_permalinks() ) { + return add_query_arg( + // Accounts for cases where name is not included, ex: sitemaps-users-1.xml. + array_filter( + array( + 'sitemap' => $this->name, + 'sitemap-subtype' => $name, + 'paged' => $page, + ) + ), + home_url( '/' ) + ); + } + + $basename = sprintf( + '/wp-sitemap-%1$s.xml', + implode( + '-', + // Accounts for cases where name is not included, ex: sitemaps-users-1.xml. + array_filter( + array( + $this->name, + $name, + (string) $page, + ) + ) + ) + ); + + return home_url( $basename ); + } + + /** + * Returns the list of supported object subtypes exposed by the provider. + * + * @since 5.5.0 + * + * @return array List of object subtypes objects keyed by their name. 
+ */ + public function get_object_subtypes() { + return array(); + } +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps-registry.php b/src/wp-includes/sitemaps/class-wp-sitemaps-registry.php new file mode 100644 index 0000000000..fe1bad398b --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps-registry.php @@ -0,0 +1,87 @@ +sitemaps[ $name ] ) ) { + return false; + } + + $this->sitemaps[ $name ] = $provider; + + return true; + } + + /** + * Returns a single registered sitemaps provider. + * + * @since 5.5.0 + * + * @param string $name Sitemap provider name. + * @return WP_Sitemaps_Provider|null Sitemaps provider if it exists, null otherwise. + */ + public function get_sitemap( $name ) { + if ( ! isset( $this->sitemaps[ $name ] ) ) { + return null; + } + + return $this->sitemaps[ $name ]; + } + + /** + * Lists all registered sitemaps. + * + * @since 5.5.0 + * + * @return array List of sitemaps. + */ + public function get_sitemaps() { + $total_sitemaps = count( $this->sitemaps ); + + if ( $total_sitemaps > $this->max_sitemaps ) { + return array_slice( $this->sitemaps, 0, $this->max_sitemaps, true ); + } + + return $this->sitemaps; + } +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps-renderer.php b/src/wp-includes/sitemaps/class-wp-sitemaps-renderer.php new file mode 100644 index 0000000000..860f74242d --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps-renderer.php @@ -0,0 +1,269 @@ +get_sitemap_stylesheet_url(); + if ( $stylesheet_url ) { + $this->stylesheet = ''; + } + $stylesheet_index_url = $this->get_sitemap_index_stylesheet_url(); + if ( $stylesheet_index_url ) { + $this->stylesheet_index = ''; + } + } + + /** + * Gets the URL for the sitemap stylesheet. + * + * @since 5.5.0 + * + * @return string The sitemap stylesheet url. + */ + public function get_sitemap_stylesheet_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + $sitemap_url = home_url( '/wp-sitemap.xsl' ); + + if ( ! 
$wp_rewrite->using_permalinks() ) { + $sitemap_url = add_query_arg( 'sitemap-stylesheet', 'sitemap', home_url( '/' ) ); + } + + /** + * Filters the URL for the sitemap stylesheet. + * + * If a falsy value is returned, no stylesheet will be used and + * the "raw" XML of the sitemap will be displayed. + * + * @since 5.5.0 + * + * @param string $sitemap_url Full URL for the sitemaps xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_url', $sitemap_url ); + } + + /** + * Gets the URL for the sitemap index stylesheet. + * + * @since 5.5.0 + * + * @return string The sitemap index stylesheet url. + */ + public function get_sitemap_index_stylesheet_url() { + /* @var WP_Rewrite $wp_rewrite */ + global $wp_rewrite; + + $sitemap_url = home_url( '/wp-sitemap-index.xsl' ); + + if ( ! $wp_rewrite->using_permalinks() ) { + $sitemap_url = add_query_arg( 'sitemap-stylesheet', 'index', home_url( '/' ) ); + } + + /** + * Filters the URL for the sitemap index stylesheet. + * + * If a falsy value is returned, no stylesheet will be used and + * the "raw" XML of the sitemap index will be displayed. + * + * @since 5.5.0 + * + * @param string $sitemap_url Full URL for the sitemaps index xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_index_url', $sitemap_url ); + } + + /** + * Renders a sitemap index. + * + * @since 5.5.0 + * + * @param array $sitemaps Array of sitemap URLs. + */ + public function render_index( $sitemaps ) { + header( 'Content-type: application/xml; charset=UTF-8' ); + + $this->check_for_simple_xml_availability(); + + $index_xml = $this->get_sitemap_index_xml( $sitemaps ); + + if ( ! empty( $index_xml ) ) { + // All output is escaped within get_sitemap_index_xml(). + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped + echo $index_xml; + } + } + + /** + * Gets XML for a sitemap index. + * + * @since 5.5.0 + * + * @param array $sitemaps Array of sitemap URLs. + * @return string|false A well-formed XML string for a sitemap index. 
False on error. + */ + public function get_sitemap_index_xml( $sitemaps ) { + $sitemap_index = new SimpleXMLElement( + sprintf( + '%1$s%2$s%3$s', + '', + $this->stylesheet_index, + '' + ) + ); + + foreach ( $sitemaps as $entry ) { + $sitemap = $sitemap_index->addChild( 'sitemap' ); + + // Add each element as a child node to the entry. + foreach ( $entry as $name => $value ) { + if ( 'loc' === $name ) { + $sitemap->addChild( $name, esc_url( $value ) ); + } elseif ( 'lastmod' === $name ) { + $sitemap->addChild( $name, esc_xml( $value ) ); + } else { + _doing_it_wrong( + __METHOD__, + /* translators: %s: list of element names */ + sprintf( + __( 'Fields other than %s are not currently supported for the sitemap index.' ), + implode( ',', array( 'loc', 'lastmod' ) ) + ), + '5.5.0' + ); + } + } + } + + return $sitemap_index->asXML(); + } + + /** + * Renders a sitemap. + * + * @since 5.5.0 + * + * @param array $url_list Array of URLs for a sitemap. + */ + public function render_sitemap( $url_list ) { + header( 'Content-type: application/xml; charset=UTF-8' ); + + $this->check_for_simple_xml_availability(); + + $sitemap_xml = $this->get_sitemap_xml( $url_list ); + + if ( ! empty( $sitemap_xml ) ) { + // All output is escaped within get_sitemap_xml(). + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped + echo $sitemap_xml; + } + } + + /** + * Gets XML for a sitemap. + * + * @since 5.5.0 + * + * @param array $url_list Array of URLs for a sitemap. + * @return string|false A well-formed XML string for a sitemap. False on error. + */ + public function get_sitemap_xml( $url_list ) { + $urlset = new SimpleXMLElement( + sprintf( + '%1$s%2$s%3$s', + '', + $this->stylesheet, + '' + ) + ); + + foreach ( $url_list as $url_item ) { + $url = $urlset->addChild( 'url' ); + + // Add each element as a child node to the entry. 
+ foreach ( $url_item as $name => $value ) { + if ( 'loc' === $name ) { + $url->addChild( $name, esc_url( $value ) ); + } elseif ( in_array( $name, array( 'lastmod', 'changefreq', 'priority' ), true ) ) { + $url->addChild( $name, esc_xml( $value ) ); + } else { + _doing_it_wrong( + __METHOD__, + /* translators: %s: list of element names */ + sprintf( + __( 'Fields other than %s are not currently supported for sitemaps.' ), + implode( ',', array( 'loc', 'lastmod', 'changefreq', 'priority' ) ) + ), + '5.5.0' + ); + } + } + } + + return $urlset->asXML(); + } + + /** + * Checks for the availability of the SimpleXML extension and errors if missing. + * + * @since 5.5.0 + */ + private function check_for_simple_xml_availability() { + if ( ! class_exists( 'SimpleXMLElement' ) ) { + add_filter( + 'wp_die_handler', + static function () { + return '_xml_wp_die_handler'; + } + ); + + wp_die( + sprintf( + /* translators: %s: SimpleXML */ + esc_xml( __( 'Could not generate XML sitemap due to missing %s extension' ) ), + 'SimpleXML' + ), + esc_xml( __( 'WordPress › Error' ) ), + array( + 'response' => 501, // "Not implemented". + ) + ); + } + } +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php b/src/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php new file mode 100644 index 0000000000..3a80a7e2cd --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps-stylesheet.php @@ -0,0 +1,288 @@ +get_sitemap_stylesheet(); + } + + if ( 'index' === $type ) { + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- All content escaped below. + echo $this->get_sitemap_index_stylesheet(); + } + + exit; + } + + /** + * Returns the escaped xsl for all sitemaps, except index. + * + * @since 5.5.0 + */ + public function get_sitemap_stylesheet() { + $css = $this->get_stylesheet_css(); + $title = esc_xml( __( 'XML Sitemap' ) ); + $description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' 
) ); + $learn_more = sprintf( + '%s', + esc_url( __( 'https://www.sitemaps.org/' ) ), + esc_xml( __( 'Learn more about XML sitemaps.' ) ) + ); + + $text = sprintf( + /* translators: %s: number of URLs. */ + esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ), + '' + ); + + $lang = get_language_attributes( 'html' ); + $url = esc_xml( __( 'URL' ) ); + $lastmod = esc_xml( __( 'Last Modified' ) ); + $changefreq = esc_xml( __( 'Change Frequency' ) ); + $priority = esc_xml( __( 'Priority' ) ); + + $xsl_content = << + + + + + + + + + + + + + {$title} + + + +
+

{$title}

+

{$description}

+

{$learn_more}

+
+
+

{$text}

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
{$url}{$lastmod}{$changefreq}{$priority}
+
+ + +
+
+ +XSL; + + /** + * Filters the content of the sitemap stylesheet. + * + * @since 5.5.0 + * + * @param string $xsl Full content for the xml stylesheet. + */ + return apply_filters( 'wp_sitemaps_stylesheet_content', $xsl_content ); + } + + /** + * Returns the escaped xsl for the index sitemaps. + * + * @since 5.5.0 + */ + public function get_sitemap_index_stylesheet() { + $css = $this->get_stylesheet_css(); + $title = esc_xml( __( 'XML Sitemap' ) ); + $description = esc_xml( __( 'This XML Sitemap is generated by WordPress to make your content more visible for search engines.' ) ); + $learn_more = sprintf( + '%s', + esc_url( __( 'https://www.sitemaps.org/' ) ), + esc_xml( __( 'Learn more about XML sitemaps.' ) ) + ); + + $text = sprintf( + /* translators: %s: number of URLs. */ + esc_xml( __( 'Number of URLs in this XML Sitemap: %s.' ) ), + '' + ); + + $lang = get_language_attributes( 'html' ); + $url = esc_xml( __( 'URL' ) ); + $lastmod = esc_xml( __( 'Last Modified' ) ); + + $xsl_content = << + + + + + + + + + + + {$title} + + + +
+

{$title}

+

{$description}

+

{$learn_more}

+
+
+

{$text}

+ + + + + + + + + + + + + + + + + + + +
{$url}{$lastmod}
+
+ + +
+
+ +XSL; + + /** + * Filters the content of the sitemap index stylesheet. + * + * @since 5.5.0 + * + * @param string $xsl Full content for the xml stylesheet. + */ + return apply_filters( 'wp_sitemaps_stylesheet_index_content', $xsl_content ); + } + + /** + * Gets the CSS to be included in sitemap XSL stylesheets. + * + * @since 5.5.0 + * + * @return string The CSS. + */ + public function get_stylesheet_css() { + $css = ' + body { + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; + color: #444; + } + + #sitemap__table { + border: solid 1px #ccc; + border-collapse: collapse; + } + + #sitemap__table tr th { + text-align: left; + } + + #sitemap__table tr td, + #sitemap__table tr th { + padding: 10px; + } + + #sitemap__table tr:nth-child(odd) td { + background-color: #eee; + } + + a:hover { + text-decoration: none; + }'; + + /** + * Filters the css only for the sitemap stylesheet. + * + * @since 5.5.0 + * + * @param string $css CSS to be applied to default xsl file. + */ + return apply_filters( 'wp_sitemaps_stylesheet_css', $css ); + } +} diff --git a/src/wp-includes/sitemaps/class-wp-sitemaps.php b/src/wp-includes/sitemaps/class-wp-sitemaps.php new file mode 100644 index 0000000000..6c6a795af8 --- /dev/null +++ b/src/wp-includes/sitemaps/class-wp-sitemaps.php @@ -0,0 +1,235 @@ +registry = new WP_Sitemaps_Registry(); + $this->renderer = new WP_Sitemaps_Renderer(); + $this->index = new WP_Sitemaps_Index( $this->registry ); + } + + /** + * Initiates all sitemap functionality. + * + * @since 5.5.0 + */ + public function init() { + // These will all fire on the init hook. + $this->register_rewrites(); + $this->register_sitemaps(); + + // Add additional action callbacks. 
+ add_action( 'template_redirect', array( $this, 'render_sitemaps' ) ); + add_filter( 'pre_handle_404', array( $this, 'redirect_sitemapxml' ), 10, 2 ); + add_filter( 'robots_txt', array( $this, 'add_robots' ), 0, 2 ); + } + + /** + * Registers and sets up the functionality for all supported sitemaps. + * + * @since 5.5.0 + */ + public function register_sitemaps() { + /** + * Filters the list of registered sitemap providers. + * + * @since 5.5.0 + * + * @param array $providers { + * Array of WP_Sitemaps_Provider objects keyed by their name. + * + * @type object $posts The WP_Sitemaps_Posts object. + * @type object $taxonomies The WP_Sitemaps_Taxonomies object. + * @type object $users The WP_Sitemaps_Users object. + * } + */ + $providers = apply_filters( + 'wp_sitemaps_register_providers', + array( + 'posts' => new WP_Sitemaps_Posts(), + 'taxonomies' => new WP_Sitemaps_Taxonomies(), + 'users' => new WP_Sitemaps_Users(), + ) + ); + + // Register each supported provider. + /* @var WP_Sitemaps_Provider $provider */ + foreach ( $providers as $name => $provider ) { + $this->registry->add_sitemap( $name, $provider ); + } + } + + /** + * Registers sitemap rewrite tags and routing rules. + * + * @since 5.5.0 + */ + public function register_rewrites() { + // Add rewrite tags. + add_rewrite_tag( '%sitemap%', '([^?]+)' ); + add_rewrite_tag( '%sitemap-subtype%', '([^?]+)' ); + + // Register index route. + add_rewrite_rule( '^wp-sitemap\.xml$', 'index.php?sitemap=index', 'top' ); + + // Register rewrites for the XSL stylesheet. + add_rewrite_tag( '%sitemap-stylesheet%', '([^?]+)' ); + add_rewrite_rule( '^wp-sitemap\.xsl$', 'index.php?sitemap-stylesheet=sitemap', 'top' ); + add_rewrite_rule( '^wp-sitemap-index\.xsl$', 'index.php?sitemap-stylesheet=index', 'top' ); + + // Register routes for providers. 
+ add_rewrite_rule( + '^wp-sitemap-([a-z]+?)-([a-z\d_-]+?)-(\d+?)\.xml$', + 'index.php?sitemap=$matches[1]&sitemap-subtype=$matches[2]&paged=$matches[3]', + 'top' + ); + add_rewrite_rule( + '^wp-sitemap-([a-z]+?)-(\d+?)\.xml$', + 'index.php?sitemap=$matches[1]&paged=$matches[2]', + 'top' + ); + } + + /** + * Renders sitemap templates based on rewrite rules. + * + * @since 5.5.0 + */ + public function render_sitemaps() { + global $wp_query; + + $sitemap = sanitize_text_field( get_query_var( 'sitemap' ) ); + $object_subtype = sanitize_text_field( get_query_var( 'sitemap-subtype' ) ); + $stylesheet_type = sanitize_text_field( get_query_var( 'sitemap-stylesheet' ) ); + $paged = absint( get_query_var( 'paged' ) ); + + // Bail early if this isn't a sitemap or stylesheet route. + if ( ! ( $sitemap || $stylesheet_type ) ) { + return; + } + + // Render stylesheet if this is stylesheet route. + if ( $stylesheet_type ) { + $stylesheet = new WP_Sitemaps_Stylesheet(); + + $stylesheet->render_stylesheet( $stylesheet_type ); + exit; + } + + // Render the index. + if ( 'index' === $sitemap ) { + $sitemap_list = $this->index->get_sitemap_list(); + + $this->renderer->render_index( $sitemap_list ); + exit; + } + + $provider = $this->registry->get_sitemap( $sitemap ); + + if ( ! $provider ) { + return; + } + + if ( empty( $paged ) ) { + $paged = 1; + } + + $url_list = $provider->get_url_list( $paged, $object_subtype ); + + // Force a 404 and bail early if no URLs are present. + if ( empty( $url_list ) ) { + $wp_query->set_404(); + return; + } + + $this->renderer->render_sitemap( $url_list ); + exit; + } + + /** + * Redirects a URL to the wp-sitemap.xml + * + * @since 5.5.0 + * + * @param bool $bypass Pass-through of the pre_handle_404 filter value. + * @param WP_Query $query The WP_Query object. + * @return bool Bypass value. 
+ */ + public function redirect_sitemapxml( $bypass, $query ) { + // If a plugin has already utilized the pre_handle_404 function, return without action to avoid conflicts. + if ( $bypass ) { + return $bypass; + } + + // 'pagename' is for most permalink types, name is for when the %postname% is used as a top-level field. + if ( 'sitemap-xml' === $query->get( 'pagename' ) || + 'sitemap-xml' === $query->get( 'name' ) ) { + wp_safe_redirect( $this->index->get_index_url() ); + exit(); + } + + return $bypass; + } + + /** + * Adds the sitemap index to robots.txt. + * + * @since 5.5.0 + * + * @param string $output robots.txt output. + * @param bool $public Whether the site is public or not. + * @return string The robots.txt output. + */ + public function add_robots( $output, $public ) { + if ( $public ) { + $output .= "\nSitemap: " . esc_url( $this->index->get_index_url() ) . "\n"; + } + + return $output; + } +} diff --git a/src/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php new file mode 100644 index 0000000000..4cca6ea6ec --- /dev/null +++ b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-posts.php @@ -0,0 +1,221 @@ +name = 'posts'; + $this->object_type = 'post'; + } + + /** + * Returns the public post types, which excludes nav_items and similar types. + * Attachments are also excluded. This includes custom post types with public = true. + * + * @since 5.5.0 + * + * @return array Map of registered post type objects (WP_Post_Type) keyed by their name. + */ + public function get_object_subtypes() { + $post_types = get_post_types( array( 'public' => true ), 'objects' ); + unset( $post_types['attachment'] ); + + /** + * Filters the list of post object sub types available within the sitemap. + * + * @since 5.5.0 + * + * @param array $post_types Map of registered post type objects (WP_Post_Type) keyed by their name. 
+ */ + return apply_filters( 'wp_sitemaps_post_types', $post_types ); + } + + /** + * Gets a URL list for a post type sitemap. + * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $post_type Optional. Post type name. Default empty. + * @return array $url_list Array of URLs for a sitemap. + */ + public function get_url_list( $page_num, $post_type = '' ) { + // Bail early if the queried post type is not supported. + $supported_types = $this->get_object_subtypes(); + + if ( ! isset( $supported_types[ $post_type ] ) ) { + return array(); + } + + /** + * Filters the posts URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param string $post_type Post type name. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_posts_pre_url_list', + null, + $post_type, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $args = $this->get_posts_query_args( $post_type ); + $args['paged'] = $page_num; + + $query = new WP_Query( $args ); + + /** + * Returns an array of posts. + * + * @var array $posts + */ + $posts = $query->get_posts(); + + $url_list = array(); + + /* + * Add a URL for the homepage in the pages sitemap. + * Shows only on the first page if the reading settings are set to display latest posts. + */ + if ( 'page' === $post_type && 1 === $page_num && 'posts' === get_option( 'show_on_front' ) ) { + // Extract the data needed for home URL to add to the array. + $sitemap_entry = array( + 'loc' => home_url(), + ); + + /** + * Filters the sitemap entry for the home page when the 'show_on_front' option equals 'posts'. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the home page. 
+ */ + $sitemap_entry = apply_filters( 'wp_sitemaps_posts_show_on_front_entry', $sitemap_entry ); + $url_list[] = $sitemap_entry; + } + + foreach ( $posts as $post ) { + $sitemap_entry = array( + 'loc' => get_permalink( $post ), + ); + + /** + * Filters the sitemap entry for an individual post. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the post. + * @param WP_Post $post Post object. + * @param string $post_type Name of the post_type. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_posts_entry', $sitemap_entry, $post, $post_type ); + $url_list[] = $sitemap_entry; + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @param string $post_type Optional. Post type name. Default empty. + * @return int Total number of pages. + */ + public function get_max_num_pages( $post_type = '' ) { + if ( empty( $post_type ) ) { + return 0; + } + + /** + * Filters the max number of pages before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + * @param string $post_type Post type name. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_posts_pre_max_num_pages', null, $post_type ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $args = $this->get_posts_query_args( $post_type ); + $args['fields'] = 'ids'; + $args['no_found_rows'] = false; + + $query = new WP_Query( $args ); + + return isset( $query->max_num_pages ) ? $query->max_num_pages : 1; + } + + /** + * Returns the query args for retrieving posts to list in the sitemap. + * + * @since 5.5.0 + * + * @param string $post_type Post type name. + * @return array $args Array of WP_Query arguments. 
+ */ + protected function get_posts_query_args( $post_type ) { + /** + * Filters the query arguments for post type sitemap queries. + * + * @see WP_Query for a full list of arguments. + * + * @since 5.5.0 + * + * @param array $args Array of WP_Query arguments. + * @param string $post_type Post type name. + */ + $args = apply_filters( + 'wp_sitemaps_posts_query_args', + array( + 'orderby' => 'ID', + 'order' => 'ASC', + 'post_type' => $post_type, + 'posts_per_page' => wp_sitemaps_get_max_urls( $this->object_type ), + 'post_status' => array( 'publish' ), + 'no_found_rows' => true, + 'update_post_term_cache' => false, + 'update_post_meta_cache' => false, + ), + $post_type + ); + + return $args; + } +} diff --git a/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php new file mode 100644 index 0000000000..45c65145b2 --- /dev/null +++ b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-taxonomies.php @@ -0,0 +1,193 @@ +name = 'taxonomies'; + $this->object_type = 'term'; + } + + /** + * Returns all public, registered taxonomies. + * + * @since 5.5.0 + * + * @return array Map of registered taxonomy objects keyed by their name. + */ + public function get_object_subtypes() { + $taxonomies = get_taxonomies( array( 'public' => true ), 'objects' ); + + /** + * Filter the list of taxonomy object subtypes available within the sitemap. + * + * @since 5.5.0 + * + * @param array $taxonomies Map of registered taxonomy objects keyed by their name. + */ + return apply_filters( 'wp_sitemaps_taxonomies', $taxonomies ); + } + + /** + * Gets a URL list for a taxonomy sitemap. + * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $taxonomy Optional. Taxonomy name. Default empty. + * @return array $url_list Array of URLs for a sitemap. 
+ */ + public function get_url_list( $page_num, $taxonomy = '' ) { + $supported_types = $this->get_object_subtypes(); + + // Bail early if the queried taxonomy is not supported. + if ( ! isset( $supported_types[ $taxonomy ] ) ) { + return array(); + } + + /** + * Filters the taxonomies URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param string $taxonomy Taxonomy name. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_taxonomies_pre_url_list', + null, + $taxonomy, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $url_list = array(); + + // Offset by how many terms should be included in previous pages. + $offset = ( $page_num - 1 ) * wp_sitemaps_get_max_urls( $this->object_type ); + + $args = $this->get_taxonomies_query_args( $taxonomy ); + $args['offset'] = $offset; + + $taxonomy_terms = new WP_Term_Query( $args ); + + if ( ! empty( $taxonomy_terms->terms ) ) { + foreach ( $taxonomy_terms->terms as $term ) { + $term_link = get_term_link( $term ); + + // get_term_link() returns a WP_Error on failure; skip the term rather than emit an error object as a URL. + if ( is_wp_error( $term_link ) ) { + continue; + } + + $sitemap_entry = array( + 'loc' => $term_link, + ); + + /** + * Filters the sitemap entry for an individual term. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the term. + * @param WP_Term|int $term Term object, or the term ID when the term query returns IDs ('fields' => 'ids'). + * @param string $taxonomy Taxonomy name. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_taxonomies_entry', $sitemap_entry, $term, $taxonomy ); + $url_list[] = $sitemap_entry; + } + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @param string $taxonomy Taxonomy name. + * @return int Total number of pages. + */ + public function get_max_num_pages( $taxonomy = '' ) { + if ( empty( $taxonomy ) ) { + return 0; + } + + /** + * Filters the max number of pages before it is generated.
+ * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + * @param string $taxonomy Taxonomy name. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_taxonomies_pre_max_num_pages', null, $taxonomy ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $term_count = wp_count_terms( $taxonomy, $this->get_taxonomies_query_args( $taxonomy ) ); + + return (int) ceil( $term_count / wp_sitemaps_get_max_urls( $this->object_type ) ); + } + + /** + * Returns the query args for retrieving taxonomy terms to list in the sitemap. + * + * @since 5.5.0 + * + * @param string $taxonomy Taxonomy name. + * @return array $args Array of WP_Term_Query arguments. + */ + protected function get_taxonomies_query_args( $taxonomy ) { + /** + * Filters the taxonomy terms query arguments. + * + * Allows modification of the taxonomy query arguments before querying. + * + * @see WP_Term_Query for a full list of arguments + * + * @since 5.5.0 + * + * @param array $args Array of WP_Term_Query arguments. + * @param string $taxonomy Taxonomy name. + */ + $args = apply_filters( + 'wp_sitemaps_taxonomies_query_args', + array( + 'fields' => 'ids', + 'taxonomy' => $taxonomy, + 'orderby' => 'term_order', + 'number' => wp_sitemaps_get_max_urls( $this->object_type ), + 'hide_empty' => true, + 'hierarchical' => false, + 'update_term_meta_cache' => false, + ), + $taxonomy + ); + + return $args; + } +} diff --git a/src/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php new file mode 100644 index 0000000000..1be9a81db1 --- /dev/null +++ b/src/wp-includes/sitemaps/providers/class-wp-sitemaps-users.php @@ -0,0 +1,163 @@ +name = 'users'; + $this->object_type = 'user'; + } + + /** + * Gets a URL list for a user sitemap. 
+ * + * @since 5.5.0 + * + * @param int $page_num Page of results. + * @param string $object_subtype Optional. Not applicable for Users but + * required for compatibility with the parent + * provider class. Default empty. + * @return array $url_list Array of URLs for a sitemap. + */ + public function get_url_list( $page_num, $object_subtype = '' ) { + /** + * Filters the users URL list before it is generated. + * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param array $url_list The URL list. Default null. + * @param int $page_num Page of results. + */ + $url_list = apply_filters( + 'wp_sitemaps_users_pre_url_list', + null, + $page_num + ); + + if ( null !== $url_list ) { + return $url_list; + } + + $args = $this->get_users_query_args(); + $args['paged'] = $page_num; + + $query = new WP_User_Query( $args ); + $users = $query->get_results(); + $url_list = array(); + + foreach ( $users as $user ) { + $sitemap_entry = array( + 'loc' => get_author_posts_url( $user->ID ), + ); + + /** + * Filters the sitemap entry for an individual user. + * + * @since 5.5.0 + * + * @param array $sitemap_entry Sitemap entry for the user. + * @param WP_User $user User object. + */ + $sitemap_entry = apply_filters( 'wp_sitemaps_users_entry', $sitemap_entry, $user ); + $url_list[] = $sitemap_entry; + } + + return $url_list; + } + + /** + * Gets the max number of pages available for the object type. + * + * @since 5.5.0 + * + * @see WP_Sitemaps_Provider::max_num_pages + * + * @param string $object_subtype Optional. Not applicable for Users but + * required for compatibility with the parent + * provider class. Default empty. + * @return int Total page count. + */ + public function get_max_num_pages( $object_subtype = '' ) { + /** + * Filters the max number of pages before it is generated. 
+ * + * Passing a non-null value will effectively short-circuit the generation, + * returning that value instead. + * + * @since 5.5.0 + * + * @param int $max_num_pages The maximum number of pages. Default null. + */ + $max_num_pages = apply_filters( 'wp_sitemaps_users_pre_max_num_pages', null ); + + if ( null !== $max_num_pages ) { + return $max_num_pages; + } + + $args = $this->get_users_query_args(); + $query = new WP_User_Query( $args ); + + $total_users = $query->get_total(); + + return (int) ceil( $total_users / wp_sitemaps_get_max_urls( $this->object_type ) ); + } + + /** + * Returns the query args for retrieving users to list in the sitemap. + * + * @since 5.5.0 + * + * @return array $args Array of WP_User_Query arguments. + */ + protected function get_users_query_args() { + $public_post_types = get_post_types( + array( + 'public' => true, + ) + ); + + // We're not supporting sitemaps for author pages for attachments. + unset( $public_post_types['attachment'] ); + + /** + * Filters the query arguments for authors with public posts. + * + * Allows modification of the authors query arguments before querying. + * + * @see WP_User_Query for a full list of arguments + * + * @since 5.5.0 + * + * @param array $args Array of WP_User_Query arguments. + */ + $args = apply_filters( + 'wp_sitemaps_users_query_args', + array( + 'has_published_posts' => array_keys( $public_post_types ), + 'number' => wp_sitemaps_get_max_urls( $this->object_type ), + ) + ); + + return $args; + } +} diff --git a/src/wp-settings.php b/src/wp-settings.php index 1a5f296733..a78be1c091 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -263,6 +263,16 @@ require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-term-meta-fields.php'; require ABSPATH . WPINC . '/rest-api/fields/class-wp-rest-user-meta-fields.php'; require ABSPATH . WPINC . '/rest-api/search/class-wp-rest-search-handler.php'; require ABSPATH . WPINC . 
'/rest-api/search/class-wp-rest-post-search-handler.php'; +require ABSPATH . WPINC . '/sitemaps.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-index.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-provider.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-registry.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-renderer.php'; +require ABSPATH . WPINC . '/sitemaps/class-wp-sitemaps-stylesheet.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-posts.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-taxonomies.php'; +require ABSPATH . WPINC . '/sitemaps/providers/class-wp-sitemaps-users.php'; require ABSPATH . WPINC . '/class-wp-block-type.php'; require ABSPATH . WPINC . '/class-wp-block-styles-registry.php'; require ABSPATH . WPINC . '/class-wp-block-type-registry.php'; diff --git a/tests/phpunit/includes/bootstrap.php b/tests/phpunit/includes/bootstrap.php index 460454b1b7..93a33bf96c 100644 --- a/tests/phpunit/includes/bootstrap.php +++ b/tests/phpunit/includes/bootstrap.php @@ -153,12 +153,15 @@ require __DIR__ . '/testcase-rest-post-type-controller.php'; require __DIR__ . '/testcase-xmlrpc.php'; require __DIR__ . '/testcase-ajax.php'; require __DIR__ . '/testcase-canonical.php'; +require __DIR__ . '/testcase-xml.php'; require __DIR__ . '/exceptions.php'; require __DIR__ . '/utils.php'; require __DIR__ . '/spy-rest-server.php'; require __DIR__ . '/class-wp-rest-test-search-handler.php'; require __DIR__ . '/class-wp-rest-test-configurable-controller.php'; require __DIR__ . '/class-wp-fake-block-type.php'; +require __DIR__ . '/class-wp-sitemaps-test-provider.php'; +require __DIR__ . '/class-wp-sitemaps-empty-test-provider.php'; /** * A class to handle additional command line arguments passed to the script. 
diff --git a/tests/phpunit/includes/class-wp-sitemaps-empty-test-provider.php b/tests/phpunit/includes/class-wp-sitemaps-empty-test-provider.php new file mode 100644 index 0000000000..c772c3941c --- /dev/null +++ b/tests/phpunit/includes/class-wp-sitemaps-empty-test-provider.php @@ -0,0 +1,38 @@ +object_type = $object_type; + } + + /** + * Gets a URL list for a sitemap. + * + * @param int $page_num Page of results. + * @param string $object_subtype Optional. Object subtype name. Default empty. + * @return array List of URLs for a sitemap. + */ + public function get_url_list( $page_num, $object_subtype = '' ) { + return array(); + } + + /** + * Query for determining the number of pages. + * + * @param string $object_subtype Optional. Object subtype. Default empty. + * @return int Total number of pages. + */ + public function get_max_num_pages( $object_subtype = '' ) { + return 0; + } +} diff --git a/tests/phpunit/includes/class-wp-sitemaps-test-provider.php b/tests/phpunit/includes/class-wp-sitemaps-test-provider.php new file mode 100644 index 0000000000..389bb6c7bf --- /dev/null +++ b/tests/phpunit/includes/class-wp-sitemaps-test-provider.php @@ -0,0 +1,52 @@ +object_type = $object_type; + } + + /** + * Return the public post types, which excludes nav_items and similar types. + * Attachments are also excluded. This includes custom post types with public = true + * + * @return array Map of object subtype objects (WP_Post_Type) keyed by their name. + */ + public function get_object_subtypes() { + return array( + 'type-1' => (object) array( 'name' => 'type-1' ), + 'type-2' => (object) array( 'name' => 'type-2' ), + 'type-3' => (object) array( 'name' => 'type-3' ), + ); + } + + /** + * Gets a URL list for a sitemap. + * + * @param int $page_num Page of results. + * @param string $object_subtype Optional. Object subtype name. Default empty. + * @return array List of URLs for a sitemap. 
+ */ + public function get_url_list( $page_num, $object_subtype = '' ) { + return array(); + } + + /** + * Query for determining the number of pages. + * + * @param string $object_subtype Optional. Object subtype. Default empty. + * @return int Total number of pages. + */ + public function get_max_num_pages( $object_subtype = '' ) { + return 4; + } +} diff --git a/tests/phpunit/includes/normalize-xml.xsl b/tests/phpunit/includes/normalize-xml.xsl new file mode 100644 index 0000000000..135556c61b --- /dev/null +++ b/tests/phpunit/includes/normalize-xml.xsl @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/phpunit/includes/testcase-xml.php b/tests/phpunit/includes/testcase-xml.php new file mode 100644 index 0000000000..c7b78b2e06 --- /dev/null +++ b/tests/phpunit/includes/testcase-xml.php @@ -0,0 +1,92 @@ +loadXML( $xml, $options ); + $libxml_last_error = libxml_get_last_error(); + + $this->assertFalse( + isset( $libxml_last_error->message ), + isset( $libxml_last_error->message ) ? sprintf( 'Non-well-formed XML: %s.', $libxml_last_error->message ) : '' + ); + + // Restore default error handler. + libxml_use_internal_errors( $internal ); + libxml_clear_errors(); + + return $xml_dom; + } + + /** + * Normalize an XML document to make comparing two documents easier. + * + * @param string $xml + * @param int $options Bitwise OR of the {@link https://www.php.net/manual/en/libxml.constants.php libxml option constants}. + * Default is 0. + * @return string The normalized form of `$xml`. + */ + public function normalizeXML( $xml, $options = 0 ) { + if ( ! class_exists( 'XSLTProcessor' ) ) { + $this->markTestSkipped( 'This test requires the XSL extension.' ); + } + + static $xslt_proc; + + if ( ! $xslt_proc ) { + $xslt_proc = new XSLTProcessor(); + $xslt_proc->importStyleSheet( simplexml_load_file( __DIR__ . 
'/normalize-xml.xsl' ) ); + } + + return $xslt_proc->transformToXML( $this->loadXML( $xml, $options ) ); + } + + /** + * Reports an error identified by `$message` if the namespace normalized form of the XML document in `$actualXml` + * is not equal to the namespace normalized form of the XML document in `$expectedXml`. + * + * This is similar to {@link https://phpunit.de/manual/6.5/en/appendixes.assertions.html#appendixes.assertions.assertXmlStringEqualsXmlString assertXmlStringEqualsXmlString()} + * except that differences in namespace prefixes are normalized away, such that given + * `$actualXml = "";` and + * `$expectedXml = "";` + * then `$this->assertXMLEquals( $expectedXml, $actualXml )` will succeed. + * + * @param string $expectedXml + * @param string $actualXml + * @param string $message Optional. Message to display when the assertion fails. + */ + public function assertXMLEquals( $expectedXml, $actualXml, $message = '' ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase + $this->assertEquals( $this->normalizeXML( $expectedXml ), $this->normalizeXML( $actualXml ), $message ); //phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase + } + + /** + * Reports an error identified by `$message` if the namespace normalized form of the XML document in `$actualXml` + * is equal to the namespace normalized form of the XML document in `$expectedXml`. + * + * This is similar to {@link https://phpunit.de/manual/6.5/en/appendixes.assertions.html#appendixes.assertions.assertXmlStringEqualsXmlString assertXmlStringNotEqualsXmlString()} + * except that differences in namespace prefixes are normalized away, such that given + * `$actualXml = "";` and + * `$expectedXml = "";` + * then `$this->assertXMLNotEquals( $expectedXml, $actualXml )` will fail. + * + * @param string $expectedXml + * @param string $actualXml + * @param string $message Optional. Message to display when the assertion fails.
+ */ + public function assertXMLNotEquals( $expectedXml, $actualXml, $message = '' ) { //phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase + $this->assertNotEquals( $this->normalizeXML( $expectedXml ), $this->normalizeXML( $actualXml ), $message ); //phpcs:ignore WordPress.NamingConventions.ValidVariableName.VariableNotSnakeCase + } +} diff --git a/tests/phpunit/tests/canonical/sitemaps.php b/tests/phpunit/tests/canonical/sitemaps.php new file mode 100644 index 0000000000..579247a33c --- /dev/null +++ b/tests/phpunit/tests/canonical/sitemaps.php @@ -0,0 +1,41 @@ +init(); + } + + public function test_remove_trailing_slashes_for_sitemap_index_requests() { + $this->set_permalink_structure( '/%postname%/' ); + $this->assertCanonical( '/wp-sitemap.xml', '/wp-sitemap.xml' ); + $this->assertCanonical( '/wp-sitemap.xml/', '/wp-sitemap.xml' ); + } + + public function test_remove_trailing_slashes_for_sitemap_index_stylesheet_requests() { + $this->set_permalink_structure( '/%postname%/' ); + $this->assertCanonical( '/wp-sitemap-index.xsl', '/wp-sitemap-index.xsl' ); + $this->assertCanonical( '/wp-sitemap-index.xsl/', '/wp-sitemap-index.xsl' ); + } + + public function test_remove_trailing_slashes_for_sitemap_requests() { + $this->set_permalink_structure( '/%postname%/' ); + $this->assertCanonical( '/wp-sitemap-posts-post-1.xml', '/wp-sitemap-posts-post-1.xml' ); + $this->assertCanonical( '/wp-sitemap-posts-post-1.xml/', '/wp-sitemap-posts-post-1.xml' ); + $this->assertCanonical( '/wp-sitemap-users-1.xml', '/wp-sitemap-users-1.xml' ); + $this->assertCanonical( '/wp-sitemap-users-1.xml/', '/wp-sitemap-users-1.xml' ); + } + + public function test_remove_trailing_slashes_for_sitemap_stylesheet_requests() { + $this->set_permalink_structure( '/%postname%/' ); + $this->assertCanonical( '/wp-sitemap.xsl', '/wp-sitemap.xsl' ); + $this->assertCanonical( '/wp-sitemap.xsl/', '/wp-sitemap.xsl' ); + } +} diff --git a/tests/phpunit/tests/formatting/EscXml.php 
b/tests/phpunit/tests/formatting/EscXml.php new file mode 100644 index 0000000000..9e188e3fcb --- /dev/null +++ b/tests/phpunit/tests/formatting/EscXml.php @@ -0,0 +1,135 @@ +assertEquals( $expected, $actual ); + } + + /** + * Data provider for `test_esc_xml_basics()`. + * + * @return array { + * @type string $source The source string to be escaped. + * @type string $expected The expected escaped value of `$source`. + * } + */ + public function _test_esc_xml_basics_dataprovider() { + return array( + // Simple string. + array( + 'The quick brown fox.', + 'The quick brown fox.', + ), + // URL with &. + array( + 'http://localhost/trunk/wp-login.php?action=logout&_wpnonce=cd57d75985', + 'http://localhost/trunk/wp-login.php?action=logout&_wpnonce=cd57d75985', + ), + // SQL query w/ single quotes. + array( + "SELECT meta_key, meta_value FROM wp_trunk_sitemeta WHERE meta_key IN ('site_name', 'siteurl', 'active_sitewide_plugins', '_site_transient_timeout_theme_roots', '_site_transient_theme_roots', 'site_admins', 'can_compress_scripts', 'global_terms_enabled') AND site_id = 1", + 'SELECT meta_key, meta_value FROM wp_trunk_sitemeta WHERE meta_key IN ('site_name', 'siteurl', 'active_sitewide_plugins', '_site_transient_timeout_theme_roots', '_site_transient_theme_roots', 'site_admins', 'can_compress_scripts', 'global_terms_enabled') AND site_id = 1', + ), + ); + } + + public function test_escapes_ampersands() { + $source = 'penn & teller & at&t'; + $expected = 'penn & teller & at&t'; + $actual = esc_xml( $source ); + $this->assertEquals( $expected, $actual ); + } + + public function test_escapes_greater_and_less_than() { + $source = 'this > that < that '; + $expected = 'this > that < that <randomhtml />'; + $actual = esc_xml( $source ); + $this->assertEquals( $expected, $actual ); + } + + public function test_escapes_html_named_entities() { + $source = 'this & is a … followed by › and more and a &nonexistent; entity'; + $expected = 'this & is a … followed by › and more and a 
&nonexistent; entity'; + $actual = esc_xml( $source ); + $this->assertEquals( $expected, $actual ); + } + + public function test_ignores_existing_entities() { + $source = '& £ " &'; + // note that _wp_specialchars() strips leading 0's from numeric character references. + $expected = '& £ " &'; + $actual = esc_xml( $source ); + $this->assertEquals( $expected, $actual ); + } + + /** + * Test that CDATA Sections are not escaped. + * + * @dataProvider _test_ignores_cdata_sections_dataprovider + * + * @param string $source The source string to be escaped. + * @param string $expected The expected escaped value of `$source`. + */ + public function test_ignores_cdata_sections( $source, $expected ) { + $actual = esc_xml( $source ); + $this->assertEquals( $expected, $actual ); + } + + /** + * Data provider for `test_ignores_cdata_sections()`. + * + * @return array { + * @type string $source The source string to be escaped. + * @type string $expected The expected escaped value of `$source`. + * } + */ + public function _test_ignores_cdata_sections_dataprovider() { + return array( + // basic CDATA Section containing chars that would otherwise be escaped if not in a CDATA Section + // not to mention the CDATA Section markup itself :-) + // $source contains embedded newlines to test that the regex that ignores CDATA Sections + // correctly handles that case. + array( + "This is\na]]>\nbroadcast system", + "This is\na]]>\nbroadcast system", + ), + // string with chars that should be escaped as well as a CDATA Section that should be not be. + array( + 'This is … a ]]> broadcast ', + 'This is … a ]]> broadcast <system />', + ), + // Same as above, but with the CDATA Section at the start of the string. + array( + ']]> This is … a broadcast ', + ']]> This is … a broadcast <system />', + ), + // Same as above, but with the CDATA Section at the end of the string. + array( + 'This is … a broadcast ]]>', + 'This is … a broadcast <system />]]>', + ), + // Multiple CDATA Sections. 
+ array( + 'This is … a ]]> &broadcast; ]]>', + 'This is … a ]]> &broadcast; ]]>', + ), + // Ensure that ']]>' that does not mark the end of a CDATA Section is escaped. + array( + ']]>', + ']]>', + ), + ); + } +} diff --git a/tests/phpunit/tests/sitemaps/functions.php b/tests/phpunit/tests/sitemaps/functions.php new file mode 100644 index 0000000000..dd4796b438 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/functions.php @@ -0,0 +1,61 @@ +assertEquals( $expected_posts, 300, 'Can not confirm max URL number for posts.' ); + $this->assertEquals( $expected_taxonomies, 50, 'Can not confirm max URL number for taxonomies.' ); + $this->assertEquals( $expected_users, 1, 'Can not confirm max URL number for users.' ); + } + + /** + * Callback function for testing the `sitemaps_max_urls` filter. + * + * @param int $max_urls The maximum number of URLs included in a sitemap. Default 2000. + * @param string $type Optional. The type of sitemap to be filtered. Default empty. + * @return int The maximum number of URLs. + */ + public function _filter_max_url_value( $max_urls, $type ) { + switch ( $type ) { + case 'post': + return 300; + case 'term': + return 50; + case 'user': + return 1; + default: + return $max_urls; + } + } + + /** + * Test wp_get_sitemaps default functionality + */ + public function test_wp_get_sitemaps() { + $sitemaps = wp_get_sitemaps(); + + $expected = array( + 'posts' => 'WP_Sitemaps_Posts', + 'taxonomies' => 'WP_Sitemaps_Taxonomies', + 'users' => 'WP_Sitemaps_Users', + ); + + $this->assertEquals( array_keys( $expected ), array_keys( $sitemaps ), 'Unable to confirm default sitemap types are registered.' ); + + foreach ( $expected as $name => $provider ) { + $this->assertTrue( is_a( $sitemaps[ $name ], $provider ), "Default $name sitemap is not a $provider object." 
); + } + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-index.php b/tests/phpunit/tests/sitemaps/sitemaps-index.php new file mode 100644 index 0000000000..5df604af8a --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-index.php @@ -0,0 +1,51 @@ +add_sitemap( 'foo', new WP_Sitemaps_Test_Provider( 'foo' ) ); + $registry->add_sitemap( 'bar', new WP_Sitemaps_Test_Provider( 'bar' ) ); + + $sitemap_index = new WP_Sitemaps_Index( $registry ); + $this->assertCount( 24, $sitemap_index->get_sitemap_list() ); + } + + public function test_get_sitemap_list_no_entries() { + $registry = new WP_Sitemaps_Registry(); + + $registry->add_sitemap( 'foo', new WP_Sitemaps_Empty_Test_Provider( 'foo' ) ); + + $sitemap_index = new WP_Sitemaps_Index( $registry ); + $this->assertCount( 0, $sitemap_index->get_sitemap_list() ); + } + + public function test_get_index_url() { + $sitemap_index = new WP_Sitemaps_Index( new WP_Sitemaps_Registry() ); + $index_url = $sitemap_index->get_index_url(); + + $this->assertStringEndsWith( '/?sitemap=index', $index_url ); + } + + public function test_get_index_url_pretty_permalinks() { + // Set permalinks for testing. + $this->set_permalink_structure( '/%year%/%postname%/' ); + + $sitemap_index = new WP_Sitemaps_Index( new WP_Sitemaps_Registry() ); + $index_url = $sitemap_index->get_index_url(); + + // Clean up permalinks. + $this->set_permalink_structure(); + + $this->assertStringEndsWith( '/wp-sitemap.xml', $index_url ); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-posts.php b/tests/phpunit/tests/sitemaps/sitemaps-posts.php new file mode 100644 index 0000000000..8d173942f4 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-posts.php @@ -0,0 +1,49 @@ +get_object_subtypes(); + + $this->assertEquals( array(), $subtypes, 'Could not filter posts subtypes.' ); + } + + /** + * Test `wp_sitemaps_posts_show_on_front_entry` filter. 
+ */ + public function test_posts_show_on_front_entry() { + $posts_provider = new WP_Sitemaps_Posts(); + update_option( 'show_on_front', 'page' ); + + add_filter( 'wp_sitemaps_posts_show_on_front_entry', array( $this, '_show_on_front_entry' ) ); + + $url_list = $posts_provider->get_url_list( 1, 'page' ); + + $this->assertEquals( array(), $url_list ); + + update_option( 'show_on_front', 'posts' ); + + $url_list = $posts_provider->get_url_list( 1, 'page' ); + $sitemap_entry = array_shift( $url_list ); + + $this->assertTrue( isset( $sitemap_entry['lastmod'] ) ); + } + + /** + * Callback for 'wp_sitemaps_posts_show_on_front_entry' filter. + */ + public function _show_on_front_entry( $sitemap_entry ) { + $sitemap_entry['lastmod'] = wp_date( DATE_W3C, time() ); + + return $sitemap_entry; + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-registry.php b/tests/phpunit/tests/sitemaps/sitemaps-registry.php new file mode 100644 index 0000000000..c6c664a65b --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-registry.php @@ -0,0 +1,33 @@ +add_sitemap( 'foo', $provider ); + $sitemaps = $registry->get_sitemaps(); + + $this->assertTrue( $actual ); + $this->assertCount( 1, $sitemaps ); + $this->assertSame( $sitemaps['foo'], $provider, 'Can not confirm sitemap registration is working.' ); + } + + public function test_add_sitemap_prevent_duplicates() { + $provider1 = new WP_Sitemaps_Test_Provider(); + $provider2 = new WP_Sitemaps_Test_Provider(); + $registry = new WP_Sitemaps_Registry(); + + $actual1 = $registry->add_sitemap( 'foo', $provider1 ); + $actual2 = $registry->add_sitemap( 'foo', $provider2 ); + $sitemaps = $registry->get_sitemaps(); + + $this->assertTrue( $actual1 ); + $this->assertFalse( $actual2 ); + $this->assertCount( 1, $sitemaps ); + $this->assertSame( $sitemaps['foo'], $provider1, 'Can not confirm sitemap registration is working.' 
); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-renderer.php b/tests/phpunit/tests/sitemaps/sitemaps-renderer.php new file mode 100644 index 0000000000..f2ee1de1c7 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-renderer.php @@ -0,0 +1,283 @@ +get_sitemap_stylesheet_url(); + + $this->assertStringEndsWith( '/?sitemap-stylesheet=sitemap', $stylesheet_url ); + } + + public function test_get_sitemap_stylesheet_url_pretty_permalinks() { + // Set permalinks for testing. + $this->set_permalink_structure( '/%year%/%postname%/' ); + + $sitemap_renderer = new WP_Sitemaps_Renderer(); + $stylesheet_url = $sitemap_renderer->get_sitemap_stylesheet_url(); + + // Clean up permalinks. + $this->set_permalink_structure(); + + $this->assertStringEndsWith( '/wp-sitemap.xsl', $stylesheet_url ); + } + + public function test_get_sitemap_index_stylesheet_url() { + $sitemap_renderer = new WP_Sitemaps_Renderer(); + $stylesheet_url = $sitemap_renderer->get_sitemap_index_stylesheet_url(); + + $this->assertStringEndsWith( '/?sitemap-stylesheet=index', $stylesheet_url ); + } + + public function test_get_sitemap_index_stylesheet_url_pretty_permalinks() { + // Set permalinks for testing. + $this->set_permalink_structure( '/%year%/%postname%/' ); + + $sitemap_renderer = new WP_Sitemaps_Renderer(); + $stylesheet_url = $sitemap_renderer->get_sitemap_index_stylesheet_url(); + + // Clean up permalinks. + $this->set_permalink_structure(); + + $this->assertStringEndsWith( '/wp-sitemap-index.xsl', $stylesheet_url ); + } + + /** + * Test XML output for the sitemap index renderer. + */ + public function test_get_sitemap_index_xml() { + $entries = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-category-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . 
'/wp-sitemap-taxonomies-post_tag-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-users-1.xml', + ), + ); + + $renderer = new WP_Sitemaps_Renderer(); + + $actual = $renderer->get_sitemap_index_xml( $entries ); + $expected = '' . + '' . + '' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-category-1.xml' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-post_tag-1.xml' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-users-1.xml' . + ''; + + $this->assertXMLEquals( $expected, $actual, 'Sitemap index markup incorrect.' ); + } + + /** + * Test XML output for the sitemap index renderer with lastmod attributes. + */ + public function test_get_sitemap_index_xml_with_lastmod() { + $entries = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml', + 'lastmod' => '2005-01-01', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml', + 'lastmod' => '2005-01-01', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-category-1.xml', + 'lastmod' => '2005-01-01', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-post_tag-1.xml', + 'lastmod' => '2005-01-01', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-users-1.xml', + 'lastmod' => '2005-01-01', + ), + ); + + $renderer = new WP_Sitemaps_Renderer(); + + $actual = $renderer->get_sitemap_index_xml( $entries ); + $expected = '' . + '' . + '' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml2005-01-01' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml2005-01-01' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-category-1.xml2005-01-01' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-post_tag-1.xml2005-01-01' . + 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-users-1.xml2005-01-01' . 
+ ''; + + $this->assertXMLEquals( $expected, $actual, 'Sitemap index markup incorrect.' ); + } + + /** + * Test that all children of Q{http://www.sitemaps.org/schemas/sitemap/0.9}sitemap in the + * rendered index XML are defined in the Sitemaps spec (i.e., loc, lastmod). + * + * Note that when a means of adding elements in extension namespaces is settled on, + * this test will need to be updated accordingly. + * + * @expectedIncorrectUsage WP_Sitemaps_Renderer::get_sitemap_index_xml + */ + public function test_get_sitemap_index_xml_extra_elements() { + $url_list = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml', + 'unknown' => 'this is a test', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml', + 'unknown' => 'that was a test', + ), + ); + + $renderer = new WP_Sitemaps_Renderer(); + + $xml_dom = $this->loadXML( $renderer->get_sitemap_index_xml( $url_list ) ); + $xpath = new DOMXPath( $xml_dom ); + $xpath->registerNamespace( 'sitemap', 'http://www.sitemaps.org/schemas/sitemap/0.9' ); + + $this->assertEquals( + 0, + $xpath->evaluate( "count( /sitemap:sitemapindex/sitemap:sitemap/*[ namespace-uri() != 'http://www.sitemaps.org/schemas/sitemap/0.9' or not( local-name() = 'loc' or local-name() = 'lastmod' ) ] )" ), + 'Invalid child of "sitemap:sitemap" in rendered index XML.' + ); + } + + /** + * Test XML output for the sitemap index renderer when stylesheet is disabled. + */ + public function test_get_sitemap_index_xml_without_stylesheet() { + $entries = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . 
'/wp-sitemap-posts-post-1.xml', + ), + ); + + add_filter( 'wp_sitemaps_stylesheet_index_url', '__return_false' ); + + $renderer = new WP_Sitemaps_Renderer(); + + $xml_dom = $this->loadXML( $renderer->get_sitemap_index_xml( $entries ) ); + $xpath = new DOMXPath( $xml_dom ); + + $this->assertSame( + 0, + $xpath->query( '//processing-instruction( "xml-stylesheet" )' )->length, + 'Sitemap index incorrectly contains the xml-stylesheet processing instruction.' + ); + } + + /** + * Test XML output for the sitemap page renderer. + */ + public function test_get_sitemap_xml() { + $url_list = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-1', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-2', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-3', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-4', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-5', + ), + ); + + $renderer = new WP_Sitemaps_Renderer(); + + $actual = $renderer->get_sitemap_xml( $url_list ); + $expected = '' . + '' . + '' . + 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-1' . + 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-2' . + 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-3' . + 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-4' . + 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-5' . + ''; + + $this->assertXMLEquals( $expected, $actual, 'Sitemap page markup incorrect.' ); + } + + /** + * Test XML output for the sitemap page renderer when stylesheet is disabled. + */ + public function test_get_sitemap_xml_without_stylesheet() { + $url_list = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . 
'/2019/10/post-1', + ), + ); + + add_filter( 'wp_sitemaps_stylesheet_url', '__return_false' ); + + $renderer = new WP_Sitemaps_Renderer(); + + $xml_dom = $this->loadXML( $renderer->get_sitemap_xml( $url_list ) ); + $xpath = new DOMXPath( $xml_dom ); + + $this->assertSame( + 0, + $xpath->query( '//processing-instruction( "xml-stylesheet" )' )->length, + 'Sitemap incorrectly contains the xml-stylesheet processing instruction.' + ); + } + + /** + * Test that all children of Q{http://www.sitemaps.org/schemas/sitemap/0.9}url in the + * rendered sitemap XML are defined in the Sitemaps spec (i.e., loc, lastmod, changefreq, priority). + * + * Note that when a means of adding elements in extension namespaces is settled on, + * this test will need to be updated accordingly. + * + * @expectedIncorrectUsage WP_Sitemaps_Renderer::get_sitemap_xml + */ + public function test_get_sitemap_xml_extra_elements() { + $url_list = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-1', + 'string' => 'value', + 'number' => 200, + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/2019/10/post-2', + 'string' => 'another value', + 'number' => 300, + ), + ); + + $renderer = new WP_Sitemaps_Renderer(); + + $xml_dom = $this->loadXML( $renderer->get_sitemap_xml( $url_list ) ); + $xpath = new DOMXPath( $xml_dom ); + $xpath->registerNamespace( 'sitemap', 'http://www.sitemaps.org/schemas/sitemap/0.9' ); + + $this->assertEquals( + 0, + $xpath->evaluate( "count( /sitemap:urlset/sitemap:url/*[ namespace-uri() != 'http://www.sitemaps.org/schemas/sitemap/0.9' or not( local-name() = 'loc' or local-name() = 'lastmod' or local-name() = 'changefreq' or local-name() = 'priority' ) ] )" ), + 'Invalid child of "sitemap:url" in rendered XML.' 
+ ); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-stylesheet.php b/tests/phpunit/tests/sitemaps/sitemaps-stylesheet.php new file mode 100644 index 0000000000..09aacf0440 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-stylesheet.php @@ -0,0 +1,42 @@ +get_sitemap_stylesheet(); + + $this->assertSame( '', $content, 'Could not filter stylesheet content' ); + } + + /** + * Test that sitemap index stylesheet content can be filtered. + */ + public function test_filter_sitemaps_stylesheet_index_content() { + $stylesheet = new WP_Sitemaps_Stylesheet(); + + add_filter( 'wp_sitemaps_stylesheet_index_content', '__return_empty_string' ); + $content = $stylesheet->get_sitemap_index_stylesheet(); + + $this->assertSame( '', $content, 'Could not filter sitemap index stylesheet content' ); + } + + /** + * Test that sitemap stylesheet CSS can be filtered. + */ + public function test_filter_sitemaps_stylesheet_css() { + $stylesheet = new WP_Sitemaps_Stylesheet(); + + add_filter( 'wp_sitemaps_stylesheet_css', '__return_empty_string' ); + $css = $stylesheet->get_stylesheet_css(); + + $this->assertSame( '', $css, 'Could not filter sitemap stylesheet CSS' ); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-taxonomies.php b/tests/phpunit/tests/sitemaps/sitemaps-taxonomies.php new file mode 100644 index 0000000000..044e868e74 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-taxonomies.php @@ -0,0 +1,192 @@ +term->create_many( 10, array( 'taxonomy' => 'category' ) ); + self::$post_tags = $factory->term->create_many( 10 ); + self::$editor_id = $factory->user->create( array( 'role' => 'editor' ) ); + } + + /** + * Test getting a URL list for default taxonomies via + * WP_Sitemaps_Taxonomies::get_url_list(). + */ + public function test_get_url_list_taxonomies() { + // Add the default category to the list of categories we're testing. + $categories = array_merge( array( 1 ), self::$cats ); + + // Create a test post to calculate update times. 
+ $post = self::factory()->post->create_and_get( + array( + 'tags_input' => self::$post_tags, + 'post_category' => $categories, + ) + ); + + $tax_provider = new WP_Sitemaps_Taxonomies(); + + $cat_list = $tax_provider->get_url_list( 1, 'category' ); + + $expected_cats = array_map( + static function ( $id ) use ( $post ) { + return array( + 'loc' => get_term_link( $id, 'category' ), + ); + }, + $categories + ); + + $this->assertSame( $expected_cats, $cat_list, 'Category URL list does not match.' ); + + $tag_list = $tax_provider->get_url_list( 1, 'post_tag' ); + + $expected_tags = array_map( + static function ( $id ) use ( $post ) { + return array( + 'loc' => get_term_link( $id, 'post_tag' ), + ); + }, + self::$post_tags + ); + + $this->assertSame( $expected_tags, $tag_list, 'Post Tags URL list does not match.' ); + } + + /** + * Test getting a URL list for a custom taxonomy via + * WP_Sitemaps_Taxonomies::get_url_list(). + */ + public function test_get_url_list_custom_taxonomy() { + wp_set_current_user( self::$editor_id ); + + // Create a custom taxonomy for this test. + $taxonomy = 'test_taxonomy'; + register_taxonomy( $taxonomy, 'post' ); + + // Create test terms in the custom taxonomy. + $terms = self::factory()->term->create_many( 10, array( 'taxonomy' => $taxonomy ) ); + + // Create a test post applied to all test terms. + $post = self::factory()->post->create_and_get( array( 'tax_input' => array( $taxonomy => $terms ) ) ); + + $expected = array_map( + static function ( $id ) use ( $taxonomy, $post ) { + return array( + 'loc' => get_term_link( $id, $taxonomy ), + ); + }, + $terms + ); + + $tax_provider = new WP_Sitemaps_Taxonomies(); + + $post_list = $tax_provider->get_url_list( 1, $taxonomy ); + + // Clean up. + unregister_taxonomy_for_object_type( $taxonomy, 'post' ); + + $this->assertEquals( $expected, $post_list, 'Custom taxonomy term links are not visible.' 
); + } + + /** + * Test getting a URL list for a private custom taxonomy via + * WP_Sitemaps_Taxonomies::get_url_list(). + */ + public function test_get_url_list_custom_taxonomy_private() { + // Create a custom taxonomy for this test. + $taxonomy = 'private_taxonomy'; + register_taxonomy( $taxonomy, 'post', array( 'public' => false ) ); + + // Create test terms in the custom taxonomy. + $terms = self::factory()->term->create_many( 10, array( 'taxonomy' => $taxonomy ) ); + + // Create a test post applied to all test terms. + self::factory()->post->create( array( 'tax_input' => array( $taxonomy => $terms ) ) ); + + $tax_provider = new WP_Sitemaps_Taxonomies(); + + $post_list = $tax_provider->get_url_list( 1, $taxonomy ); + + // Clean up. + unregister_taxonomy_for_object_type( $taxonomy, 'post' ); + + $this->assertEmpty( $post_list, 'Private taxonomy term links are visible.' ); + } + + /** + * Test sitemap index entries with public and private taxonomies. + */ + public function test_get_sitemap_entries_custom_taxonomies() { + wp_set_current_user( self::$editor_id ); + + // Create custom public and private taxonomies for this test. + register_taxonomy( 'public_taxonomy', 'post' ); + register_taxonomy( 'private_taxonomy', 'post', array( 'public' => false ) ); + + // Create test terms in the custom taxonomies. + $public_term = self::factory()->term->create( array( 'taxonomy' => 'public_taxonomy' ) ); + $private_term = self::factory()->term->create( array( 'taxonomy' => 'private_taxonomy' ) ); + + // Create a test post applied to all test terms. + self::factory()->post->create_and_get( + array( + 'tax_input' => array( + 'public_taxonomy' => array( $public_term ), + 'private_taxonomy' => array( $private_term ), + ), + ) + ); + + $tax_provider = new WP_Sitemaps_Taxonomies(); + $entries = wp_list_pluck( $tax_provider->get_sitemap_entries(), 'loc' ); + + // Clean up. 
+ unregister_taxonomy_for_object_type( 'public_taxonomy', 'post' ); + unregister_taxonomy_for_object_type( 'private_taxonomy', 'post' ); + + $this->assertContains( 'http://' . WP_TESTS_DOMAIN . '/?sitemap=taxonomies&sitemap-subtype=public_taxonomy&paged=1', $entries, 'Public Taxonomies are not in the index.' ); + $this->assertNotContains( 'http://' . WP_TESTS_DOMAIN . '/?sitemap=taxonomies&sitemap-subtype=private_taxonomy&paged=1', $entries, 'Private Taxonomies are visible in the index.' ); + } + + /** + * Test ability to filter object subtypes. + */ + public function test_filter_sitemaps_taxonomies() { + $taxonomies_provider = new WP_Sitemaps_Taxonomies(); + + // Return an empty array to show that the list of subtypes is filterable. + add_filter( 'wp_sitemaps_taxonomies', '__return_empty_array' ); + $subtypes = $taxonomies_provider->get_object_subtypes(); + + $this->assertEquals( array(), $subtypes, 'Could not filter taxonomies subtypes.' ); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps-users.php b/tests/phpunit/tests/sitemaps/sitemaps-users.php new file mode 100644 index 0000000000..975462044c --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps-users.php @@ -0,0 +1,57 @@ +user->create_many( 10, array( 'role' => 'editor' ) ); + self::$editor_id = self::$users[0]; + } + + /** + * Test getting a URL list for a users sitemap page via + * WP_Sitemaps_Users::get_url_list(). + */ + public function test_get_url_list_users() { + // Set up the user to an editor to assign posts to other users. + wp_set_current_user( self::$editor_id ); + + // Create a set of posts for each user and generate the expected URL list data. 
+ $expected = array_map( + static function ( $user_id ) { + $post = self::factory()->post->create_and_get( array( 'post_author' => $user_id ) ); + + return array( + 'loc' => get_author_posts_url( $user_id ), + ); + }, + self::$users + ); + + $user_provider = new WP_Sitemaps_Users(); + + $url_list = $user_provider->get_url_list( 1 ); + + $this->assertEqualSets( $expected, $url_list ); + } +} diff --git a/tests/phpunit/tests/sitemaps/sitemaps.php b/tests/phpunit/tests/sitemaps/sitemaps.php new file mode 100644 index 0000000000..16a259c298 --- /dev/null +++ b/tests/phpunit/tests/sitemaps/sitemaps.php @@ -0,0 +1,396 @@ +user->create_many( 10 ); + self::$post_tags = $factory->term->create_many( 10 ); + self::$cats = $factory->term->create_many( 10, array( 'taxonomy' => 'category' ) ); + self::$pages = $factory->post->create_many( 10, array( 'post_type' => 'page' ) ); + + // Create a set of posts pre-assigned to tags and authors. + self::$posts = $factory->post->create_many( + 10, + array( + 'tags_input' => self::$post_tags, + 'post_author' => reset( self::$users ), + ) + ); + + // Create a user with an editor role to complete some tests. + self::$editor_id = $factory->user->create( array( 'role' => 'editor' ) ); + + self::$test_provider = new WP_Sitemaps_Test_Provider(); + } + + /** + * Helper function to get all sitemap entries data. + * + * @return array A list of sitemap entries. + */ + public function _get_sitemap_entries() { + $entries = array(); + + $providers = wp_get_sitemaps(); + + foreach ( $providers as $provider ) { + // Using `array_push` is more efficient than `array_merge` in the loop. + array_push( $entries, ...$provider->get_sitemap_entries() ); + } + + return $entries; + } + + /** + * Test default sitemap entries. + */ + public function test_get_sitemap_entries() { + $entries = $this->_get_sitemap_entries(); + + $expected = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . 
'/?sitemap=posts&sitemap-subtype=post&paged=1', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/?sitemap=posts&sitemap-subtype=page&paged=1', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/?sitemap=taxonomies&sitemap-subtype=category&paged=1', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/?sitemap=taxonomies&sitemap-subtype=post_tag&paged=1', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/?sitemap=users&paged=1', + ), + ); + + $this->assertSame( $expected, $entries ); + } + + /** + * Test default sitemap entries with permalinks on. + */ + public function test_get_sitemap_entries_post_with_permalinks() { + $this->set_permalink_structure( '/%year%/%postname%/' ); + + $entries = $this->_get_sitemap_entries(); + + $expected = array( + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-post-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-posts-page-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-category-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-taxonomies-post_tag-1.xml', + ), + array( + 'loc' => 'http://' . WP_TESTS_DOMAIN . '/wp-sitemap-users-1.xml', + ), + ); + + // Clean up permalinks. + $this->set_permalink_structure(); + + $this->assertSame( $expected, $entries ); + } + + /** + * Test sitemap index entries with public and private custom post types. + */ + public function test_get_sitemap_entries_custom_post_types() { + // Register and create a public post type post. + register_post_type( 'public_cpt', array( 'public' => true ) ); + self::factory()->post->create( array( 'post_type' => 'public_cpt' ) ); + + // Register and create a private post type post. + register_post_type( 'private_cpt', array( 'public' => false ) ); + self::factory()->post->create( array( 'post_type' => 'private_cpt' ) ); + + $entries = wp_list_pluck( $this->_get_sitemap_entries(), 'loc' ); + + // Clean up. 
+ unregister_post_type( 'public_cpt' ); + unregister_post_type( 'private_cpt' ); + + $this->assertContains( 'http://' . WP_TESTS_DOMAIN . '/?sitemap=posts&sitemap-subtype=public_cpt&paged=1', $entries, 'Public CPTs are not in the index.' ); + $this->assertNotContains( 'http://' . WP_TESTS_DOMAIN . '/?sitemap=posts&sitemap-subtype=private_cpt&paged=1', $entries, 'Private CPTs are visible in the index.' ); + } + + /** + * Tests getting a URL list for post type post. + */ + public function test_get_url_list_post() { + $providers = wp_get_sitemaps(); + + $post_list = $providers['posts']->get_url_list( 1, 'post' ); + + $expected = $this->_get_expected_url_list( 'post', self::$posts ); + + $this->assertEquals( $expected, $post_list ); + } + + /** + * Tests getting a URL list for post type page. + */ + public function test_get_url_list_page() { + // Short circuit the show on front option. + add_filter( 'pre_option_show_on_front', '__return_true' ); + + $providers = wp_get_sitemaps(); + + $post_list = $providers['posts']->get_url_list( 1, 'page' ); + + $expected = $this->_get_expected_url_list( 'page', self::$pages ); + + $this->assertEquals( $expected, $post_list ); + } + + /** + * Tests getting a URL list for post type page with included home page. + */ + public function test_get_url_list_page_with_home() { + $providers = wp_get_sitemaps(); + + $post_list = $providers['posts']->get_url_list( 1, 'page' ); + + $expected = $this->_get_expected_url_list( 'page', self::$pages ); + + // Add the homepage to the front of the URL list. + array_unshift( + $expected, + array( + 'loc' => home_url(), + ) + ); + + $this->assertEquals( $expected, $post_list ); + } + + /** + * Tests getting a URL list for post with private post. 
+ */ + public function test_get_url_list_private_post() { + wp_set_current_user( self::$editor_id ); + + $providers = wp_get_sitemaps(); + + $post_list_before = $providers['posts']->get_url_list( 1, 'post' ); + + $private_post_id = self::factory()->post->create( array( 'post_status' => 'private' ) ); + + $post_list_after = $providers['posts']->get_url_list( 1, 'post' ); + + $private_post = array( + 'loc' => get_permalink( $private_post_id ), + ); + + $this->assertNotContains( $private_post, $post_list_after ); + $this->assertEqualSets( $post_list_before, $post_list_after ); + } + + /** + * Tests getting a URL list for a custom post type. + */ + public function test_get_url_list_cpt() { + $post_type = 'custom_type'; + + // Registered post types are private unless explicitly set to public. + register_post_type( $post_type, array( 'public' => true ) ); + + $ids = self::factory()->post->create_many( 10, array( 'post_type' => $post_type ) ); + + $providers = wp_get_sitemaps(); + + $post_list = $providers['posts']->get_url_list( 1, $post_type ); + + $expected = $this->_get_expected_url_list( $post_type, $ids ); + + // Clean up. + unregister_post_type( $post_type ); + + $this->assertEquals( $expected, $post_list, 'Custom post type posts are not visible.' ); + } + + /** + * Tests getting a URL list for a private custom post type. + */ + public function test_get_url_list_cpt_private() { + $post_type = 'private_type'; + + // Create a private post type for testing against data leaking. + register_post_type( $post_type, array( 'public' => false ) ); + + self::factory()->post->create_many( 10, array( 'post_type' => $post_type ) ); + + $providers = wp_get_sitemaps(); + + $post_list = $providers['posts']->get_url_list( 1, $post_type ); + + // Clean up. + unregister_post_type( $post_type ); + + $this->assertEmpty( $post_list, 'Private post types may be returned by the post provider.' ); + } + + /** + * Helper function for building an expected url list. 
+ * + * @param string $type An object sub type, e.g., post type. + * @param array $ids Array of object IDs. + * @return array A formed URL list. + */ + public function _get_expected_url_list( $type, $ids ) { + $posts = get_posts( + array( + 'include' => $ids, + 'orderby' => 'ID', + 'order' => 'ASC', + 'post_type' => $type, + ) + ); + + return array_map( + static function ( $post ) { + return array( + 'loc' => get_permalink( $post ), + ); + }, + $posts + ); + } + + /** + * Test functionality that adds a new sitemap provider to the registry. + */ + public function test_register_sitemap_provider() { + wp_register_sitemap( 'test_sitemap', self::$test_provider ); + + $sitemaps = wp_get_sitemaps(); + + $this->assertEquals( $sitemaps['test_sitemap'], self::$test_provider, 'Can not confirm sitemap registration is working.' ); + } + + /** + * Test robots.txt output. + */ + public function test_robots_text() { + // Get the text added to the default robots text output. + $robots_text = apply_filters( 'robots_txt', '', true ); + $sitemap_string = 'Sitemap: http://' . WP_TESTS_DOMAIN . '/?sitemap=index'; + + $this->assertContains( $sitemap_string, $robots_text, 'Sitemap URL not included in robots text.' ); + } + + /** + * Test robots.txt output for a private site. + */ + public function test_robots_text_private_site() { + $robots_text = apply_filters( 'robots_txt', '', false ); + $sitemap_string = 'Sitemap: http://' . WP_TESTS_DOMAIN . '/?sitemap=index'; + + $this->assertNotContains( $sitemap_string, $robots_text ); + } + + /** + * Test robots.txt output with permalinks set. + */ + public function test_robots_text_with_permalinks() { + // Set permalinks for testing. + $this->set_permalink_structure( '/%year%/%postname%/' ); + + // Get the text added to the default robots text output. + $robots_text = apply_filters( 'robots_txt', '', true ); + $sitemap_string = 'Sitemap: http://' . WP_TESTS_DOMAIN . '/wp-sitemap.xml'; + + // Clean up permalinks. 
+ $this->set_permalink_structure(); + + $this->assertContains( $sitemap_string, $robots_text, 'Sitemap URL not included in robots text.' ); + } + + /** + * Test robots.txt output with line feed prefix. + */ + public function test_robots_text_prefixed_with_line_feed() { + // Get the text added to the default robots text output. + $robots_text = apply_filters( 'robots_txt', '', true ); + $sitemap_string = "\nSitemap: "; + + $this->assertContains( $sitemap_string, $robots_text, 'Sitemap URL not prefixed with "\n".' ); + } +}