diff --git a/src/wp-includes/wp-db.php b/src/wp-includes/wp-db.php
index d261254926..50d71c6c13 100644
--- a/src/wp-includes/wp-db.php
+++ b/src/wp-includes/wp-db.php
@@ -735,41 +735,67 @@ class wpdb {
 	 */
 	public function init_charset() {
+		// Start from empty strings so both values are always defined, even when
+		// neither DB_CHARSET nor DB_COLLATE is set and this is not a multisite install.
+		$charset = '';
+		$collate = '';
+
 		if ( function_exists('is_multisite') && is_multisite() ) {
-			$this->charset = 'utf8';
+			$charset = 'utf8';
 			if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
-				$this->collate = DB_COLLATE;
+				$collate = DB_COLLATE;
 			} else {
-				$this->collate = 'utf8_general_ci';
+				$collate = 'utf8_general_ci';
 			}
 		} elseif ( defined( 'DB_COLLATE' ) ) {
-			$this->collate = DB_COLLATE;
+			$collate = DB_COLLATE;
 		}
 
 		if ( defined( 'DB_CHARSET' ) ) {
-			$this->charset = DB_CHARSET;
+			$charset = DB_CHARSET;
 		}
 
+		$charset_collate = $this->determine_charset( $charset, $collate );
+
+		$this->charset = $charset_collate['charset'];
+		$this->collate = $charset_collate['collate'];
+	}
+
+	/**
+	 * Given a charset and collation, determine the best charset and collation to use.
+	 *
+	 * For example, when able, utf8mb4 should be used instead of utf8.
+	 *
+	 * @since 4.6.0
+	 *
+	 * @param string $charset The character set to check.
+	 * @param string $collate The collation to check.
+	 *
+	 * @return array The most appropriate character set and collation to use, keyed by 'charset' and 'collate'.
+	 */
+	public function determine_charset( $charset, $collate ) {
 		if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
-			return;
+			return compact( 'charset', 'collate' );
 		}
 
-		if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
-			$this->charset = 'utf8mb4';
+		if ( 'utf8' === $charset && $this->has_cap( 'utf8mb4' ) ) {
+			$charset = 'utf8mb4';
 		}
 
-		if ( 'utf8mb4' === $this->charset ) {
+		if ( 'utf8mb4' === $charset ) {
 			// _general_ is outdated, so we can upgrade it to _unicode_, instead.
-			if ( ! $this->collate || 'utf8_general_ci' === $this->collate ) {
-				$this->collate = 'utf8mb4_unicode_ci';
+			if ( ! $collate || 'utf8_general_ci' === $collate ) {
+				$collate = 'utf8mb4_unicode_ci';
 			} else {
-				$this->collate = str_replace( 'utf8_', 'utf8mb4_', $this->collate );
+				$collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
 			}
 		}
 
 		// _unicode_520_ is a better collation, we should use that when it's available.
-		if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $this->collate ) {
-			$this->collate = 'utf8mb4_unicode_520_ci';
+		if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
+			$collate = 'utf8mb4_unicode_520_ci';
 		}
+
+		return compact( 'charset', 'collate' );
 	}
 
 	/**
diff --git a/tests/phpunit/tests/db.php b/tests/phpunit/tests/db.php
index f49a3e07b6..62077b0c12 100644
--- a/tests/phpunit/tests/db.php
+++ b/tests/phpunit/tests/db.php
@@ -955,4 +955,78 @@ class Tests_DB extends WP_UnitTestCase {
 
 		$wpdb->check_connection();
 	}
+
+	/**
+	 * @ticket 36917
+	 */
+	function test_charset_not_determined_when_disconnected() {
+		global $wpdb;
+
+		$charset = 'utf8';
+		$collate = 'this_isnt_a_collation';
+
+		$wpdb->close();
+
+		$result = $wpdb->determine_charset( $charset, $collate );
+
+		$this->assertSame( compact( 'charset', 'collate' ), $result );
+
+		$wpdb->check_connection();
+	}
+
+	/**
+	 * @ticket 36917
+	 */
+	function test_charset_switched_to_utf8mb4() {
+		global $wpdb;
+
+		if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
+			$this->markTestSkipped( 'This test requires utf8mb4 support.' );
+		}
+
+		$charset = 'utf8';
+		$collate = 'utf8_general_ci';
+
+		$result = $wpdb->determine_charset( $charset, $collate );
+
+		$this->assertSame( 'utf8mb4', $result['charset'] );
+	}
+
+	/**
+	 * @ticket 32105
+	 * @ticket 36917
+	 */
+	function test_collate_switched_to_utf8mb4_520() {
+		global $wpdb;
+
+		if ( ! $wpdb->has_cap( 'utf8mb4_520' ) ) {
+			$this->markTestSkipped( 'This test requires utf8mb4_520 support.' );
+		}
+
+		$charset = 'utf8';
+		$collate = 'utf8_general_ci';
+
+		$result = $wpdb->determine_charset( $charset, $collate );
+
+		$this->assertSame( 'utf8mb4_unicode_520_ci', $result['collate'] );
+	}
+
+	/**
+	 * @ticket 36917
+	 * @ticket 37522
+	 */
+	function test_non_unicode_collations() {
+		global $wpdb;
+
+		if ( ! $wpdb->has_cap( 'utf8mb4' ) ) {
+			$this->markTestSkipped( 'This test requires utf8mb4 support.' );
+		}
+
+		$charset = 'utf8';
+		$collate = 'utf8_swedish_ci';
+
+		$result = $wpdb->determine_charset( $charset, $collate );
+
+		$this->assertSame( 'utf8mb4_swedish_ci', $result['collate'] );
+	}
 }