Database: Split the logic of wpdb::init_charset()
into a separate method.
The logic for determining the appropriate character set and collation to use is becoming more complex, particularly with the recent additions of [37522] and [37523]. As `init_charset()` has side effects, and makes use of constants instead of parameters, it's not possible to unit test this logic. This commit splits the logic part of `init_charset()` out into a new method, `wpdb::determine_charset()`, along with appropriate unit tests. See #32105, #37522. Fixes #36917. git-svn-id: https://develop.svn.wordpress.org/trunk@37601 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
5fdf2b4b44
commit
bc975d28d4
@ -735,41 +735,62 @@ class wpdb {
|
||||
*/
|
||||
public function init_charset() {
	// Start from empty strings so determine_charset() always receives defined
	// string arguments, even when neither DB_CHARSET nor DB_COLLATE is defined
	// on a single-site install.
	$charset = '';
	$collate = '';

	if ( function_exists( 'is_multisite' ) && is_multisite() ) {
		// Multisite starts from utf8; a non-empty DB_COLLATE overrides the default collation.
		$charset = 'utf8';
		if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
			$collate = DB_COLLATE;
		} else {
			$collate = 'utf8_general_ci';
		}
	} elseif ( defined( 'DB_COLLATE' ) ) {
		$collate = DB_COLLATE;
	}

	// An explicitly configured DB_CHARSET wins over the multisite default.
	if ( defined( 'DB_CHARSET' ) ) {
		$charset = DB_CHARSET;
	}

	// The side-effect-free decision logic lives in determine_charset() so it can be unit tested.
	$charset_collate = $this->determine_charset( $charset, $collate );

	$this->charset = $charset_collate['charset'];
	$this->collate = $charset_collate['collate'];
}
|
||||
|
||||
/**
 * Given a charset and collation, determine the best charset and collation to use.
 *
 * For example, when able, utf8mb4 should be used instead of utf8. Conversely,
 * when utf8mb4 is requested but has_cap( 'utf8mb4' ) reports it as unavailable,
 * the pair is downgraded to utf8 so the result is usable on this connection.
 *
 * This method only reads connection state; it does not modify
 * $this->charset or $this->collate.
 *
 * @since 4.6.0
 *
 * @param string $charset The character set to check.
 * @param string $collate The collation to check.
 *
 * @return array The most appropriate character set and collation to use,
 *               keyed by 'charset' and 'collate'.
 */
public function determine_charset( $charset, $collate ) {
	// Without a usable connection handle the capabilities cannot be checked,
	// so hand the values back untouched.
	if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
		return compact( 'charset', 'collate' );
	}

	$supports_utf8mb4 = $this->has_cap( 'utf8mb4' );

	if ( 'utf8' === $charset && $supports_utf8mb4 ) {
		$charset = 'utf8mb4';
	}

	// utf8mb4 was requested explicitly but is not available: fall back to utf8
	// and map any utf8mb4_* collation to its utf8_* counterpart.
	if ( 'utf8mb4' === $charset && ! $supports_utf8mb4 ) {
		$charset = 'utf8';
		$collate = str_replace( 'utf8mb4_', 'utf8_', $collate );
	}

	if ( 'utf8mb4' === $charset ) {
		// _general_ is outdated, so we can upgrade it to _unicode_, instead.
		if ( ! $collate || 'utf8_general_ci' === $collate ) {
			$collate = 'utf8mb4_unicode_ci';
		} else {
			// Keep the caller's collation flavour, just move it to the utf8mb4 family.
			$collate = str_replace( 'utf8_', 'utf8mb4_', $collate );
		}
	}

	// _unicode_520_ is a better collation, we should use that when it's available.
	if ( $this->has_cap( 'utf8mb4_520' ) && 'utf8mb4_unicode_ci' === $collate ) {
		$collate = 'utf8mb4_unicode_520_ci';
	}

	return compact( 'charset', 'collate' );
}
|
||||
|
||||
/**
|
||||
|
@ -955,4 +955,78 @@ class Tests_DB extends WP_UnitTestCase {
|
||||
|
||||
$wpdb->check_connection();
|
||||
}
|
||||
|
||||
/**
 * After $wpdb->close(), determine_charset() should return its arguments unchanged.
 *
 * @ticket 36917
 */
function test_charset_not_determined_when_disconnected() {
	global $wpdb;

	// Deliberately bogus collation: it must come back verbatim.
	$expected = array(
		'charset' => 'utf8',
		'collate' => 'this_isnt_a_collation',
	);

	$wpdb->close();

	$actual = $wpdb->determine_charset( $expected['charset'], $expected['collate'] );

	$this->assertSame( $expected, $actual );

	// Presumably re-establishes the connection for the tests that follow.
	$wpdb->check_connection();
}
|
||||
|
||||
/**
 * A utf8 charset should be reported as utf8mb4 when the connection has the utf8mb4 capability.
 *
 * @ticket 36917
 */
function test_charset_switched_to_utf8mb4() {
	global $wpdb;

	$has_utf8mb4 = $wpdb->has_cap( 'utf8mb4' );
	if ( ! $has_utf8mb4 ) {
		$this->markTestSkipped( 'This test requires utf8mb4 support.' );
	}

	$determined = $wpdb->determine_charset( 'utf8', 'utf8_general_ci' );

	$this->assertSame( 'utf8mb4', $determined['charset'] );
}
|
||||
|
||||
/**
 * utf8_general_ci should end up as utf8mb4_unicode_520_ci when the utf8mb4_520 capability is present.
 *
 * @ticket 32105
 * @ticket 36917
 */
function test_collate_switched_to_utf8mb4_520() {
	global $wpdb;

	$has_520 = $wpdb->has_cap( 'utf8mb4_520' );
	if ( ! $has_520 ) {
		$this->markTestSkipped( 'This test requires utf8mb4_520 support.' );
	}

	$determined = $wpdb->determine_charset( 'utf8', 'utf8_general_ci' );

	$this->assertSame( 'utf8mb4_unicode_520_ci', $determined['collate'] );
}
|
||||
|
||||
/**
 * A language-specific utf8 collation should be mapped to its utf8mb4 equivalent.
 *
 * @ticket 36917
 * @ticket 37522
 */
function test_non_unicode_collations() {
	global $wpdb;

	$has_utf8mb4 = $wpdb->has_cap( 'utf8mb4' );
	if ( ! $has_utf8mb4 ) {
		$this->markTestSkipped( 'This test requires utf8mb4 support.' );
	}

	$determined = $wpdb->determine_charset( 'utf8', 'utf8_swedish_ci' );

	$this->assertSame( 'utf8mb4_swedish_ci', $determined['collate'] );
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user