WPDB: If a site is using the utf8 charset, and their version of MySQL supports utf8mb4, auto-upgrade them to utf8mb4.

This patch also resizes some indexes, to allow for the 767 byte index size limit in standard MySQL installs.

See #21212



git-svn-id: https://develop.svn.wordpress.org/trunk@31349 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2015-02-06 04:50:19 +00:00
parent 43789f822a
commit 9f6ddf8944
6 changed files with 151 additions and 25 deletions

View File

@ -44,6 +44,13 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
// Engage multisite if in the middle of turning it on from network.php.
$is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK );
/*
* Indexes have a maximum size of 767 bytes. Historically, we haven't needed to be concerned about that.
* As of 4.2, however, we moved to utf8mb4, which uses 4 bytes per character. This means that an index which
* used to have room for floor(767/3) = 255 characters, now only has room for floor(767/4) = 191 characters.
*/
$max_index_length = 191;
// Blog specific tables.
$blog_tables = "CREATE TABLE $wpdb->terms (
term_id bigint(20) unsigned NOT NULL auto_increment,
@ -51,8 +58,8 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
slug varchar(200) NOT NULL default '',
term_group bigint(10) NOT NULL default 0,
PRIMARY KEY (term_id),
KEY slug (slug),
KEY name (name)
KEY slug (slug($max_index_length)),
KEY name (name($max_index_length))
) $charset_collate;
CREATE TABLE $wpdb->term_taxonomy (
term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment,
@ -79,7 +86,7 @@ CREATE TABLE $wpdb->commentmeta (
meta_value longtext,
PRIMARY KEY (meta_id),
KEY comment_id (comment_id),
KEY meta_key (meta_key)
KEY meta_key (meta_key($max_index_length))
) $charset_collate;
CREATE TABLE $wpdb->comments (
comment_ID bigint(20) unsigned NOT NULL auto_increment,
@ -136,7 +143,7 @@ CREATE TABLE $wpdb->postmeta (
meta_value longtext,
PRIMARY KEY (meta_id),
KEY post_id (post_id),
KEY meta_key (meta_key)
KEY meta_key (meta_key($max_index_length))
) $charset_collate;
CREATE TABLE $wpdb->posts (
ID bigint(20) unsigned NOT NULL auto_increment,
@ -163,7 +170,7 @@ CREATE TABLE $wpdb->posts (
post_mime_type varchar(100) NOT NULL default '',
comment_count bigint(20) NOT NULL default '0',
PRIMARY KEY (ID),
KEY post_name (post_name),
KEY post_name (post_name($max_index_length)),
KEY type_status_date (post_type,post_status,post_date,ID),
KEY post_parent (post_parent),
KEY post_author (post_author)
@ -213,7 +220,7 @@ CREATE TABLE $wpdb->posts (
meta_value longtext,
PRIMARY KEY (umeta_id),
KEY user_id (user_id),
KEY meta_key (meta_key)
KEY meta_key (meta_key($max_index_length))
) $charset_collate;\n";
// Global tables
@ -261,7 +268,7 @@ CREATE TABLE $wpdb->site (
domain varchar(200) NOT NULL default '',
path varchar(100) NOT NULL default '',
PRIMARY KEY (id),
KEY domain (domain,path)
KEY domain (domain(140),path(51))
) $charset_collate;
CREATE TABLE $wpdb->sitemeta (
meta_id bigint(20) NOT NULL auto_increment,
@ -269,7 +276,7 @@ CREATE TABLE $wpdb->sitemeta (
meta_key varchar(255) default NULL,
meta_value longtext,
PRIMARY KEY (meta_id),
KEY meta_key (meta_key),
KEY meta_key (meta_key($max_index_length)),
KEY site_id (site_id)
) $charset_collate;
CREATE TABLE $wpdb->signups (
@ -288,7 +295,7 @@ CREATE TABLE $wpdb->signups (
KEY activation_key (activation_key),
KEY user_email (user_email),
KEY user_login_email (user_login,user_email),
KEY domain_path (domain,path)
KEY domain_path (domain(140),path(51))
) $charset_collate;";
switch ( $scope ) {

View File

@ -519,6 +519,9 @@ function upgrade_all() {
if ( $wp_current_db_version < 29630 )
upgrade_400();
if ( $wp_current_db_version < 31349 )
upgrade_420();
maybe_disable_link_manager();
maybe_disable_automattic_widgets();
@ -1406,6 +1409,27 @@ function upgrade_400() {
}
}
/**
 * Runs the database changes introduced in WordPress 4.2.0.
 *
 * When the stored database version predates 31349 and the connection charset
 * is utf8mb4, every table reported by $wpdb->tables() for the relevant scope
 * is handed to maybe_convert_table_to_utf8mb4(). On multisite only the
 * 'blog' scope is processed here; otherwise the 'all' scope is used.
 *
 * @since 4.2.0
 */
function upgrade_420() {
	global $wp_current_db_version, $wpdb;

	// Nothing to do when the schema is already current or utf8mb4 is not in use.
	if ( ! ( $wp_current_db_version < 31349 ) || 'utf8mb4' !== $wpdb->charset ) {
		return;
	}

	$scope = is_multisite() ? 'blog' : 'all';

	foreach ( $wpdb->tables( $scope ) as $table_name ) {
		maybe_convert_table_to_utf8mb4( $table_name );
	}
}
/**
* Executes network-level upgrade routines.
*
@ -1502,6 +1526,21 @@ function upgrade_network() {
update_site_option( 'illegal_names', $illegal_names );
}
}
// 4.2
if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) {
	if ( ! ( defined( 'DO_NOT_UPGRADE_GLOBAL_TABLES' ) && DO_NOT_UPGRADE_GLOBAL_TABLES ) ) {
		// Shorten the indexes first so they fit in 767 bytes at 4 bytes per
		// character: (140 + 51) * 4 = 764 for domain/path, 191 * 4 = 764 for meta_key.
		$wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" );
		$wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
		// The signups index is named domain_path in the schema (not domain), so
		// that is the index that has to be dropped and re-created.
		$wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain_path, ADD INDEX domain_path(domain(140),path(51))" );

		// With the indexes resized, the global tables can be converted.
		$tables = $wpdb->tables( 'global' );
		foreach ( $tables as $table ) {
			maybe_convert_table_to_utf8mb4( $table );
		}
	}
}
}
//
@ -1607,6 +1646,42 @@ function maybe_add_column($table_name, $column_name, $create_ddl) {
return false;
}
/**
 * If a table only contains utf8 or utf8mb4 columns, convert it to utf8mb4.
 *
 * Columns without a collation (for example numeric or date columns) are
 * ignored. The table is left untouched when any column uses a different
 * character set, or when no column is utf8 (nothing to upgrade).
 *
 * @since 4.2.0
 *
 * @param string $table The table to convert.
 * @return bool true if the table was converted, false if it wasn't.
 */
function maybe_convert_table_to_utf8mb4( $table ) {
	global $wpdb;

	$results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" );
	if ( ! $results ) {
		return false;
	}

	$has_utf8 = false;
	foreach ( $results as $column ) {
		if ( ! $column->Collation ) {
			continue;
		}

		// The Collation field holds a collation name such as "utf8_general_ci";
		// the character set is the part before the first underscore.
		list( $charset ) = explode( '_', $column->Collation );
		$charset = strtolower( $charset );

		if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
			$has_utf8 = true;
		} elseif ( 'utf8mb4' !== $charset ) {
			// Don't upgrade tables that have non-utf8 columns.
			return false;
		}
	}

	if ( ! $has_utf8 ) {
		// Don't bother upgrading tables that don't have utf8 columns.
		return false;
	}

	return $wpdb->query( "ALTER TABLE `$table` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
}
/**
* Retrieve all options as it was for 1.2.
*
@ -2284,6 +2359,17 @@ function pre_schema_upgrade() {
// dbDelta() can recreate but can't drop the index.
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" );
}
// Upgrade versions prior to 4.2.
if ( $wp_current_db_version < 31349 ) {
// So that we can change tables to utf8mb4, we need to shorten the index lengths to less than 767 bytes
$wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" );
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" );
$wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" );
}
}
/**

View File

@ -280,6 +280,11 @@ switch($step) {
case 'DB_HOST' :
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n";
break;
case 'DB_CHARSET' :
if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) {
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n";
}
break;
case 'AUTH_KEY' :
case 'SECURE_AUTH_KEY' :
case 'LOGGED_IN_KEY' :

View File

@ -11,7 +11,7 @@ $wp_version = '4.2-alpha-31007-src';
*
* @global int $wp_db_version
*/
$wp_db_version = 30133;
$wp_db_version = 31349;
/**
* Holds the TinyMCE version

View File

@ -624,8 +624,6 @@ class wpdb {
}
}
$this->init_charset();
$this->dbuser = $dbuser;
$this->dbpassword = $dbpassword;
$this->dbname = $dbname;
@ -717,16 +715,31 @@ class wpdb {
public function init_charset() {
	// Start from the configured values: multisite defaults to utf8, and the
	// DB_COLLATE / DB_CHARSET constants override when they are defined.
	if ( function_exists( 'is_multisite' ) && is_multisite() ) {
		$this->charset = 'utf8';
		if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
			$this->collate = DB_COLLATE;
		} else {
			$this->collate = 'utf8_general_ci';
		}
	} elseif ( defined( 'DB_COLLATE' ) ) {
		$this->collate = DB_COLLATE;
	}

	if ( defined( 'DB_CHARSET' ) ) {
		$this->charset = DB_CHARSET;
	}

	// The utf8mb4 upgrade below queries the connection's capabilities, so stop
	// here when there is no usable handle. The mysqli instance check only
	// applies when mysqli is the driver in use; applying it unconditionally
	// would skip the upgrade for every connection made through another driver.
	if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
		return;
	}

	if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
		$this->charset = 'utf8mb4';
	}

	// A missing or utf8_* collation cannot be paired with the utf8mb4 charset.
	if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) {
		$this->collate = 'utf8mb4_unicode_ci';
	}
}
/**
@ -1476,8 +1489,14 @@ class wpdb {
return false;
} elseif ( $this->dbh ) {
if ( ! $this->has_connected ) {
$this->init_charset();
}
$this->has_connected = true;
$this->set_charset( $this->dbh );
$this->ready = true;
$this->set_sql_mode();
$this->select( $this->dbname, $this->dbh );
@ -2249,14 +2268,14 @@ class wpdb {
* Retrieves the character set for the given column.
*
* @since 4.2.0
* @access protected
* @access public
*
* @param string $table Table name.
* @param string $column Column name.
* @return mixed Column character set as a string. False if the column has no
* character set. {@see WP_Error} object if there was an error.
*/
protected function get_col_charset( $table, $column ) {
public function get_col_charset( $table, $column ) {
$tablekey = strtolower( $table );
$columnkey = strtolower( $column );
@ -2356,7 +2375,6 @@ class wpdb {
'gb2312' => 'EUC-CN',
'ujis' => 'EUC-JP',
'utf32' => 'UTF-32',
'utf8mb4' => 'UTF-8',
);
$supported_charsets = array();
@ -2391,8 +2409,8 @@ class wpdb {
}
}
// utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup.
if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
$regex = '/
(
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
@ -2400,8 +2418,17 @@ class wpdb {
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xE1-\xEC][\x80-\xBF]{2}
| \xED[\x80-\x9F][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF]{2}
){1,50} # ...one or more times
| [\xEE-\xEF][\x80-\xBF]{2}';
if ( 'utf8mb4' === $charset) {
$regex .= '
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
| [\xF1-\xF3][\x80-\xBF]{3}
| \xF4[\x80-\x8F][\x80-\xBF]{2}
';
}
$regex .= '){1,50} # ...one or more times
)
| . # anything else
/x';

View File

@ -130,11 +130,12 @@ class Tests_DB_Charset extends WP_UnitTestCase {
}
/**
 * process_fields() must reject data containing a byte sequence that is
 * invalid in both utf8 and utf8mb4.
 *
 * @ticket 21212
 */
function test_process_fields_failure() {
	global $wpdb;
	// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
	$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
	$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
}
@ -436,6 +437,6 @@ class Tests_DB_Charset extends WP_UnitTestCase {
*/
function test_invalid_characters_in_query() {
	global $wpdb;
	// \xf0\xff\xff\xff is not a valid byte sequence in utf8 or utf8mb4, so the
	// query is expected to be refused. (A well-formed four-byte character such
	// as \xf0\x9f\x98\x88 is not suitable here: it is valid utf8mb4.)
	$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
}
}