WPDB: If a site is using the utf8 charset, and their version of MySQL supports utf8mb4, auto-upgrade them to utf8mb4.

This patch also resizes some indexes, to allow for the 767 byte index size limit in standard MySQL installs.

See #21212



git-svn-id: https://develop.svn.wordpress.org/trunk@31349 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2015-02-06 04:50:19 +00:00
parent 43789f822a
commit 9f6ddf8944
6 changed files with 151 additions and 25 deletions

View File

@ -44,6 +44,13 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
// Engage multisite if in the middle of turning it on from network.php. // Engage multisite if in the middle of turning it on from network.php.
$is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK ); $is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK );
/*
* Indexes have a maximum size of 767 bytes. Historically, we haven't needed to be concerned about that.
* As of 4.2, however, we moved to utf8mb4, which uses 4 bytes per character. This means that an index which
* used to have room for floor(767/3) = 255 characters, now only has room for floor(767/4) = 191 characters.
*/
$max_index_length = 191;
// Blog specific tables. // Blog specific tables.
$blog_tables = "CREATE TABLE $wpdb->terms ( $blog_tables = "CREATE TABLE $wpdb->terms (
term_id bigint(20) unsigned NOT NULL auto_increment, term_id bigint(20) unsigned NOT NULL auto_increment,
@ -51,8 +58,8 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
slug varchar(200) NOT NULL default '', slug varchar(200) NOT NULL default '',
term_group bigint(10) NOT NULL default 0, term_group bigint(10) NOT NULL default 0,
PRIMARY KEY (term_id), PRIMARY KEY (term_id),
KEY slug (slug), KEY slug (slug($max_index_length)),
KEY name (name) KEY name (name($max_index_length))
) $charset_collate; ) $charset_collate;
CREATE TABLE $wpdb->term_taxonomy ( CREATE TABLE $wpdb->term_taxonomy (
term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment, term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment,
@ -79,7 +86,7 @@ CREATE TABLE $wpdb->commentmeta (
meta_value longtext, meta_value longtext,
PRIMARY KEY (meta_id), PRIMARY KEY (meta_id),
KEY comment_id (comment_id), KEY comment_id (comment_id),
KEY meta_key (meta_key) KEY meta_key (meta_key($max_index_length))
) $charset_collate; ) $charset_collate;
CREATE TABLE $wpdb->comments ( CREATE TABLE $wpdb->comments (
comment_ID bigint(20) unsigned NOT NULL auto_increment, comment_ID bigint(20) unsigned NOT NULL auto_increment,
@ -136,7 +143,7 @@ CREATE TABLE $wpdb->postmeta (
meta_value longtext, meta_value longtext,
PRIMARY KEY (meta_id), PRIMARY KEY (meta_id),
KEY post_id (post_id), KEY post_id (post_id),
KEY meta_key (meta_key) KEY meta_key (meta_key($max_index_length))
) $charset_collate; ) $charset_collate;
CREATE TABLE $wpdb->posts ( CREATE TABLE $wpdb->posts (
ID bigint(20) unsigned NOT NULL auto_increment, ID bigint(20) unsigned NOT NULL auto_increment,
@ -163,7 +170,7 @@ CREATE TABLE $wpdb->posts (
post_mime_type varchar(100) NOT NULL default '', post_mime_type varchar(100) NOT NULL default '',
comment_count bigint(20) NOT NULL default '0', comment_count bigint(20) NOT NULL default '0',
PRIMARY KEY (ID), PRIMARY KEY (ID),
KEY post_name (post_name), KEY post_name (post_name($max_index_length)),
KEY type_status_date (post_type,post_status,post_date,ID), KEY type_status_date (post_type,post_status,post_date,ID),
KEY post_parent (post_parent), KEY post_parent (post_parent),
KEY post_author (post_author) KEY post_author (post_author)
@ -213,7 +220,7 @@ CREATE TABLE $wpdb->posts (
meta_value longtext, meta_value longtext,
PRIMARY KEY (umeta_id), PRIMARY KEY (umeta_id),
KEY user_id (user_id), KEY user_id (user_id),
KEY meta_key (meta_key) KEY meta_key (meta_key($max_index_length))
) $charset_collate;\n"; ) $charset_collate;\n";
// Global tables // Global tables
@ -261,7 +268,7 @@ CREATE TABLE $wpdb->site (
domain varchar(200) NOT NULL default '', domain varchar(200) NOT NULL default '',
path varchar(100) NOT NULL default '', path varchar(100) NOT NULL default '',
PRIMARY KEY (id), PRIMARY KEY (id),
KEY domain (domain,path) KEY domain (domain(140),path(51))
) $charset_collate; ) $charset_collate;
CREATE TABLE $wpdb->sitemeta ( CREATE TABLE $wpdb->sitemeta (
meta_id bigint(20) NOT NULL auto_increment, meta_id bigint(20) NOT NULL auto_increment,
@ -269,7 +276,7 @@ CREATE TABLE $wpdb->sitemeta (
meta_key varchar(255) default NULL, meta_key varchar(255) default NULL,
meta_value longtext, meta_value longtext,
PRIMARY KEY (meta_id), PRIMARY KEY (meta_id),
KEY meta_key (meta_key), KEY meta_key (meta_key($max_index_length)),
KEY site_id (site_id) KEY site_id (site_id)
) $charset_collate; ) $charset_collate;
CREATE TABLE $wpdb->signups ( CREATE TABLE $wpdb->signups (
@ -288,7 +295,7 @@ CREATE TABLE $wpdb->signups (
KEY activation_key (activation_key), KEY activation_key (activation_key),
KEY user_email (user_email), KEY user_email (user_email),
KEY user_login_email (user_login,user_email), KEY user_login_email (user_login,user_email),
KEY domain_path (domain,path) KEY domain_path (domain(140),path(51))
) $charset_collate;"; ) $charset_collate;";
switch ( $scope ) { switch ( $scope ) {

View File

@ -519,6 +519,9 @@ function upgrade_all() {
if ( $wp_current_db_version < 29630 ) if ( $wp_current_db_version < 29630 )
upgrade_400(); upgrade_400();
if ( $wp_current_db_version < 31349 )
upgrade_420();
maybe_disable_link_manager(); maybe_disable_link_manager();
maybe_disable_automattic_widgets(); maybe_disable_automattic_widgets();
@ -1406,6 +1409,27 @@ function upgrade_400() {
} }
} }
/**
 * Runs the upgrade steps introduced in WordPress 4.2.0.
 *
 * When the stored database version predates 31349 and the connection charset
 * is utf8mb4, every table returned by $wpdb->tables() for the relevant scope
 * is handed to maybe_convert_table_to_utf8mb4(). On multisite only the 'blog'
 * scope is requested here; otherwise the 'all' scope is used.
 *
 * @since 4.2.0
 */
function upgrade_420() {
	global $wp_current_db_version, $wpdb;

	// Nothing to do once the database is already at (or past) this revision.
	if ( ! ( $wp_current_db_version < 31349 ) ) {
		return;
	}

	// The conversion only applies when the connection is using utf8mb4.
	if ( 'utf8mb4' !== $wpdb->charset ) {
		return;
	}

	$scope = is_multisite() ? 'blog' : 'all';

	foreach ( $wpdb->tables( $scope ) as $table_name ) {
		maybe_convert_table_to_utf8mb4( $table_name );
	}
}
/** /**
* Executes network-level upgrade routines. * Executes network-level upgrade routines.
* *
@ -1502,6 +1526,21 @@ function upgrade_network() {
update_site_option( 'illegal_names', $illegal_names ); update_site_option( 'illegal_names', $illegal_names );
} }
} }
// 4.2
if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) {
	if ( ! ( defined( 'DO_NOT_UPGRADE_GLOBAL_TABLES' ) && DO_NOT_UPGRADE_GLOBAL_TABLES ) ) {
		// Shorten the global-table indexes first so they fit the 767 byte index
		// limit once each character takes up to 4 bytes under utf8mb4.
		$wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" );
		$wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
		// The signups index on (domain,path) is named `domain_path` in the schema,
		// not `domain`; dropping a non-existent index makes the whole ALTER fail.
		$wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain_path, ADD INDEX domain_path(domain(140),path(51))" );

		$tables = $wpdb->tables( 'global' );
		foreach ( $tables as $table ) {
			maybe_convert_table_to_utf8mb4( $table );
		}
	}
}
} }
// //
@ -1607,6 +1646,42 @@ function maybe_add_column($table_name, $column_name, $create_ddl) {
return false; return false;
} }
/**
 * If a table only contains utf8 or utf8mb4 columns, convert it to utf8mb4.
 *
 * The column list is read with SHOW FULL COLUMNS. Columns without a collation
 * (non-text columns) are ignored. The character set of each remaining column
 * is taken from the prefix of its collation name (e.g. `utf8_general_ci` is
 * charset `utf8`). The table is left untouched when any column uses another
 * character set, or when no utf8 column exists at all.
 *
 * @since 4.2.0
 *
 * @param string $table The table to convert.
 * @return bool|int False if the table was not converted; otherwise the result
 *                  of the ALTER TABLE call made through $wpdb->query().
 */
function maybe_convert_table_to_utf8mb4( $table ) {
	global $wpdb;

	$results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" );
	if ( ! $results ) {
		return false;
	}

	$has_utf8 = false;
	foreach ( $results as $column ) {
		if ( ! $column->Collation ) {
			// No collation reported for this column, so it has no charset to check.
			continue;
		}

		// Collation holds a collation name, not a charset: compare its charset prefix.
		list( $charset ) = explode( '_', $column->Collation );
		$charset = strtolower( $charset );

		if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
			$has_utf8 = true;
		} elseif ( 'utf8mb4' !== $charset ) {
			// Don't upgrade tables that have non-utf8 columns.
			return false;
		}
	}

	if ( ! $has_utf8 ) {
		// Don't bother upgrading tables that don't have utf8 columns.
		return false;
	}

	return $wpdb->query( "ALTER TABLE $table CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
}
/** /**
* Retrieve all options as it was for 1.2. * Retrieve all options as it was for 1.2.
* *
@ -2284,6 +2359,17 @@ function pre_schema_upgrade() {
// dbDelta() can recreate but can't drop the index. // dbDelta() can recreate but can't drop the index.
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" );
} }
// Upgrade versions prior to 4.2.
if ( $wp_current_db_version < 31349 ) {
// So that we can change tables to utf8mb4, we need to shorten the index lengths to less than 767 bytes
$wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" );
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" );
$wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
$wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" );
}
} }
/** /**

View File

@ -280,6 +280,11 @@ switch($step) {
case 'DB_HOST' : case 'DB_HOST' :
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n";
break; break;
case 'DB_CHARSET' :
if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) {
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n";
}
break;
case 'AUTH_KEY' : case 'AUTH_KEY' :
case 'SECURE_AUTH_KEY' : case 'SECURE_AUTH_KEY' :
case 'LOGGED_IN_KEY' : case 'LOGGED_IN_KEY' :

View File

@ -11,7 +11,7 @@ $wp_version = '4.2-alpha-31007-src';
* *
* @global int $wp_db_version * @global int $wp_db_version
*/ */
$wp_db_version = 30133; $wp_db_version = 31349;
/** /**
* Holds the TinyMCE version * Holds the TinyMCE version

View File

@ -624,8 +624,6 @@ class wpdb {
} }
} }
$this->init_charset();
$this->dbuser = $dbuser; $this->dbuser = $dbuser;
$this->dbpassword = $dbpassword; $this->dbpassword = $dbpassword;
$this->dbname = $dbname; $this->dbname = $dbname;
@ -717,18 +715,33 @@ class wpdb {
public function init_charset() {
	// Start from the configured defaults: multisite defaults to utf8 with
	// utf8_general_ci unless DB_COLLATE is defined and non-empty.
	if ( function_exists( 'is_multisite' ) && is_multisite() ) {
		$this->charset = 'utf8';
		if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
			$this->collate = DB_COLLATE;
		} else {
			$this->collate = 'utf8_general_ci';
		}
	} elseif ( defined( 'DB_COLLATE' ) ) {
		$this->collate = DB_COLLATE;
	}

	// An explicit DB_CHARSET always overrides the default charset.
	if ( defined( 'DB_CHARSET' ) ) {
		$this->charset = DB_CHARSET;
	}

	// Stop before the capability check when there is no usable connection
	// handle. A mysqli object is only required when mysqli is actually in use;
	// otherwise any non-empty handle is acceptable. (Previously every
	// non-mysqli handle was rejected, so the utf8mb4 upgrade below could never
	// run for connections made without mysqli.)
	if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) || empty( $this->dbh ) ) {
		return;
	}

	// Upgrade utf8 to utf8mb4 when has_cap() reports utf8mb4 support.
	if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
		$this->charset = 'utf8mb4';
	}

	// A missing or utf8_* collation cannot be used with utf8mb4; replace it.
	if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) {
		$this->collate = 'utf8mb4_unicode_ci';
	}
}
/** /**
* Sets the connection's character set. * Sets the connection's character set.
* *
@ -1476,8 +1489,14 @@ class wpdb {
return false; return false;
} elseif ( $this->dbh ) { } elseif ( $this->dbh ) {
if ( ! $this->has_connected ) {
$this->init_charset();
}
$this->has_connected = true; $this->has_connected = true;
$this->set_charset( $this->dbh ); $this->set_charset( $this->dbh );
$this->ready = true; $this->ready = true;
$this->set_sql_mode(); $this->set_sql_mode();
$this->select( $this->dbname, $this->dbh ); $this->select( $this->dbname, $this->dbh );
@ -2249,14 +2268,14 @@ class wpdb {
* Retrieves the character set for the given column. * Retrieves the character set for the given column.
* *
* @since 4.2.0 * @since 4.2.0
* @access protected * @access public
* *
* @param string $table Table name. * @param string $table Table name.
* @param string $column Column name. * @param string $column Column name.
* @return mixed Column character set as a string. False if the column has no * @return mixed Column character set as a string. False if the column has no
* character set. {@see WP_Error} object if there was an error. * character set. {@see WP_Error} object if there was an error.
*/ */
protected function get_col_charset( $table, $column ) { public function get_col_charset( $table, $column ) {
$tablekey = strtolower( $table ); $tablekey = strtolower( $table );
$columnkey = strtolower( $column ); $columnkey = strtolower( $column );
@ -2356,7 +2375,6 @@ class wpdb {
'gb2312' => 'EUC-CN', 'gb2312' => 'EUC-CN',
'ujis' => 'EUC-JP', 'ujis' => 'EUC-JP',
'utf32' => 'UTF-32', 'utf32' => 'UTF-32',
'utf8mb4' => 'UTF-8',
); );
$supported_charsets = array(); $supported_charsets = array();
@ -2391,8 +2409,8 @@ class wpdb {
} }
} }
// utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup. // utf8 can be handled by regex, which is a bunch faster than a DB lookup.
if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
$regex = '/ $regex = '/
( (
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
@ -2400,8 +2418,17 @@ class wpdb {
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xE1-\xEC][\x80-\xBF]{2} | [\xE1-\xEC][\x80-\xBF]{2}
| \xED[\x80-\x9F][\x80-\xBF] | \xED[\x80-\x9F][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF]{2} | [\xEE-\xEF][\x80-\xBF]{2}';
){1,50} # ...one or more times
if ( 'utf8mb4' === $charset) {
$regex .= '
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
| [\xF1-\xF3][\x80-\xBF]{3}
| \xF4[\x80-\x8F][\x80-\xBF]{2}
';
}
$regex .= '){1,50} # ...one or more times
) )
| . # anything else | . # anything else
/x'; /x';

View File

@ -130,11 +130,12 @@ class Tests_DB_Charset extends WP_UnitTestCase {
} }
/** /**
* @ ticket 21212 * @ticket 21212
*/ */
function test_process_fields_failure() { function test_process_fields_failure() {
global $wpdb; global $wpdb;
$data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
} }
@ -436,6 +437,6 @@ class Tests_DB_Charset extends WP_UnitTestCase {
*/ */
function test_invalid_characters_in_query() { function test_invalid_characters_in_query() {
global $wpdb; global $wpdb;
$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
} }
} }