WPDB: If a site is using the utf8 charset, and their version of MySQL supports utf8mb4, auto-upgrade them to utf8mb4.
This patch also resizes some indexes, to allow for the 767 byte index size limit in standard MySQL installs.
See #21212.
git-svn-id: https://develop.svn.wordpress.org/trunk@31349 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
43789f822a
commit
9f6ddf8944
@ -44,6 +44,13 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
|
||||
// Engage multisite if in the middle of turning it on from network.php.
|
||||
$is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK );
|
||||
|
||||
/*
|
||||
* Indexes have a maximum size of 767 bytes. Historically, we haven't needed to be concerned about that.
|
||||
* As of 4.2, however, we moved to utf8mb4, which uses 4 bytes per character. This means that an index which
|
||||
* used to have room for floor(767/3) = 255 characters, now only has room for floor(767/4) = 191 characters.
|
||||
*/
|
||||
$max_index_length = 191;
|
||||
|
||||
// Blog specific tables.
|
||||
$blog_tables = "CREATE TABLE $wpdb->terms (
|
||||
term_id bigint(20) unsigned NOT NULL auto_increment,
|
||||
@ -51,8 +58,8 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) {
|
||||
slug varchar(200) NOT NULL default '',
|
||||
term_group bigint(10) NOT NULL default 0,
|
||||
PRIMARY KEY (term_id),
|
||||
KEY slug (slug),
|
||||
KEY name (name)
|
||||
KEY slug (slug($max_index_length)),
|
||||
KEY name (name($max_index_length))
|
||||
) $charset_collate;
|
||||
CREATE TABLE $wpdb->term_taxonomy (
|
||||
term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment,
|
||||
@ -79,7 +86,7 @@ CREATE TABLE $wpdb->commentmeta (
|
||||
meta_value longtext,
|
||||
PRIMARY KEY (meta_id),
|
||||
KEY comment_id (comment_id),
|
||||
KEY meta_key (meta_key)
|
||||
KEY meta_key (meta_key($max_index_length))
|
||||
) $charset_collate;
|
||||
CREATE TABLE $wpdb->comments (
|
||||
comment_ID bigint(20) unsigned NOT NULL auto_increment,
|
||||
@ -136,7 +143,7 @@ CREATE TABLE $wpdb->postmeta (
|
||||
meta_value longtext,
|
||||
PRIMARY KEY (meta_id),
|
||||
KEY post_id (post_id),
|
||||
KEY meta_key (meta_key)
|
||||
KEY meta_key (meta_key($max_index_length))
|
||||
) $charset_collate;
|
||||
CREATE TABLE $wpdb->posts (
|
||||
ID bigint(20) unsigned NOT NULL auto_increment,
|
||||
@ -163,7 +170,7 @@ CREATE TABLE $wpdb->posts (
|
||||
post_mime_type varchar(100) NOT NULL default '',
|
||||
comment_count bigint(20) NOT NULL default '0',
|
||||
PRIMARY KEY (ID),
|
||||
KEY post_name (post_name),
|
||||
KEY post_name (post_name($max_index_length)),
|
||||
KEY type_status_date (post_type,post_status,post_date,ID),
|
||||
KEY post_parent (post_parent),
|
||||
KEY post_author (post_author)
|
||||
@ -213,7 +220,7 @@ CREATE TABLE $wpdb->posts (
|
||||
meta_value longtext,
|
||||
PRIMARY KEY (umeta_id),
|
||||
KEY user_id (user_id),
|
||||
KEY meta_key (meta_key)
|
||||
KEY meta_key (meta_key($max_index_length))
|
||||
) $charset_collate;\n";
|
||||
|
||||
// Global tables
|
||||
@ -261,7 +268,7 @@ CREATE TABLE $wpdb->site (
|
||||
domain varchar(200) NOT NULL default '',
|
||||
path varchar(100) NOT NULL default '',
|
||||
PRIMARY KEY (id),
|
||||
KEY domain (domain,path)
|
||||
KEY domain (domain(140),path(51))
|
||||
) $charset_collate;
|
||||
CREATE TABLE $wpdb->sitemeta (
|
||||
meta_id bigint(20) NOT NULL auto_increment,
|
||||
@ -269,7 +276,7 @@ CREATE TABLE $wpdb->sitemeta (
|
||||
meta_key varchar(255) default NULL,
|
||||
meta_value longtext,
|
||||
PRIMARY KEY (meta_id),
|
||||
KEY meta_key (meta_key),
|
||||
KEY meta_key (meta_key($max_index_length)),
|
||||
KEY site_id (site_id)
|
||||
) $charset_collate;
|
||||
CREATE TABLE $wpdb->signups (
|
||||
@ -288,7 +295,7 @@ CREATE TABLE $wpdb->signups (
|
||||
KEY activation_key (activation_key),
|
||||
KEY user_email (user_email),
|
||||
KEY user_login_email (user_login,user_email),
|
||||
KEY domain_path (domain,path)
|
||||
KEY domain_path (domain(140),path(51))
|
||||
) $charset_collate;";
|
||||
|
||||
switch ( $scope ) {
|
||||
|
@ -519,6 +519,9 @@ function upgrade_all() {
|
||||
if ( $wp_current_db_version < 29630 )
|
||||
upgrade_400();
|
||||
|
||||
if ( $wp_current_db_version < 31349 )
|
||||
upgrade_420();
|
||||
|
||||
maybe_disable_link_manager();
|
||||
|
||||
maybe_disable_automattic_widgets();
|
||||
@ -1406,6 +1409,27 @@ function upgrade_400() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs the database changes introduced in WordPress 4.2.0.
 *
 * When the stored database version predates 31349 and the connection charset
 * is utf8mb4, every table returned by `$wpdb->tables()` for the chosen scope
 * is handed to maybe_convert_table_to_utf8mb4(). On multisite only the 'blog'
 * scope is requested; otherwise the 'all' scope is used.
 *
 * @since 4.2.0
 */
function upgrade_420() {
	global $wp_current_db_version, $wpdb;

	// Nothing to do unless this is an older schema running on a utf8mb4 connection.
	if ( ! ( $wp_current_db_version < 31349 ) || 'utf8mb4' !== $wpdb->charset ) {
		return;
	}

	$scope = is_multisite() ? 'blog' : 'all';

	foreach ( $wpdb->tables( $scope ) as $table_name ) {
		maybe_convert_table_to_utf8mb4( $table_name );
	}
}
|
||||
|
||||
/**
|
||||
* Executes network-level upgrade routines.
|
||||
*
|
||||
@ -1502,6 +1526,21 @@ function upgrade_network() {
|
||||
update_site_option( 'illegal_names', $illegal_names );
|
||||
}
|
||||
}
|
||||
|
||||
// 4.2
|
||||
if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) {
|
||||
if ( ! ( defined( 'DO_NOT_UPGRADE_GLOBAL_TABLES' ) && DO_NOT_UPGRADE_GLOBAL_TABLES ) ) {
|
||||
$wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" );
|
||||
|
||||
$tables = $wpdb->tables( 'global' );
|
||||
|
||||
foreach ( $tables as $table ) {
|
||||
maybe_convert_table_to_utf8mb4( $table );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
@ -1607,6 +1646,42 @@ function maybe_add_column($table_name, $column_name, $create_ddl) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * If a table only contains utf8 or utf8mb4 columns, convert it to utf8mb4.
 *
 * The column list is read with `SHOW FULL COLUMNS`. Its `Collation` field
 * holds a collation name (for example `utf8_general_ci`), not a character set
 * name, so the character set is derived from the part of the collation that
 * precedes the first underscore. `utf8mb3` is treated the same as `utf8`.
 * Columns without a collation (non-text columns) are ignored.
 *
 * @since 4.2.0
 *
 * @param string $table The table to convert.
 * @return bool False if the table was left untouched (no columns found, a
 *              column uses a non-utf8 character set, or no column is utf8);
 *              otherwise whatever `$wpdb->query()` returns for the
 *              `ALTER TABLE` statement.
 */
function maybe_convert_table_to_utf8mb4( $table ) {
	global $wpdb;

	$results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" );
	if ( ! $results ) {
		return false;
	}

	$has_utf8 = false;
	foreach ( $results as $column ) {
		if ( ! $column->Collation ) {
			// Numeric, date and binary columns carry no collation; nothing to check.
			continue;
		}

		// Collations are named "<charset>_<variant>", e.g. "utf8_general_ci".
		list( $charset ) = explode( '_', $column->Collation );
		$charset         = strtolower( $charset );

		if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
			$has_utf8 = true;
		} elseif ( 'utf8mb4' !== $charset ) {
			// Don't upgrade tables that have non-utf8 columns.
			return false;
		}
	}

	if ( ! $has_utf8 ) {
		// Don't bother upgrading tables that don't have utf8 columns.
		return false;
	}

	return $wpdb->query( "ALTER TABLE `$table` CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" );
}
|
||||
|
||||
/**
|
||||
* Retrieve all options as it was for 1.2.
|
||||
*
|
||||
@ -2284,6 +2359,17 @@ function pre_schema_upgrade() {
|
||||
// dbDelta() can recreate but can't drop the index.
|
||||
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" );
|
||||
}
|
||||
|
||||
// Upgrade versions prior to 4.2.
|
||||
if ( $wp_current_db_version < 31349 ) {
|
||||
// So that we can change tables to utf8mb4, we need to shorten the index lengths to less than 767 bytes
|
||||
$wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" );
|
||||
$wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -280,6 +280,11 @@ switch($step) {
|
||||
case 'DB_HOST' :
|
||||
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n";
|
||||
break;
|
||||
case 'DB_CHARSET' :
|
||||
if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) {
|
||||
$config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'utf8mb4');\r\n";
|
||||
}
|
||||
break;
|
||||
case 'AUTH_KEY' :
|
||||
case 'SECURE_AUTH_KEY' :
|
||||
case 'LOGGED_IN_KEY' :
|
||||
|
@ -11,7 +11,7 @@ $wp_version = '4.2-alpha-31007-src';
|
||||
*
|
||||
* @global int $wp_db_version
|
||||
*/
|
||||
$wp_db_version = 30133;
|
||||
$wp_db_version = 31349;
|
||||
|
||||
/**
|
||||
* Holds the TinyMCE version
|
||||
|
@ -624,8 +624,6 @@ class wpdb {
|
||||
}
|
||||
}
|
||||
|
||||
$this->init_charset();
|
||||
|
||||
$this->dbuser = $dbuser;
|
||||
$this->dbpassword = $dbpassword;
|
||||
$this->dbname = $dbname;
|
||||
@ -717,18 +715,33 @@ class wpdb {
|
||||
public function init_charset() {
|
||||
if ( function_exists('is_multisite') && is_multisite() ) {
|
||||
$this->charset = 'utf8';
|
||||
if ( defined( 'DB_COLLATE' ) && DB_COLLATE )
|
||||
if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) {
|
||||
$this->collate = DB_COLLATE;
|
||||
else
|
||||
} else {
|
||||
$this->collate = 'utf8_general_ci';
|
||||
}
|
||||
} elseif ( defined( 'DB_COLLATE' ) ) {
|
||||
$this->collate = DB_COLLATE;
|
||||
}
|
||||
|
||||
if ( defined( 'DB_CHARSET' ) )
|
||||
if ( defined( 'DB_CHARSET' ) ) {
|
||||
$this->charset = DB_CHARSET;
|
||||
}
|
||||
|
||||
if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) )
|
||||
|| ( empty( $this->dbh ) || ! ( $this->dbh instanceof mysqli ) ) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) {
|
||||
$this->charset = 'utf8mb4';
|
||||
}
|
||||
|
||||
if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) {
|
||||
$this->collate = 'utf8mb4_unicode_ci';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the connection's character set.
|
||||
*
|
||||
@ -1476,8 +1489,14 @@ class wpdb {
|
||||
|
||||
return false;
|
||||
} elseif ( $this->dbh ) {
|
||||
if ( ! $this->has_connected ) {
|
||||
$this->init_charset();
|
||||
}
|
||||
|
||||
$this->has_connected = true;
|
||||
|
||||
$this->set_charset( $this->dbh );
|
||||
|
||||
$this->ready = true;
|
||||
$this->set_sql_mode();
|
||||
$this->select( $this->dbname, $this->dbh );
|
||||
@ -2249,14 +2268,14 @@ class wpdb {
|
||||
* Retrieves the character set for the given column.
|
||||
*
|
||||
* @since 4.2.0
|
||||
* @access protected
|
||||
* @access public
|
||||
*
|
||||
* @param string $table Table name.
|
||||
* @param string $column Column name.
|
||||
* @return mixed Column character set as a string. False if the column has no
|
||||
* character set. {@see WP_Error} object if there was an error.
|
||||
*/
|
||||
protected function get_col_charset( $table, $column ) {
|
||||
public function get_col_charset( $table, $column ) {
|
||||
$tablekey = strtolower( $table );
|
||||
$columnkey = strtolower( $column );
|
||||
|
||||
@ -2356,7 +2375,6 @@ class wpdb {
|
||||
'gb2312' => 'EUC-CN',
|
||||
'ujis' => 'EUC-JP',
|
||||
'utf32' => 'UTF-32',
|
||||
'utf8mb4' => 'UTF-8',
|
||||
);
|
||||
|
||||
$supported_charsets = array();
|
||||
@ -2391,8 +2409,8 @@ class wpdb {
|
||||
}
|
||||
}
|
||||
|
||||
// utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup.
|
||||
if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
|
||||
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
|
||||
if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
|
||||
$regex = '/
|
||||
(
|
||||
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
|
||||
@ -2400,8 +2418,17 @@ class wpdb {
|
||||
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
|
||||
| [\xE1-\xEC][\x80-\xBF]{2}
|
||||
| \xED[\x80-\x9F][\x80-\xBF]
|
||||
| [\xEE-\xEF][\x80-\xBF]{2}
|
||||
){1,50} # ...one or more times
|
||||
| [\xEE-\xEF][\x80-\xBF]{2}';
|
||||
|
||||
if ( 'utf8mb4' === $charset) {
|
||||
$regex .= '
|
||||
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
|
||||
| [\xF1-\xF3][\x80-\xBF]{3}
|
||||
| \xF4[\x80-\x8F][\x80-\xBF]{2}
|
||||
';
|
||||
}
|
||||
|
||||
$regex .= '){1,50} # ...one or more times
|
||||
)
|
||||
| . # anything else
|
||||
/x';
|
||||
|
@ -134,7 +134,8 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
*/
|
||||
function test_process_fields_failure() {
|
||||
global $wpdb;
|
||||
$data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" );
|
||||
// \xf0\xff\xff\xff is invalid in utf8 and utf8mb4.
|
||||
$data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" );
|
||||
$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
|
||||
}
|
||||
|
||||
@ -436,6 +437,6 @@ class Tests_DB_Charset extends WP_UnitTestCase {
|
||||
*/
|
||||
function test_invalid_characters_in_query() {
|
||||
global $wpdb;
|
||||
$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) );
|
||||
$this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) );
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user