From 9f6ddf8944ec18861fea3bbd4ede48261d44e08d Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Fri, 6 Feb 2015 04:50:19 +0000 Subject: [PATCH] WPDB: If a site is using the `utf8` charset, and their version of MySQL supports `utf8mb4`, auto-upgrade them to `utf8mb4`. This patch also resizes some indexes, to allow for the 767 byte index size limit in standard MySQL installs. See #21212 git-svn-id: https://develop.svn.wordpress.org/trunk@31349 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-admin/includes/schema.php | 25 +++++---- src/wp-admin/includes/upgrade.php | 86 ++++++++++++++++++++++++++++++ src/wp-admin/setup-config.php | 5 ++ src/wp-includes/version.php | 2 +- src/wp-includes/wp-db.php | 51 +++++++++++++----- tests/phpunit/tests/db/charset.php | 7 +-- 6 files changed, 151 insertions(+), 25 deletions(-) diff --git a/src/wp-admin/includes/schema.php b/src/wp-admin/includes/schema.php index 296a6992e1..020854b452 100644 --- a/src/wp-admin/includes/schema.php +++ b/src/wp-admin/includes/schema.php @@ -44,6 +44,13 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) { // Engage multisite if in the middle of turning it on from network.php. $is_multisite = is_multisite() || ( defined( 'WP_INSTALLING_NETWORK' ) && WP_INSTALLING_NETWORK ); + /* + * Indexes have a maximum size of 767 bytes. Historically, we haven't needed to be concerned about that. + * As of 4.2, however, we moved to utf8mb4, which uses 4 bytes per character. This means that an index which + * used to have room for floor(767/3) = 255 characters, now only has room for floor(767/4) = 191 characters. + */ + $max_index_length = 191; + // Blog specific tables. 
$blog_tables = "CREATE TABLE $wpdb->terms ( term_id bigint(20) unsigned NOT NULL auto_increment, @@ -51,8 +58,8 @@ function wp_get_db_schema( $scope = 'all', $blog_id = null ) { slug varchar(200) NOT NULL default '', term_group bigint(10) NOT NULL default 0, PRIMARY KEY (term_id), - KEY slug (slug), - KEY name (name) + KEY slug (slug($max_index_length)), + KEY name (name($max_index_length)) ) $charset_collate; CREATE TABLE $wpdb->term_taxonomy ( term_taxonomy_id bigint(20) unsigned NOT NULL auto_increment, @@ -79,7 +86,7 @@ CREATE TABLE $wpdb->commentmeta ( meta_value longtext, PRIMARY KEY (meta_id), KEY comment_id (comment_id), - KEY meta_key (meta_key) + KEY meta_key (meta_key($max_index_length)) ) $charset_collate; CREATE TABLE $wpdb->comments ( comment_ID bigint(20) unsigned NOT NULL auto_increment, @@ -136,7 +143,7 @@ CREATE TABLE $wpdb->postmeta ( meta_value longtext, PRIMARY KEY (meta_id), KEY post_id (post_id), - KEY meta_key (meta_key) + KEY meta_key (meta_key($max_index_length)) ) $charset_collate; CREATE TABLE $wpdb->posts ( ID bigint(20) unsigned NOT NULL auto_increment, @@ -163,7 +170,7 @@ CREATE TABLE $wpdb->posts ( post_mime_type varchar(100) NOT NULL default '', comment_count bigint(20) NOT NULL default '0', PRIMARY KEY (ID), - KEY post_name (post_name), + KEY post_name (post_name($max_index_length)), KEY type_status_date (post_type,post_status,post_date,ID), KEY post_parent (post_parent), KEY post_author (post_author) @@ -213,7 +220,7 @@ CREATE TABLE $wpdb->posts ( meta_value longtext, PRIMARY KEY (umeta_id), KEY user_id (user_id), - KEY meta_key (meta_key) + KEY meta_key (meta_key($max_index_length)) ) $charset_collate;\n"; // Global tables @@ -261,7 +268,7 @@ CREATE TABLE $wpdb->site ( domain varchar(200) NOT NULL default '', path varchar(100) NOT NULL default '', PRIMARY KEY (id), - KEY domain (domain,path) + KEY domain (domain(140),path(51)) ) $charset_collate; CREATE TABLE $wpdb->sitemeta ( meta_id bigint(20) NOT NULL auto_increment, @@ -269,7 
+276,7 @@ CREATE TABLE $wpdb->sitemeta ( meta_key varchar(255) default NULL, meta_value longtext, PRIMARY KEY (meta_id), - KEY meta_key (meta_key), + KEY meta_key (meta_key($max_index_length)), KEY site_id (site_id) ) $charset_collate; CREATE TABLE $wpdb->signups ( @@ -288,7 +295,7 @@ CREATE TABLE $wpdb->signups ( KEY activation_key (activation_key), KEY user_email (user_email), KEY user_login_email (user_login,user_email), - KEY domain_path (domain,path) + KEY domain_path (domain(140),path(51)) ) $charset_collate;"; switch ( $scope ) { diff --git a/src/wp-admin/includes/upgrade.php b/src/wp-admin/includes/upgrade.php index c2e6cd21ea..5df932cebf 100644 --- a/src/wp-admin/includes/upgrade.php +++ b/src/wp-admin/includes/upgrade.php @@ -519,6 +519,9 @@ function upgrade_all() { if ( $wp_current_db_version < 29630 ) upgrade_400(); + if ( $wp_current_db_version < 31349 ) + upgrade_420(); + maybe_disable_link_manager(); maybe_disable_automattic_widgets(); @@ -1406,6 +1409,27 @@ function upgrade_400() { } } +/** + * Execute changes made in WordPress 4.2.0. + * + * @since 4.2.0 + */ +function upgrade_420() { + global $wp_current_db_version, $wpdb; + + if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) { + if ( is_multisite() ) { + $tables = $wpdb->tables( 'blog' ); + } else { + $tables = $wpdb->tables( 'all' ); + } + + foreach ( $tables as $table ) { + maybe_convert_table_to_utf8mb4( $table ); + } + } +} + /** * Executes network-level upgrade routines. * @@ -1502,6 +1526,21 @@ function upgrade_network() { update_site_option( 'illegal_names', $illegal_names ); } } + + // 4.2 + if ( $wp_current_db_version < 31349 && $wpdb->charset === 'utf8mb4' ) { + if ( ! 
( defined( 'DO_NOT_UPGRADE_GLOBAL_TABLES' ) && DO_NOT_UPGRADE_GLOBAL_TABLES ) ) { + $wpdb->query( "ALTER TABLE $wpdb->site DROP INDEX domain, ADD INDEX domain(domain(140),path(51))" ); + $wpdb->query( "ALTER TABLE $wpdb->sitemeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->signups DROP INDEX domain_path, ADD INDEX domain_path(domain(140),path(51))" ); + + $tables = $wpdb->tables( 'global' ); + + foreach ( $tables as $table ) { + maybe_convert_table_to_utf8mb4( $table ); + } + } + } } // @@ -1607,6 +1646,42 @@ function maybe_add_column($table_name, $column_name, $create_ddl) { return false; } +/** + * If a table only contains utf8 or utf8mb4 columns, convert it to utf8mb4. + * + * @since 4.2.0 + * + * @param string $table The table to convert. + * @return bool true if the table was converted, false if it wasn't. + */ +function maybe_convert_table_to_utf8mb4( $table ) { + global $wpdb; + + $results = $wpdb->get_results( "SHOW FULL COLUMNS FROM `$table`" ); + if ( ! $results ) { + return false; + } + + $has_utf8 = false; + foreach ( $results as $column ) { + if ( $column->Collation ) { + if ( 0 === stripos( $column->Collation, 'utf8_' ) || 0 === stripos( $column->Collation, 'utf8mb3_' ) ) { + $has_utf8 = true; + } elseif ( 0 !== stripos( $column->Collation, 'utf8mb4_' ) ) { + // Don't upgrade tables that have non-utf8 columns. + return false; + } + } + } + + if ( ! $has_utf8 ) { + // Don't bother upgrading tables that don't have utf8 columns. + return false; + } + + return $wpdb->query( "ALTER TABLE $table CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci" ); +} + /** * Retrieve all options as it was for 1.2. * @@ -2284,6 +2359,17 @@ function pre_schema_upgrade() { // dbDelta() can recreate but can't drop the index. $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug" ); } + + // Upgrade versions prior to 4.2. 
+ if ( $wp_current_db_version < 31349 ) { + // So that we can change tables to utf8mb4, we need to shorten the index lengths to less than 767 bytes + $wpdb->query( "ALTER TABLE $wpdb->usermeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX slug, ADD INDEX slug(slug(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->terms DROP INDEX name, ADD INDEX name(name(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->commentmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->postmeta DROP INDEX meta_key, ADD INDEX meta_key(meta_key(191))" ); + $wpdb->query( "ALTER TABLE $wpdb->posts DROP INDEX post_name, ADD INDEX post_name(post_name(191))" ); + } } /** diff --git a/src/wp-admin/setup-config.php b/src/wp-admin/setup-config.php index 5f71f53e67..6b37ce8ba8 100644 --- a/src/wp-admin/setup-config.php +++ b/src/wp-admin/setup-config.php @@ -280,6 +280,11 @@ switch($step) { case 'DB_HOST' : $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . "'" . addcslashes( constant( $constant ), "\\'" ) . "');\r\n"; break; + case 'DB_CHARSET' : + if ( 'utf8mb4' === $wpdb->charset || ( ! $wpdb->charset && $wpdb->has_cap( 'utf8mb4' ) ) ) { + $config_file[ $line_num ] = "define('" . $constant . "'," . $padding . 
"'utf8mb4');\r\n"; + } + break; case 'AUTH_KEY' : case 'SECURE_AUTH_KEY' : case 'LOGGED_IN_KEY' : diff --git a/src/wp-includes/version.php b/src/wp-includes/version.php index 03c6874ca7..3633ac5089 100644 --- a/src/wp-includes/version.php +++ b/src/wp-includes/version.php @@ -11,7 +11,7 @@ $wp_version = '4.2-alpha-31007-src'; * * @global int $wp_db_version */ -$wp_db_version = 30133; +$wp_db_version = 31349; /** * Holds the TinyMCE version diff --git a/src/wp-includes/wp-db.php b/src/wp-includes/wp-db.php index 6eed9e8f78..6f60e9c31a 100644 --- a/src/wp-includes/wp-db.php +++ b/src/wp-includes/wp-db.php @@ -624,8 +624,6 @@ class wpdb { } } - $this->init_charset(); - $this->dbuser = $dbuser; $this->dbpassword = $dbpassword; $this->dbname = $dbname; @@ -717,16 +715,31 @@ class wpdb { public function init_charset() { if ( function_exists('is_multisite') && is_multisite() ) { $this->charset = 'utf8'; - if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) + if ( defined( 'DB_COLLATE' ) && DB_COLLATE ) { $this->collate = DB_COLLATE; - else + } else { $this->collate = 'utf8_general_ci'; + } } elseif ( defined( 'DB_COLLATE' ) ) { $this->collate = DB_COLLATE; } - if ( defined( 'DB_CHARSET' ) ) + if ( defined( 'DB_CHARSET' ) ) { $this->charset = DB_CHARSET; + } + + if ( ( $this->use_mysqli && ! ( $this->dbh instanceof mysqli ) ) + || ( empty( $this->dbh ) || ! ( $this->dbh instanceof mysqli ) ) ) { + return; + } + + if ( 'utf8' === $this->charset && $this->has_cap( 'utf8mb4' ) ) { + $this->charset = 'utf8mb4'; + } + + if ( 'utf8mb4' === $this->charset && ( ! $this->collate || stripos( $this->collate, 'utf8_' ) === 0 ) ) { + $this->collate = 'utf8mb4_unicode_ci'; + } } /** @@ -1476,8 +1489,14 @@ class wpdb { return false; } elseif ( $this->dbh ) { + if ( ! 
$this->has_connected ) { + $this->init_charset(); + } + $this->has_connected = true; + $this->set_charset( $this->dbh ); + $this->ready = true; $this->set_sql_mode(); $this->select( $this->dbname, $this->dbh ); @@ -2249,14 +2268,14 @@ class wpdb { * Retrieves the character set for the given column. * * @since 4.2.0 - * @access protected + * @access public * * @param string $table Table name. * @param string $column Column name. * @return mixed Column character set as a string. False if the column has no * character set. {@see WP_Error} object if there was an error. */ - protected function get_col_charset( $table, $column ) { + public function get_col_charset( $table, $column ) { $tablekey = strtolower( $table ); $columnkey = strtolower( $column ); @@ -2356,7 +2375,6 @@ class wpdb { 'gb2312' => 'EUC-CN', 'ujis' => 'EUC-JP', 'utf32' => 'UTF-32', - 'utf8mb4' => 'UTF-8', ); $supported_charsets = array(); @@ -2391,8 +2409,8 @@ class wpdb { } } - // utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup. - if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { + // utf8 can be handled by regex, which is a bunch faster than a DB lookup. + if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) { $regex = '/ ( (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx @@ -2400,8 +2418,17 @@ class wpdb { | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 | [\xE1-\xEC][\x80-\xBF]{2} | \xED[\x80-\x9F][\x80-\xBF] - | [\xEE-\xEF][\x80-\xBF]{2} - ){1,50} # ...one or more times + | [\xEE-\xEF][\x80-\xBF]{2}'; + + if ( 'utf8mb4' === $charset) { + $regex .= ' + | \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3 + | [\xF1-\xF3][\x80-\xBF]{3} + | \xF4[\x80-\x8F][\x80-\xBF]{2} + '; + } + + $regex .= '){1,50} # ...one or more times ) | . 
# anything else /x'; diff --git a/tests/phpunit/tests/db/charset.php b/tests/phpunit/tests/db/charset.php index 5d32e24ae5..b221d1cb11 100755 --- a/tests/phpunit/tests/db/charset.php +++ b/tests/phpunit/tests/db/charset.php @@ -130,11 +130,12 @@ class Tests_DB_Charset extends WP_UnitTestCase { } /** - * @ ticket 21212 + * @ticket 21212 */ function test_process_fields_failure() { global $wpdb; - $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); + // \xf0\xff\xff\xff is invalid in utf8 and utf8mb4. + $data = array( 'post_content' => "H€llo\xf0\xff\xff\xffWorld¢" ); $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); } @@ -436,6 +437,6 @@ class Tests_DB_Charset extends WP_UnitTestCase { */ function test_invalid_characters_in_query() { global $wpdb; - $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); + $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\xff\xff\xffbar')" ) ); } }