From bea2bf15020dc5f76735540a284f04e25688ed83 Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Fri, 17 Jul 2015 06:33:36 +0000 Subject: [PATCH] WPDB: Remove some of the complexities in `::strip_invalid_text()` associated with switching character sets between queries. Instead of trying to dynamically change connection character sets, we now rely on the value of `::charset`. This also fixes the case where queries were being blocked when `DB_CHARSET` was `utf8`, but the column character set was non-`utf8`. Fixes #32165. git-svn-id: https://develop.svn.wordpress.org/trunk@33308 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/wp-db.php | 50 ++++++++++-------------------- tests/phpunit/tests/db/charset.php | 41 +++++++++++++++++++++--- 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/src/wp-includes/wp-db.php b/src/wp-includes/wp-db.php index a5b17fc8ca..d6f5a6679b 100644 --- a/src/wp-includes/wp-db.php +++ b/src/wp-includes/wp-db.php @@ -2710,55 +2710,37 @@ class wpdb { $queries = array(); foreach ( $data as $col => $value ) { if ( ! empty( $value['db'] ) ) { - if ( ! isset( $queries[ $value['charset'] ] ) ) { - $queries[ $value['charset'] ] = array(); - } - // We're going to need to truncate by characters or bytes, depending on the length value we have. if ( 'byte' === $value['length']['type'] ) { - // Split the CONVERT() calls by charset, so we can make sure the connection is right - $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %.0f ) USING {$value['charset']} )", $value['value'], $value['length']['length'] ); + // Using binary causes LEFT() to truncate by bytes. + $charset = 'binary'; } else { - $queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %.0f )", $value['value'], $value['length']['length'] ); + $charset = $value['charset']; } + $queries[ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING $charset ), %.0f ) USING {$this->charset} )", $value['value'], $value['length']['length'] ); + unset( $data[ $col ]['db'] ); } } - $connection_charset = $this->charset; - foreach ( $queries as $charset => $query ) { + $sql = array(); + foreach ( $queries as $column => $query ) { if ( ! $query ) { continue; } - // Change the charset to match the string(s) we're converting - if ( $charset !== $connection_charset ) { - $connection_charset = $charset; - $this->set_charset( $this->dbh, $charset ); - } - - $this->check_current_query = false; - - $sql = array(); - foreach ( $query as $column => $column_query ) { - $sql[] = $column_query . " AS x_$column"; - } - - $row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A ); - if ( ! $row ) { - $this->set_charset( $this->dbh, $connection_charset ); - return new WP_Error( 'wpdb_strip_invalid_text_failure' ); - } - - foreach ( array_keys( $query ) as $column ) { - $data[ $column ]['value'] = $row["x_$column"]; - } + $sql[] = $query . " AS x_$column"; } - // Don't forget to change the charset back! - if ( $connection_charset !== $this->charset ) { - $this->set_charset( $this->dbh ); + $this->check_current_query = false; + $row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A ); + if ( ! $row ) { + return new WP_Error( 'wpdb_strip_invalid_text_failure' ); + } + + foreach ( array_keys( $data ) as $column ) { + $data[ $column ]['value'] = $row["x_$column"]; } } diff --git a/tests/phpunit/tests/db/charset.php b/tests/phpunit/tests/db/charset.php index 04787664db..25d4d3adf4 100755 --- a/tests/phpunit/tests/db/charset.php +++ b/tests/phpunit/tests/db/charset.php @@ -244,6 +244,27 @@ class Tests_DB_Charset extends WP_UnitTestCase { 'expected' => str_repeat( "\xcc\xe3", 5 ), 'length' => array( 'type' => 'byte', 'length' => 10 ), ), + 'ujis_with_utf8_connection' => array( + 'charset' => 'ujis', + 'connection_charset' => 'utf8', + 'value' => '自動下書き', + 'expected' => '自動下書き', + 'length' => array( 'type' => 'byte', 'length' => 100 ), + ), + 'ujis_with_utf8_connection_char_length' => array( + 'charset' => 'ujis', + 'connection_charset' => 'utf8', + 'value' => '自動下書き', + 'expected' => '自動下書', + 'length' => array( 'type' => 'char', 'length' => 4 ), + ), + 'ujis_with_utf8_connection_byte_length' => array( + 'charset' => 'ujis', + 'connection_charset' => 'utf8', + 'value' => '自動下書き', + 'expected' => '自動', + 'length' => array( 'type' => 'byte', 'length' => 6 ), + ), 'false' => array( // false is a column with no character set (ie, a number column) 'charset' => false, @@ -289,7 +310,7 @@ class Tests_DB_Charset extends WP_UnitTestCase { foreach ( $fields as $test_case => $field ) { $expected = $field; $expected['value'] = $expected['expected']; - unset( $expected['expected'], $field['expected'] ); + unset( $expected['expected'], $field['expected'], $expected['connection_charset'] ); // We're keeping track of these for our multiple-field test. $multiple[] = $field; @@ -303,9 +324,6 @@ class Tests_DB_Charset extends WP_UnitTestCase { $data_provider[] = array( $data, $expected, $test_case ); } - // Time for our test of multiple fields at once. - $data_provider[] = array( $multiple, $multiple_expected, 'multiple fields/charsets' ); - return $data_provider; } @@ -318,7 +336,22 @@ class Tests_DB_Charset extends WP_UnitTestCase { $this->markTestSkipped( 'This test fails in PHP 5.2 on Windows. See https://core.trac.wordpress.org/ticket/31262' ); } + $charset = self::$_wpdb->charset; + if ( isset( $data[0]['connection_charset'] ) ) { + $new_charset = $data[0]['connection_charset']; + unset( $data[0]['connection_charset'] ); + } else { + $new_charset = $data[0]['charset']; + } + + self::$_wpdb->charset = $new_charset; + self::$_wpdb->set_charset( self::$_wpdb->dbh, $new_charset ); + $actual = self::$_wpdb->strip_invalid_text( $data ); + + self::$_wpdb->charset = $charset; + self::$_wpdb->set_charset( self::$_wpdb->dbh, $charset ); + $this->assertSame( $expected, $actual, $message ); }