When creating a post containing emoji, the post can be saved incorrectly if MySQL isn't using a character set that supports emoji.

This change prevents the save from occurring, so it can be handled correctly in the UI.

See #21212.


git-svn-id: https://develop.svn.wordpress.org/trunk@30345 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2014-11-14 21:27:17 +00:00
parent ab96cf6614
commit 6df14c1612
3 changed files with 1394 additions and 64 deletions

View File

@ -143,6 +143,33 @@ class wpdb {
*/
protected $result;
/**
 * Cached column info, for sanity checking data before inserting.
 *
 * Filled in by get_table_charset(): keyed first by lowercased table name,
 * then by lowercased column name, each entry being one row of the
 * `SHOW FULL COLUMNS` result for that table. Read by get_col_charset().
 * Writes from outside the class are ignored by __set().
 *
 * @since 4.1.0
 * @access protected
 * @var array
 */
protected $col_meta = array();
/**
 * Calculated character sets on tables.
 *
 * Keyed by lowercased table name; each value is the charset string that
 * get_table_charset() settled on for that table ('binary' when the table
 * has a binary/blob column). Writes from outside the class are ignored
 * by __set().
 *
 * @since 4.1.0
 * @access protected
 * @var array
 */
protected $table_charset = array();
/**
 * Whether text fields in the current query need to be sanity checked.
 *
 * query() consults this flag before running its invalid-text check and
 * then resets it to true. Callers that have already validated their data,
 * or that only read, set it to false immediately before calling query().
 * Writes from outside the class are ignored by __set().
 *
 * @since 4.1.0
 * @access protected
 * @var bool
 */
protected $check_current_query = true;
/**
* Saved info on the table column
*
@ -647,6 +674,14 @@ class wpdb {
* @param mixed $value The value to set
*/
public function __set( $name, $value ) {
	// Internal charset-checking state: silently refuse any attempt to
	// overwrite these members from outside the class.
	$locked_members = array(
		'col_meta'            => true,
		'table_charset'       => true,
		'check_current_query' => true,
	);

	if ( ! isset( $locked_members[ $name ] ) ) {
		$this->$name = $value;
	}
}
@ -1544,8 +1579,10 @@ class wpdb {
* @return int|false Number of rows affected/selected or false on error
*/
public function query( $query ) {
if ( ! $this->ready )
if ( ! $this->ready ) {
$this->check_current_query = true;
return false;
}
/**
* Filter the database query.
@ -1564,6 +1601,17 @@ class wpdb {
// Log how the function was called
$this->func_call = "\$db->query(\"$query\")";
// If we're writing to the database, make sure the query will write safely.
if ( $this->check_current_query && ! $this->check_ascii( $query ) ) {
$stripped_query = $this->strip_invalid_text_from_query( $query );
if ( $stripped_query !== $query ) {
$this->insert_id = 0;
return false;
}
}
$this->check_current_query = true;
// Keep track of the last query for debug..
$this->last_query = $query;
@ -1737,23 +1785,29 @@ class wpdb {
* @return int|false The number of rows affected, or false on error.
*/
function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) )
if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
return false;
$this->insert_id = 0;
$formats = $format = (array) $format;
$fields = array_keys( $data );
$formatted_fields = array();
foreach ( $fields as $field ) {
if ( !empty( $format ) )
$form = ( $form = array_shift( $formats ) ) ? $form : $format[0];
elseif ( isset( $this->field_types[$field] ) )
$form = $this->field_types[$field];
else
$form = '%s';
$formatted_fields[] = $form;
}
$sql = "{$type} INTO `$table` (`" . implode( '`,`', $fields ) . "`) VALUES (" . implode( ",", $formatted_fields ) . ")";
return $this->query( $this->prepare( $sql, $data ) );
$data = $this->process_fields( $table, $data, $format );
if ( false === $data ) {
return false;
}
$formats = $values = array();
foreach ( $data as $value ) {
$formats[] = $value['format'];
$values[] = $value['value'];
}
$fields = '`' . implode( '`, `', array_keys( $data ) ) . '`';
$formats = implode( ', ', $formats );
$sql = "$type INTO `$table` ($fields) VALUES ($formats)";
$this->insert_id = 0;
$this->check_current_query = false;
return $this->query( $this->prepare( $sql, $values ) );
}
/**
@ -1778,34 +1832,36 @@ class wpdb {
* @return int|false The number of rows updated, or false on error.
*/
public function update( $table, $data, $where, $format = null, $where_format = null ) {
if ( ! is_array( $data ) || ! is_array( $where ) )
if ( ! is_array( $data ) || ! is_array( $where ) ) {
return false;
$formats = $format = (array) $format;
$bits = $wheres = array();
foreach ( (array) array_keys( $data ) as $field ) {
if ( !empty( $format ) )
$form = ( $form = array_shift( $formats ) ) ? $form : $format[0];
elseif ( isset($this->field_types[$field]) )
$form = $this->field_types[$field];
else
$form = '%s';
$bits[] = "`$field` = {$form}";
}
$where_formats = $where_format = (array) $where_format;
foreach ( (array) array_keys( $where ) as $field ) {
if ( !empty( $where_format ) )
$form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0];
elseif ( isset( $this->field_types[$field] ) )
$form = $this->field_types[$field];
else
$form = '%s';
$wheres[] = "`$field` = {$form}";
$data = $this->process_fields( $table, $data, $format );
if ( false === $data ) {
return false;
}
$where = $this->process_fields( $table, $where, $where_format );
if ( false === $where ) {
return false;
}
$sql = "UPDATE `$table` SET " . implode( ', ', $bits ) . ' WHERE ' . implode( ' AND ', $wheres );
return $this->query( $this->prepare( $sql, array_merge( array_values( $data ), array_values( $where ) ) ) );
$fields = $conditions = $values = array();
foreach ( $data as $field => $value ) {
$fields[] = "`$field` = " . $value['format'];
$values[] = $value['value'];
}
foreach ( $where as $field => $value ) {
$conditions[] = "`$field` = " . $value['format'];
$values[] = $value['value'];
}
$fields = implode( ', ', $fields );
$conditions = implode( ' AND ', $conditions );
$sql = "UPDATE `$table` SET $fields WHERE $conditions";
$this->check_current_query = false;
return $this->query( $this->prepare( $sql, $values ) );
}
/**
@ -1827,29 +1883,133 @@ class wpdb {
* @return int|false The number of rows updated, or false on error.
*/
public function delete( $table, $where, $where_format = null ) {
if ( ! is_array( $where ) )
if ( ! is_array( $where ) ) {
return false;
$wheres = array();
$where_formats = $where_format = (array) $where_format;
foreach ( array_keys( $where ) as $field ) {
if ( !empty( $where_format ) ) {
$form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0];
} elseif ( isset( $this->field_types[ $field ] ) ) {
$form = $this->field_types[ $field ];
} else {
$form = '%s';
}
$wheres[] = "$field = $form";
}
$sql = "DELETE FROM $table WHERE " . implode( ' AND ', $wheres );
return $this->query( $this->prepare( $sql, $where ) );
$where = $this->process_fields( $table, $where, $where_format );
if ( false === $where ) {
return false;
}
$conditions = $values = array();
foreach ( $where as $field => $value ) {
$conditions[] = "`$field` = " . $value['format'];
$values[] = $value['value'];
}
$conditions = implode( ' AND ', $conditions );
$sql = "DELETE FROM `$table` WHERE $conditions";
$this->check_current_query = false;
return $this->query( $this->prepare( $sql, $values ) );
}
/**
 * Pairs CRUD field values with formats and charsets, rejecting unsafe text.
 *
 * Helper for the insert/replace/update/delete methods. Each value in $data
 * is first wrapped with its placeholder format, then annotated with the
 * charset of its column. Finally the values are run through
 * strip_invalid_text(): if that would change anything at all (or fails),
 * the data is considered unsafe to write and the whole call is rejected.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $table  Table name.
 * @param array  $data   Field/value pairs.
 * @param mixed  $format Format(s) to map onto the values in $data.
 * @return array|bool Array keyed by field name, each entry holding 'value',
 *                    'format' and 'charset' (plus 'ascii' for non-ASCII
 *                    text). False when a charset lookup fails or a value
 *                    contains text that would be stripped.
 */
protected function process_fields( $table, $data, $format ) {
	$fields = $this->process_field_charsets(
		$this->process_field_formats( $data, $format ),
		$table
	);

	if ( false === $fields ) {
		return false;
	}

	// Stripping is used purely as a probe here: any difference between the
	// input and the "cleaned" output means the write must not go ahead.
	if ( $this->strip_invalid_text( $fields ) !== $fields ) {
		return false;
	}

	return $fields;
}
/**
 * Wraps each field value with the placeholder format it should use.
 *
 * Format resolution, per field:
 *  - If $format is non-empty, formats are consumed in order; once they run
 *    out (or an entry is falsy) the first supplied format is reused.
 *  - Otherwise the format registered in $this->field_types is used, if any.
 *  - Otherwise the value is treated as a string ('%s').
 *
 * @since 4.1.0
 * @access protected
 *
 * @param array $data   Array of fields to values.
 * @param mixed $format Formats to be mapped to the values in $data.
 * @return array Array keyed by field name, each entry being an array with
 *               'value' and 'format' keys (in that order).
 */
protected function process_field_formats( $data, $format ) {
	$all_formats     = (array) $format;
	$pending_formats = $all_formats;
	$has_formats     = ! empty( $format );

	foreach ( $data as $field => $raw_value ) {
		if ( $has_formats ) {
			$field_format = array_shift( $pending_formats );
			if ( ! $field_format ) {
				// Ran out of explicit formats: fall back to the first one.
				$field_format = reset( $all_formats );
			}
		} elseif ( isset( $this->field_types[ $field ] ) ) {
			$field_format = $this->field_types[ $field ];
		} else {
			$field_format = '%s';
		}

		$data[ $field ] = array(
			'value'  => $raw_value,
			'format' => $field_format,
		);
	}

	return $data;
}
/**
 * Annotates field/value/format arrays with the charset of each column.
 *
 * Operates on the output of process_field_formats(). Values that cannot
 * contain problematic text get 'charset' => false so later checks skip
 * them; everything else gets the column's charset plus 'ascii' => false,
 * which tells strip_invalid_text() not to repeat the ASCII test.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param array  $data  As it comes from the process_field_formats() method.
 * @param string $table Table name.
 * @return array|bool The same array as $data with additional 'charset'
 *                    keys, or false if a column charset lookup returned
 *                    a WP_Error.
 */
protected function process_field_charsets( $data, $table ) {
	foreach ( $data as $field => $value ) {
		// Only %d and %f are known to be non-strings; any other
		// placeholder is treated as potentially carrying text.
		$is_numeric_format = in_array( $value['format'], array( '%d', '%f' ), true );

		if ( $is_numeric_format || $this->check_ascii( $value['value'] ) ) {
			// Numbers and pure-ASCII text are safe in every charset.
			$value['charset'] = false;
		} else {
			$col_charset = $this->get_col_charset( $table, $field );
			if ( is_wp_error( $col_charset ) ) {
				return false;
			}

			$value['charset'] = $col_charset;
			// Already known to be non-ASCII; don't have strip_invalid_text() re-check.
			$value['ascii'] = false;
		}

		$data[ $field ] = $value;
	}

	return $data;
}
/**
* Retrieve one variable from the database.
@ -1867,8 +2027,11 @@ class wpdb {
*/
public function get_var( $query = null, $x = 0, $y = 0 ) {
$this->func_call = "\$db->get_var(\"$query\", $x, $y)";
if ( $query )
if ( $query ) {
$this->check_current_query = false;
$this->query( $query );
}
// Extract var out of cached results based x,y vals
if ( !empty( $this->last_result[$y] ) ) {
@ -1894,10 +2057,12 @@ class wpdb {
*/
public function get_row( $query = null, $output = OBJECT, $y = 0 ) {
$this->func_call = "\$db->get_row(\"$query\",$output,$y)";
if ( $query )
if ( $query ) {
$this->check_current_query = false;
$this->query( $query );
else
} else {
return null;
}
if ( !isset( $this->last_result[$y] ) )
return null;
@ -1930,8 +2095,10 @@ class wpdb {
* @return array Database query result. Array indexed from 0 by SQL result row number.
*/
public function get_col( $query = null , $x = 0 ) {
if ( $query )
if ( $query ) {
$this->check_current_query = false;
$this->query( $query );
}
$new_array = array();
// Extract the column values
@ -1957,10 +2124,12 @@ class wpdb {
public function get_results( $query = null, $output = OBJECT ) {
$this->func_call = "\$db->get_results(\"$query\", $output)";
if ( $query )
if ( $query ) {
$this->check_current_query = false;
$this->query( $query );
else
} else {
return null;
}
$new_array = array();
if ( $output == OBJECT ) {
@ -1997,6 +2166,453 @@ class wpdb {
return null;
}
/**
 * Retrieves the character set for the given table.
 *
 * The result is cached in $this->table_charset under the lowercased table
 * name, and the table's column metadata is cached in $this->col_meta as a
 * side effect. The table name is sent to MySQL exactly as supplied, because
 * table names can be case-sensitive on the server.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $table Table name.
 * @return string|WP_Error Table character set, `WP_Error` object if it couldn't be found.
 */
protected function get_table_charset( $table ) {
	// Lowercase only the cache key; the name used in SQL must keep its case.
	$tablekey = strtolower( $table );

	/**
	 * Filter the table charset value before the DB is checked.
	 *
	 * Passing a non-null value to the filter will effectively short-circuit
	 * checking the DB for the charset, returning that value instead.
	 *
	 * @since 4.1.0
	 *
	 * @param string $charset The character set to use. Default null.
	 * @param string $table   The (lowercased) name of the table being checked.
	 */
	$charset = apply_filters( 'pre_get_table_charset', null, $tablekey );
	if ( null !== $charset ) {
		return $charset;
	}

	if ( isset( $this->table_charset[ $tablekey ] ) ) {
		return $this->table_charset[ $tablekey ];
	}

	$charsets = $columns = array();
	$results  = $this->get_results( "SHOW FULL COLUMNS FROM `$table`" );
	if ( ! $results ) {
		return new WP_Error( 'wpdb_get_table_charset_failure' );
	}

	foreach ( $results as $column ) {
		$columns[ strtolower( $column->Field ) ] = $column;
	}

	$this->col_meta[ $tablekey ] = $columns;

	foreach ( $columns as $column ) {
		if ( $column->Collation ) {
			// Collations are named "<charset>_<rest>".
			list( $charset ) = explode( '_', $column->Collation );
			$charsets[ strtolower( $charset ) ] = true;
		}

		list( $type ) = explode( '(', $column->Type );

		// A binary/blob means the whole query gets treated like this.
		if ( in_array( strtoupper( $type ), array( 'BINARY', 'VARBINARY', 'TINYBLOB', 'MEDIUMBLOB', 'BLOB', 'LONGBLOB' ) ) ) {
			$this->table_charset[ $tablekey ] = 'binary';
			return 'binary';
		}
	}

	// utf8mb3 is an alias for utf8.
	if ( isset( $charsets['utf8mb3'] ) ) {
		$charsets['utf8'] = true;
		unset( $charsets['utf8mb3'] );
	}

	// Check if we have more than one charset in play.
	$count = count( $charsets );
	if ( 1 === $count ) {
		$charset = key( $charsets );
	} elseif ( 0 === $count ) {
		// No charsets, assume this table can store whatever.
		$charset = 'latin1';
	} else {
		// More than one charset. Remove latin1 if present and recalculate.
		unset( $charsets['latin1'] );
		$count = count( $charsets );
		if ( 1 === $count ) {
			// Only one charset (besides latin1).
			$charset = key( $charsets );
		} elseif ( 2 === $count && isset( $charsets['utf8'], $charsets['utf8mb4'] ) ) {
			// Two charsets, but they're utf8 and utf8mb4, use utf8.
			$charset = 'utf8';
		} else {
			// Two mixed character sets. ascii.
			$charset = 'ascii';
		}
	}

	$this->table_charset[ $tablekey ] = $charset;
	return $charset;
}
/**
 * Retrieves the character set for the given column.
 *
 * Cached table/column metadata is looked up by lowercased names. When the
 * table's charset has not been cached yet, get_table_charset() is called
 * (with the table name as supplied) to prime the caches; its return value
 * is also used as the fallback, so the result stays well-defined even when
 * that call returns without populating the cache.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $table  Table name.
 * @param string $column Column name.
 * @return mixed Column character set as a string.
 *               False if the column has no character set.
 *               `WP_Error` object if there was an error.
 */
protected function get_col_charset( $table, $column ) {
	$tablekey  = strtolower( $table );
	$columnkey = strtolower( $column );

	/**
	 * Filter the column charset value before the DB is checked.
	 *
	 * Passing a non-null value to the filter will short-circuit
	 * checking the DB for the charset, returning that value instead.
	 *
	 * @since 4.1.0
	 *
	 * @param string $charset The character set to use. Default null.
	 * @param string $table   The (lowercased) name of the table being checked.
	 * @param string $column  The (lowercased) name of the column being checked.
	 */
	$charset = apply_filters( 'pre_get_col_charset', null, $tablekey, $columnkey );
	if ( null !== $charset ) {
		return $charset;
	}

	if ( empty( $this->table_charset[ $tablekey ] ) ) {
		// This primes column information for us.
		$table_charset = $this->get_table_charset( $table );
		if ( is_wp_error( $table_charset ) ) {
			return $table_charset;
		}
	} else {
		$table_charset = $this->table_charset[ $tablekey ];
	}

	// If still no column information, or this column doesn't exist,
	// return the table charset.
	if ( empty( $this->col_meta[ $tablekey ] ) || empty( $this->col_meta[ $tablekey ][ $columnkey ] ) ) {
		return $table_charset;
	}

	// Return false when it's not a string column.
	if ( empty( $this->col_meta[ $tablekey ][ $columnkey ]->Collation ) ) {
		return false;
	}

	// Collations are named "<charset>_<rest>".
	list( $charset ) = explode( '_', $this->col_meta[ $tablekey ][ $columnkey ]->Collation );
	return $charset;
}
/**
 * Check if a string is ASCII.
 *
 * Uses mb_check_encoding() when the mbstring extension provides it.
 * Otherwise falls back to a negative regex, which can stop scanning as
 * soon as it meets the first non-ASCII byte.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $string String to check.
 * @return bool True if ASCII, false if not.
 */
protected function check_ascii( $string ) {
	if ( function_exists( 'mb_check_encoding' ) ) {
		return (bool) mb_check_encoding( $string, 'ASCII' );
	}

	// No match for "any byte above 0x7F" means the string is pure ASCII.
	return ! preg_match( '/[^\x00-\x7F]/', $string );
}
/**
 * Strips any invalid characters based on value/charset pairs.
 *
 * Each value is handled by the cheapest mechanism available:
 *  1. latin1 columns, non-string columns/values and pure-ASCII text are
 *     left untouched.
 *  2. Charsets that mbstring knows are round-tripped through
 *     mb_convert_encoding().
 *  3. utf8/utf8mb3 is filtered with a regex that keeps only sequences of
 *     up to three bytes.
 *  4. Anything else is sent to the database in a `SELECT CONVERT(...)`
 *     query, grouped by charset. The connection charset is switched for
 *     each group and restored to its original value before returning.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param array $data Array of value arrays. Each value array has the keys
 *                    'value' and 'charset'. An optional 'ascii' key can be
 *                    set to false to avoid redundant ASCII checks.
 * @return array|WP_Error The $data parameter, with invalid characters removed from
 *                        each value. This works as a passthrough: any additional keys
 *                        such as 'field' are retained in each value array. If we cannot
 *                        remove invalid characters, a `WP_Error` object is returned.
 */
protected function strip_invalid_text( $data ) {
	// Some multibyte character sets that we can check in PHP.
	$mb_charsets = array(
		'ascii'   => 'ASCII',
		'big5'    => 'BIG-5',
		'eucjpms' => 'eucJP-win',
		'gb2312'  => 'EUC-CN',
		'ujis'    => 'EUC-JP',
		'utf32'   => 'UTF-32',
		'utf8mb4' => 'UTF-8',
	);

	$supported_charsets = array();
	if ( function_exists( 'mb_list_encodings' ) ) {
		$supported_charsets = mb_list_encodings();
	}

	$db_check_string = false;

	foreach ( $data as &$value ) {
		$charset = $value['charset'];

		// latin1 will happily store anything.
		if ( 'latin1' === $charset ) {
			continue;
		}

		// Column or value isn't a string.
		if ( false === $charset || ! is_string( $value['value'] ) ) {
			continue;
		}

		// ASCII is always OK. Skipped when the caller already set the 'ascii' key.
		if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) {
			continue;
		}

		// Convert the text locally.
		if ( $supported_charsets ) {
			if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) {
				$value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] );
				continue;
			}
		}

		// utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup.
		if ( 'utf8' === $charset || 'utf8mb3' === $charset ) {
			// The pattern literal is kept verbatim (extended mode ignores its layout).
			$regex = '/
(
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
| [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
| \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xE1-\xEC][\x80-\xBF]{2}
| \xED[\x80-\x9F][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF]{2}
){1,100} # ...one or more times
)
| . # anything else
/x';
			// Valid runs are captured as $1; any other byte is replaced by nothing.
			$value['value'] = preg_replace( $regex, '$1', $value['value'] );
			continue;
		}

		// We couldn't use any local conversions, send it to the DB.
		$value['db'] = $db_check_string = true;
	}
	unset( $value ); // Remove by reference.

	if ( $db_check_string ) {
		$queries = array();
		foreach ( $data as $col => $value ) {
			if ( ! empty( $value['db'] ) ) {
				if ( ! isset( $queries[ $value['charset'] ] ) ) {
					$queries[ $value['charset'] ] = array();
				}

				// Split the CONVERT() calls by charset, so we can make sure the connection is right
				$queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] );
			}
		}

		// Track the charset the connection is currently switched to in a
		// local variable. Nothing in this method changes $this->charset, so
		// comparing against that property cannot tell us whether a switch
		// happened and must be undone.
		$original_charset = $this->charset;
		$active_charset   = $original_charset;

		foreach ( $queries as $charset => $query ) {
			if ( ! $query ) {
				continue;
			}

			// Change the charset to match the string(s) we're converting
			if ( $charset !== $active_charset ) {
				$this->set_charset( $this->dbh, $charset );
				$active_charset = $charset;
			}

			$row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N );
			if ( ! $row ) {
				if ( $active_charset !== $original_charset ) {
					$this->set_charset( $this->dbh, $original_charset );
				}
				return new WP_Error( 'wpdb_strip_invalid_text_failure' );
			}

			// Result columns come back in the same order the CONVERT() calls were listed.
			foreach ( array_keys( $query ) as $index => $col ) {
				$data[ $col ]['value'] = $row[ $index ];
			}
		}

		// Don't forget to change the charset back!
		if ( $active_charset !== $original_charset ) {
			$this->set_charset( $this->dbh, $original_charset );
		}
	}

	return $data;
}
/**
 * Strips any invalid characters from the query.
 *
 * The charset used for the check is that of the first table referenced by
 * the query, or the connection charset ($this->charset) when no table can
 * be identified. Queries against tables classified as 'binary' are
 * returned unchanged.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $query Query to convert.
 * @return string|WP_Error The converted query, or a `WP_Error` object if the conversion fails.
 */
protected function strip_invalid_text_from_query( $query ) {
	$charset = $this->charset;

	$table = $this->get_table_from_query( $query );
	if ( $table ) {
		$charset = $this->get_table_charset( $table );

		if ( is_wp_error( $charset ) ) {
			return $charset;
		}

		// We can't reliably strip text from tables containing binary/blob columns
		if ( 'binary' === $charset ) {
			return $query;
		}
	}

	// 'ascii' => false: the caller has already established the query is not pure ASCII.
	$stripped = $this->strip_invalid_text(
		array(
			array(
				'value'   => $query,
				'charset' => $charset,
				'ascii'   => false,
			),
		)
	);

	return is_wp_error( $stripped ) ? $stripped : $stripped[0]['value'];
}
/**
 * Strips any invalid characters from the string for a given table and column.
 *
 * Non-string values are returned untouched without being inspected, as are
 * pure-ASCII strings and values destined for columns that have no charset.
 *
 * @since 4.1.0
 * @access public
 *
 * @param string $table  Table name.
 * @param string $column Column name.
 * @param string $value  The text to check.
 * @return string|WP_Error The converted string, or a `WP_Error` object if the conversion fails.
 */
public function strip_invalid_text_for_column( $table, $column, $value ) {
	// Test the type first so check_ascii() only ever receives strings.
	if ( ! is_string( $value ) || $this->check_ascii( $value ) ) {
		return $value;
	}

	$charset = $this->get_col_charset( $table, $column );
	if ( is_wp_error( $charset ) ) {
		// Bail on real errors.
		return $charset;
	}
	if ( ! $charset ) {
		// Not a string column.
		return $value;
	}

	$data = array(
		$column => array(
			'value'   => $value,
			'charset' => $charset,
			// Already known to be non-ASCII; skip the redundant check.
			'ascii'   => false,
		),
	);

	$data = $this->strip_invalid_text( $data );
	if ( is_wp_error( $data ) ) {
		return $data;
	}

	return $data[ $column ]['value'];
}
/**
 * Find the first table name referenced in a query.
 *
 * The query is normalised (trailing terminator/comment characters and
 * leading whitespace/parentheses removed, parenthesised groups other than
 * nested selects collapsed, length capped at 1,000 characters) and then
 * matched against three patterns in turn, from most to least common.
 *
 * @since 4.1.0
 * @access protected
 *
 * @param string $query The query to search.
 * @return string|bool $table The table name found, or false if a table couldn't be found.
 */
protected function get_table_from_query( $query ) {
	// Remove characters that can legally trail the table name, then allow
	// (select...) union [...] style queries by dropping leading parentheses,
	// so the first query's table name is used.
	$query = ltrim( rtrim( $query, ';/-#' ), "\r\n\t (" );

	/*
	 * Strip everything between parentheses except nested selects and use only 1,000
	 * chars of the query.
	 */
	$query = preg_replace( '/\((?!\s*select)[^(]*?\)/is', '()', substr( $query, 0, 1000 ) );

	$patterns = array(
		// Quickly match most common queries.
		'/^\s*(?:'
			. 'SELECT.*?\s+FROM'
			. '|INSERT(?:\s+LOW_PRIORITY|\s+DELAYED|\s+HIGH_PRIORITY)?(?:\s+IGNORE)?(?:\s+INTO)?'
			. '|REPLACE(?:\s+LOW_PRIORITY|\s+DELAYED)?(?:\s+INTO)?'
			. '|UPDATE(?:\s+LOW_PRIORITY)?(?:\s+IGNORE)?'
			. '|DELETE(?:\s+LOW_PRIORITY|\s+QUICK|\s+IGNORE)*(?:\s+FROM)?'
			. ')\s+`?([\w-]+)`?/is',

		// SHOW TABLE STATUS and SHOW TABLES
		'/^\s*(?:'
			. 'SHOW\s+TABLE\s+STATUS.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
			. '|SHOW\s+(?:FULL\s+)?TABLES.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)'
			. ')\W([\w-]+)\W/is',

		// Big pattern for the rest of the table-related queries.
		'/^\s*(?:'
			. '(?:EXPLAIN\s+(?:EXTENDED\s+)?)?SELECT.*?\s+FROM'
			. '|DESCRIBE|DESC|EXPLAIN|HANDLER'
			. '|(?:LOCK|UNLOCK)\s+TABLE(?:S)?'
			. '|(?:RENAME|OPTIMIZE|BACKUP|RESTORE|CHECK|CHECKSUM|ANALYZE|REPAIR).*\s+TABLE'
			. '|TRUNCATE(?:\s+TABLE)?'
			. '|CREATE(?:\s+TEMPORARY)?\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?'
			. '|ALTER(?:\s+IGNORE)?\s+TABLE'
			. '|DROP\s+TABLE(?:\s+IF\s+EXISTS)?'
			. '|CREATE(?:\s+\w+)?\s+INDEX.*\s+ON'
			. '|DROP\s+INDEX.*\s+ON'
			. '|LOAD\s+DATA.*INFILE.*INTO\s+TABLE'
			. '|(?:GRANT|REVOKE).*ON\s+TABLE'
			. '|SHOW\s+(?:.*FROM|.*TABLE)'
			. ')\s+\(*\s*`?([\w-]+)`?\s*\)*/is',
	);

	// The first pattern that matches wins; its single capture group is the table name.
	foreach ( $patterns as $pattern ) {
		if ( preg_match( $pattern, $query, $maybe ) ) {
			return $maybe[1];
		}
	}

	return false;
}
/**
* Load the column metadata from the last query.
*
@ -2160,6 +2776,8 @@ class wpdb {
return version_compare( $version, '4.1', '>=' );
case 'set_charset' :
return version_compare( $version, '5.0.7', '>=' );
case 'utf8mb4' : // @since 4.1.0
return version_compare( $version, '5.5.3', '>=' );
}
return false;

View File

@ -13,6 +13,16 @@ class Tests_DB extends WP_UnitTestCase {
*/
protected $_queries = array();
/**
 * Shared wpdb subclass instance whose protected methods are callable from
 * the tests. Created once per test class in setUpBeforeClass().
 *
 * @var wpdb_exposed_methods_for_testing
 */
protected static $_wpdb;
/**
 * Create the shared wpdb test double before any test in this class runs.
 */
public static function setUpBeforeClass() {
self::$_wpdb = new wpdb_exposed_methods_for_testing();
}
/**
* Set up the test fixture
*/
@ -26,8 +36,8 @@ class Tests_DB extends WP_UnitTestCase {
* Tear down the test fixture
*/
public function tearDown() {
parent::tearDown();
remove_filter( 'query', array( $this, 'query_filter' ) );
parent::tearDown();
}
/**
@ -223,6 +233,21 @@ class Tests_DB extends WP_UnitTestCase {
$this->assertNotEmpty( $wpdb->dbh );
}
/**
 * Assigning to a guarded wpdb member from outside the class must be a no-op.
 *
 * @ticket 21212
 */
function test_wpdb_actually_protected_properties() {
	global $wpdb;

	$original_meta = $wpdb->col_meta;
	$bogus_meta    = "HAHA I HOPE THIS DOESN'T WORK";

	// Attempt the overwrite; the value read back afterwards must be unchanged.
	$wpdb->col_meta = $bogus_meta;

	$this->assertNotEquals( $original_meta, $bogus_meta );
	$this->assertEquals( $original_meta, $wpdb->col_meta );
}
/**
* @ticket 18510
*/
@ -493,4 +518,282 @@ class Tests_DB extends WP_UnitTestCase {
$wpdb->query( 'DROP PROCEDURE IF EXISTS `test_mysqli_flush_sync_procedure`' );
$wpdb->suppress_errors( $suppress );
}
/**
 * Data provider: queries from which get_table_from_query() must extract
 * the table name. Each dataset is array( $query, $expected_table ).
 *
 * @ticket 21212
 */
function data_get_table_from_query() {
	$table = 'a_test_table_name';

	$queries = array(
		// Basic
		"SELECT * FROM $table",
		"SELECT * FROM `$table`",
		"INSERT $table",
		"INSERT IGNORE $table",
		"INSERT IGNORE INTO $table",
		"INSERT INTO $table",
		"INSERT LOW_PRIORITY $table",
		"INSERT DELAYED $table",
		"INSERT HIGH_PRIORITY $table",
		"INSERT LOW_PRIORITY IGNORE $table",
		"INSERT LOW_PRIORITY INTO $table",
		"INSERT LOW_PRIORITY IGNORE INTO $table",
		"REPLACE $table",
		"REPLACE INTO $table",
		"REPLACE LOW_PRIORITY $table",
		"REPLACE DELAYED $table",
		"REPLACE LOW_PRIORITY INTO $table",
		"UPDATE LOW_PRIORITY $table",
		"UPDATE LOW_PRIORITY IGNORE $table",
		"DELETE $table",
		"DELETE IGNORE $table",
		"DELETE IGNORE FROM $table",
		"DELETE FROM $table",
		"DELETE LOW_PRIORITY $table",
		"DELETE QUICK $table",
		"DELETE IGNORE $table",
		"DELETE LOW_PRIORITY FROM $table",

		// STATUS
		"SHOW TABLE STATUS LIKE '$table'",
		"SHOW TABLE STATUS WHERE NAME='$table'",
		"SHOW TABLES LIKE '$table'",
		"SHOW FULL TABLES LIKE '$table'",
		"SHOW TABLES WHERE NAME='$table'",

		// Extended
		"EXPLAIN SELECT * FROM $table",
		"EXPLAIN EXTENDED SELECT * FROM $table",
		"EXPLAIN EXTENDED SELECT * FROM `$table`",
		"DESCRIBE $table",
		"DESC $table",
		"EXPLAIN $table",
		"HANDLER $table",
		"LOCK TABLE $table",
		"LOCK TABLES $table",
		"UNLOCK TABLE $table",
		"RENAME TABLE $table",
		"OPTIMIZE TABLE $table",
		"BACKUP TABLE $table",
		"RESTORE TABLE $table",
		"CHECK TABLE $table",
		"CHECKSUM TABLE $table",
		"ANALYZE TABLE $table",
		"REPAIR TABLE $table",
		"TRUNCATE $table",
		"TRUNCATE TABLE $table",
		"CREATE TABLE $table",
		"CREATE TEMPORARY TABLE $table",
		"CREATE TABLE IF NOT EXISTS $table",
		"ALTER TABLE $table",
		"ALTER IGNORE TABLE $table",
		"DROP TABLE $table",
		"DROP TABLE IF EXISTS $table",
		"CREATE INDEX foo(bar(20)) ON $table",
		"CREATE UNIQUE INDEX foo(bar(20)) ON $table",
		"CREATE FULLTEXT INDEX foo(bar(20)) ON $table",
		"CREATE SPATIAL INDEX foo(bar(20)) ON $table",
		"DROP INDEX foo ON $table",
		"LOAD DATA INFILE 'wp.txt' INTO TABLE $table",
		"LOAD DATA LOW_PRIORITY INFILE 'wp.txt' INTO TABLE $table",
		"LOAD DATA CONCURRENT INFILE 'wp.txt' INTO TABLE $table",
		"LOAD DATA LOW_PRIORITY LOCAL INFILE 'wp.txt' INTO TABLE $table",
		"LOAD DATA INFILE 'wp.txt' REPLACE INTO TABLE $table",
		"LOAD DATA INFILE 'wp.txt' IGNORE INTO TABLE $table",
		"GRANT ALL ON TABLE $table",
		"REVOKE ALL ON TABLE $table",
		"SHOW COLUMNS FROM $table",
		"SHOW FULL COLUMNS FROM $table",
		"SHOW CREATE TABLE $table",
		"SHOW INDEX FROM $table",
	);

	// Pair every query with the table name it is expected to yield.
	$datasets = array();
	foreach ( $queries as $sql ) {
		$datasets[] = array( $sql, $table );
	}

	return $datasets;
}
/**
 * The table name must be extracted from every supported query form.
 *
 * @dataProvider data_get_table_from_query
 * @ticket 21212
 */
function test_get_table_from_query( $query, $table ) {
	$found = self::$_wpdb->get_table_from_query( $query );

	$this->assertEquals( $table, $found );
}
/**
 * Data provider: strings that are not queries, for which no table name
 * should be found. Each dataset is array( $query ).
 */
function data_get_table_from_query_false() {
	$table       = 'a_test_table_name';
	$not_a_query = "LOL THIS ISN'T EVEN A QUERY $table";

	return array(
		array( $not_a_query ),
	);
}
/**
 * Input that is not a recognisable query must yield false.
 *
 * @dataProvider data_get_table_from_query_false
 * @ticket 21212
 */
function test_get_table_from_query_false( $query ) {
	$found = self::$_wpdb->get_table_from_query( $query );

	$this->assertFalse( $found );
}
/**
 * Data provider for test_process_field_formats().
 *
 * Each dataset is array( $data, $format, $expected, $message ), where
 * $message is the dataset's name and is used as the assertion message.
 *
 * @ticket 21212
 */
function data_process_field_formats() {
	$named_datasets = array(
		'core_db_fields_no_format_specified' => array(
			array( 'post_content' => 'foo', 'post_parent' => 0 ),
			null,
			array(
				'post_content' => array( 'value' => 'foo', 'format' => '%s' ),
				'post_parent' => array( 'value' => 0, 'format' => '%d' ),
			),
		),
		'core_db_fields_formats_specified' => array(
			array( 'post_content' => 'foo', 'post_parent' => 0 ),
			array( '%d', '%s' ), // These override core field_types
			array(
				'post_content' => array( 'value' => 'foo', 'format' => '%d' ),
				'post_parent' => array( 'value' => 0, 'format' => '%s' ),
			),
		),
		'misc_fields_no_format_specified' => array(
			array( 'this_is_not_a_core_field' => 'foo', 'this_is_not_either' => 0 ),
			null,
			array(
				'this_is_not_a_core_field' => array( 'value' => 'foo', 'format' => '%s' ),
				'this_is_not_either' => array( 'value' => 0, 'format' => '%s' ),
			),
		),
		'misc_fields_formats_specified' => array(
			array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 1.2 ),
			array( '%d', '%f' ),
			array(
				'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ),
				'this_is_not_either' => array( 'value' => 1.2, 'format' => '%f' ),
			),
		),
		'misc_fields_insufficient_formats_specified' => array(
			array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 's', 'nor_this' => 1 ),
			array( '%d', '%s' ), // The first format is used for the third
			array(
				'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ),
				'this_is_not_either' => array( 'value' => 's', 'format' => '%s' ),
				'nor_this' => array( 'value' => 1, 'format' => '%d' ),
			),
		),
	);

	// Push the dataset name onto the end for assertSame $message
	$datasets = array();
	foreach ( $named_datasets as $dataset_name => $dataset ) {
		$dataset[]  = $dataset_name;
		$datasets[] = $dataset;
	}

	return $datasets;
}
/**
 * process_field_formats() must pair every value with the right format.
 *
 * @dataProvider data_process_field_formats
 * @ticket 21212
 */
function test_process_field_formats( $data, $format, $expected, $message ) {
	$this->assertSame(
		$expected,
		self::$_wpdb->process_field_formats( $data, $format ),
		$message
	);
}
/**
 * A non-ASCII value that is valid for its column must come back from
 * process_fields() annotated with format, charset and the 'ascii' marker.
 *
 * @ticket 21212
 */
function test_process_fields() {
	global $wpdb;

	$content = '¡foo foo foo!';

	$expected = array(
		'post_content' => array(
			'value'   => $content,
			'format'  => '%s',
			'charset' => $wpdb->charset,
			'ascii'   => false,
		),
	);

	$actual = self::$_wpdb->process_fields( $wpdb->posts, array( 'post_content' => $content ), null );

	$this->assertSame( $expected, $actual );
}
/**
 * process_fields() must fail cleanly when the table does not exist.
 *
 * Error suppression is switched on only around the call under test and is
 * put back before asserting, so a failing assertion cannot leave the shared
 * wpdb instance with errors suppressed for later tests.
 *
 * @ticket 21212
 * @depends test_process_fields
 */
function test_process_fields_on_nonexistent_table( $data ) {
	$data = array( 'post_content' => '¡foo foo foo!' );

	// NOTE(review): relies on suppress_errors() returning the previous
	// setting, as the save/restore pattern elsewhere in this class implies.
	$previous = self::$_wpdb->suppress_errors( true );
	$result   = self::$_wpdb->process_fields( 'nonexistent_table', $data, null );
	self::$_wpdb->suppress_errors( $previous );

	$this->assertFalse( $result );
}
/**
 * The 'pre_get_table_charset' filter must short-circuit the charset lookup.
 *
 * @ticket 21212
 */
function test_pre_get_table_charset_filter() {
	add_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10, 2 );
	$charset = self::$_wpdb->get_table_charset( 'some_table' );
	// Detach the filter before asserting so a failure cannot leak it into other tests.
	remove_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10 );

	// Expected value first, so failure messages read the right way round.
	$this->assertEquals( 'fake_charset', $charset );
}
/**
 * Filter callback for 'pre_get_table_charset': always reports a fake charset.
 */
function filter_pre_get_table_charset( $charset, $table ) {
	$fake_charset = 'fake_charset';

	return $fake_charset;
}
/**
 * The 'pre_get_col_charset' filter must short-circuit the charset lookup.
 *
 * @ticket 21212
 */
function test_pre_get_col_charset_filter() {
	add_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10, 3 );
	$charset = self::$_wpdb->get_col_charset( 'some_table', 'some_col' );
	// Detach the filter before asserting so a failure cannot leak it into other tests.
	remove_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10 );

	// Expected value first, so failure messages read the right way round.
	$this->assertEquals( 'fake_col_charset', $charset );
}
/**
 * Filter callback for 'pre_get_col_charset': always reports a fake charset.
 */
function filter_pre_get_col_charset( $charset, $table, $column ) {
	$fake_charset = 'fake_col_charset';

	return $fake_charset;
}
}
/**
 * Test double that makes wpdb's protected methods callable from tests.
 *
 * It reuses the connection and configuration of the global $wpdb rather
 * than running wpdb's own constructor.
 */
class wpdb_exposed_methods_for_testing extends wpdb {
	/**
	 * Copy the connection handle and the settings the tested methods rely
	 * on from the global $wpdb, and mark the instance as ready.
	 */
	public function __construct() {
		global $wpdb;

		foreach ( array( 'dbh', 'use_mysqli', 'field_types', 'charset' ) as $member ) {
			$this->$member = $wpdb->$member;
		}

		$this->ready = true;
	}

	/**
	 * Forward calls to inaccessible (protected) methods to the method of
	 * the same name, from inside the class where they are reachable.
	 */
	public function __call( $name, $arguments ) {
		$target = array( $this, $name );

		return call_user_func_array( $target, $arguments );
	}
}

View File

@ -0,0 +1,409 @@
<?php
require_once dirname( dirname( __FILE__ ) ) . '/db.php';
/**
* Test WPDB methods
*
* @group wpdb
*/
class Tests_DB_Charset extends WP_UnitTestCase {
/**
* Our special WPDB
* @var resource
*/
protected static $_wpdb;
/**
 * Create the shared wpdb instance used by every test in this class.
 */
public static function setUpBeforeClass() {
	// Chain to the parent so any class-level fixture set up by the base test
	// case still runs.
	parent::setUpBeforeClass();

	self::$_wpdb = new wpdb_exposed_methods_for_testing();
}
/**
* Data provider for test_strip_invalid_text().
*
* Every data set is array( $fields, $expected_fields, $message ). $fields is a
* list of field arrays in the shape handed to strip_invalid_text();
* $expected_fields is the same list with each 'value' replaced by the
* 'expected' value from the table below. One data set is produced per entry,
* plus a final one that passes all entries at once.
*
* @ticket 21212
*/
function data_strip_invalid_text() {
$fields = array(
'latin1' => array(
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => "\xf0\x9f\x8e\xb7",
'expected' => "\xf0\x9f\x8e\xb7"
),
'ascii' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => 'Hello World',
'expected' => 'Hello World'
),
'utf8' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
),
'utf8mb3' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
),
'utf8mb4' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => "H€llo\xf0\x9f\x98\x88World¢"
),
'koi8r' => array(
// koi8r is a character set that needs to be checked in MySQL
'charset' => 'koi8r',
'value' => "\xfdord\xf2ress",
'expected' => "\xfdord\xf2ress",
'db' => true
),
'hebrew' => array(
// hebrew needs to be checked in MySQL, too
'charset' => 'hebrew',
'value' => "\xf9ord\xf7ress",
'expected' => "\xf9ord\xf7ress",
'db' => true
),
'false' => array(
// false is a column with no character set (ie, a number column)
'charset' => false,
'value' => 100,
'expected' => 100
),
);
// big5 is a non-Unicode multibyte charset
$utf8 = "a\xe5\x85\xb1b"; // UTF-8 Character 20849
$big5 = mb_convert_encoding( $utf8, 'BIG-5', 'UTF-8' );
$conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' );
// Make sure PHP's multibyte conversions are working correctly:
// the BIG-5 bytes must differ from the UTF-8 bytes, and must round-trip.
$this->assertNotEquals( $utf8, $big5 );
$this->assertEquals( $utf8, $conv_utf8 );
$fields['big5'] = array(
'charset' => 'big5',
'value' => $big5,
'expected' => $big5
);
// The data above is easy to edit. Now, prepare it for the data provider.
$data_provider = $multiple = $multiple_expected = array();
foreach ( $fields as $test_case => $field ) {
// The expected field is the input field with 'value' swapped for 'expected';
// neither array keeps the 'expected' key.
$expected = $field;
$expected['value'] = $expected['expected'];
unset( $expected['expected'], $field['expected'] );
// We're keeping track of these for our multiple-field test.
$multiple[] = $field;
$multiple_expected[] = $expected;
// strip_invalid_text() expects an array of fields. We're testing one field at a time.
$data = array( $field );
$expected = array( $expected );
// First argument is field data. Second is expected. Third is the message.
$data_provider[] = array( $data, $expected, $test_case );
}
// Time for our test of multiple fields at once.
$data_provider[] = array( $multiple, $multiple_expected, 'multiple fields/charsets' );
return $data_provider;
}
/**
 * strip_invalid_text() must return the provided fields with their values
 * stripped (or left alone) as the data provider expects.
 *
 * @dataProvider data_strip_invalid_text
 * @ticket 21212
 *
 * @param array  $data     Fields to pass to strip_invalid_text().
 * @param array  $expected Fields expected back.
 * @param string $message  Name of the data set, shown on failure.
 */
function test_strip_invalid_text( $data, $expected, $message ) {
	$switch_connection = ( 'koi8r' === $data[0]['charset'] );

	if ( $switch_connection ) {
		self::$_wpdb->query( 'SET NAMES koi8r' );
	}

	$actual = self::$_wpdb->strip_invalid_text( $data );

	// The connection handle is shared with the global $wpdb, so put the
	// connection character set back before asserting; otherwise every test
	// that runs afterwards would talk to MySQL in koi8r.
	if ( $switch_connection && ! empty( self::$_wpdb->charset ) ) {
		self::$_wpdb->query( 'SET NAMES ' . self::$_wpdb->charset );
	}

	$this->assertSame( $expected, $actual, $message );
}
/**
 * process_fields() must return false for post content that contains a
 * 4-byte UTF-8 sequence.
 *
 * NOTE(review): this presumably relies on the posts table using a character
 * set that cannot store 4-byte characters; confirm against the test
 * install.
 *
 * @ticket 21212
 */
function test_process_fields_failure() {
	global $wpdb;
	$data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" );
	$this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) );
}
/**
 * Data provider for test_process_field_charsets().
 *
 * Every data set is array( $data, $expected, $message ). $expected is the
 * full field definition below; $data is the same definition with the
 * 'charset' and 'ascii' keys removed, and $message is the name of the set.
 *
 * @ticket 21212
 */
function data_process_field_charsets() {
	$charset = $GLOBALS['wpdb']->charset; // This is how all tables were installed

	// 'value' and 'format' are $data, 'charset' ends up as part of $expected
	$field_sets = array(
		'no_string_fields' => array(
			'post_parent'   => array( 'value' => 10, 'format' => '%d', 'charset' => false ),
			'comment_count' => array( 'value' => 0, 'format' => '%d', 'charset' => false ),
		),
		'all_ascii_fields' => array(
			'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
			'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
		),
		// This is the same data used in test_process_field_charsets_on_nonexistent_table()
		'non_ascii_string_fields' => array(
			'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
			'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
		),
	);

	$data_provider = array();
	foreach ( $field_sets as $set_name => $expected ) {
		$data = $expected;
		// 'charset' and 'ascii' are part of the expected return only. Keys are
		// removed by index so no by-reference loop variable is left behind.
		foreach ( array_keys( $data ) as $field ) {
			unset( $data[ $field ]['charset'], $data[ $field ]['ascii'] );
		}
		$data_provider[] = array( $data, $expected, $set_name );
	}

	return $data_provider;
}
/**
 * process_field_charsets(), run against the posts table, must hand back the
 * provided fields in exactly the shape the data provider expects.
 *
 * @dataProvider data_process_field_charsets
 * @ticket 21212
 *
 * @param array  $data     Fields to pass to process_field_charsets().
 * @param array  $expected Fields expected back.
 * @param string $message  Name of the data set, shown on failure.
 */
function test_process_field_charsets( $data, $expected, $message ) {
	global $wpdb;

	$posts_table = $wpdb->posts;

	$this->assertSame(
		$expected,
		self::$_wpdb->process_field_charsets( $data, $posts_table ),
		$message
	);
}
/**
 * process_field_charsets() must return false when the table does not exist.
 *
 * The test this test depends on first verifies that this
 * would normally work against the posts table.
 *
 * @ticket 21212
 * @depends test_process_field_charsets
 */
function test_process_field_charsets_on_nonexistent_table() {
	$data = array( 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s' ) );

	// Keep the previous setting so it can be put back exactly, and put it back
	// *before* asserting: a failed assertion aborts the method, which would
	// otherwise leave error output suppressed for every later test.
	$suppress = self::$_wpdb->suppress_errors( true );
	$result   = self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' );
	self::$_wpdb->suppress_errors( $suppress );

	$this->assertFalse( $result );
}
/**
 * check_ascii() must accept a string made only of ASCII bytes: NUL, tab,
 * newline, carriage return, space, and the printable characters.
 *
 * @ticket 21212
 */
function test_check_ascii() {
	$leading   = "\0\t\n\r '";
	$printable = '!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~';

	$is_ascii = self::$_wpdb->check_ascii( $leading . $printable );

	$this->assertTrue( $is_ascii );
}
/**
 * check_ascii() must reject a string that contains non-ASCII characters.
 *
 * @ticket 21212
 */
function test_check_ascii_false() {
	$non_ascii = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«';

	$is_ascii = self::$_wpdb->check_ascii( $non_ascii );

	$this->assertFalse( $is_ascii );
}
/**
 * strip_invalid_text_for_column(), called on the global $wpdb for the
 * post_content column of the posts table, must drop malformed byte
 * sequences and keep the valid characters around them.
 *
 * @ticket 21212
 */
function test_strip_invalid_text_for_column() {
	global $wpdb;

	// Invalid 3-byte and 4-byte sequences
	$dirty = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢";

	$clean = $wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $dirty );

	$this->assertEquals( "H€lloWorld¢", $clean );
}
/**
* Set of table definitions for testing wpdb::get_table_charset and wpdb::get_col_charset.
*
* Each entry holds:
* - 'definition': the column list appended to a CREATE TABLE statement by the data providers,
* - 'table_expected': the charset expected from get_table_charset() for that table,
* - 'column_expected': map of column name to the charset expected from get_col_charset()
* (false for columns expected to report no character set).
*
* @var array
*/
protected $table_and_column_defs = array(
array(
'definition' => '( a INT, b FLOAT )',
'table_expected' => 'latin1',
'column_expected' => array( 'a' => false, 'b' => false )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )',
'table_expected' => 'big5',
'column_expected' => array( 'a' => 'big5', 'b' => 'big5' )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )',
'table_expected' => 'binary',
'column_expected' => array( 'a' => 'big5', 'b' => false )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )',
'table_expected' => 'binary',
'column_expected' => array( 'a' => 'latin1', 'b' => false )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )',
'table_expected' => 'koi8r',
'column_expected' => array( 'a' => 'latin1', 'b' => 'koi8r' )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )',
'table_expected' => 'utf8',
'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8' )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT CHARACTER SET utf8mb4 )',
'table_expected' => 'utf8',
'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8mb4' )
),
array(
'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )',
'table_expected' => 'ascii',
'column_expected' => array( 'a' => 'big5', 'b' => 'koi8r' )
),
);
/**
 * Data provider for test_get_table_charset().
 *
 * Builds one data set per entry of $table_and_column_defs:
 * array( $drop_sql, $create_sql, $table_name, $expected_table_charset ),
 * where the table name carries a random suffix.
 *
 * @ticket 21212
 */
function data_test_get_table_charset() {
	$prefix   = 'test_get_table_charset';
	$datasets = array();

	foreach ( $this->table_and_column_defs as $def ) {
		$table = $prefix . '_' . rand_str( 5 );

		$datasets[] = array(
			"DROP TABLE IF EXISTS $table",
			"CREATE TABLE $table {$def['definition']}",
			$table,
			$def['table_expected'],
		);
	}

	return $datasets;
}
/**
 * get_table_charset() must report the expected character set for a table
 * created from the provided definition.
 *
 * @dataProvider data_test_get_table_charset
 * @ticket 21212
 *
 * @param string $drop             SQL that drops the test table if it exists.
 * @param string $create           SQL that creates the test table.
 * @param string $table            Name of the test table.
 * @param string $expected_charset Charset expected from get_table_charset().
 */
function test_get_table_charset( $drop, $create, $table, $expected_charset ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() throws, so nothing after it runs.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	self::$_wpdb->query( $create );
	$charset = self::$_wpdb->get_table_charset( $table );

	// Drop the table before asserting: a failed assertion aborts the method
	// and would otherwise leave the table behind in the test database.
	self::$_wpdb->query( $drop );

	// PHPUnit takes the expected value first.
	$this->assertEquals( $expected_charset, $charset );
}
/**
 * Data provider for test_get_column_charset().
 *
 * Builds one data set per entry of $table_and_column_defs:
 * array( $drop_sql, $create_sql, $table_name, $expected_column_charsets ),
 * where the table name carries a random suffix.
 *
 * @ticket 21212
 */
function data_test_get_column_charset() {
	$prefix   = 'test_get_column_charset';
	$datasets = array();

	foreach ( $this->table_and_column_defs as $def ) {
		$table = $prefix . '_' . rand_str( 5 );

		$datasets[] = array(
			"DROP TABLE IF EXISTS $table",
			"CREATE TABLE $table {$def['definition']}",
			$table,
			$def['column_expected'],
		);
	}

	return $datasets;
}
/**
 * get_col_charset() must report the expected character set for every column
 * of a table created from the provided definition.
 *
 * @dataProvider data_test_get_column_charset
 * @ticket 21212
 *
 * @param string $drop             SQL that drops the test table if it exists.
 * @param string $create           SQL that creates the test table.
 * @param string $table            Name of the test table.
 * @param array  $expected_charset Map of column name to expected charset.
 */
function test_get_column_charset( $drop, $create, $table, $expected_charset ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() throws, so nothing after it runs.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	self::$_wpdb->query( $create );

	// Collect every result first so the table can be dropped before any
	// assertion has a chance to abort the method and leave it behind.
	$actual_charset = array();
	foreach ( array_keys( $expected_charset ) as $column ) {
		$actual_charset[ $column ] = self::$_wpdb->get_col_charset( $table, $column );
	}

	self::$_wpdb->query( $drop );

	foreach ( $expected_charset as $column => $charset ) {
		$this->assertEquals( $charset, $actual_charset[ $column ], "Column: $column" );
	}
}
/**
 * Data provider for test_strip_invalid_text_from_query().
 *
 * Every data set is array( $create_sql, $query, $expected_query, $drop_sql ),
 * built from a column definition, the VALUES clause to send, and the VALUES
 * clause expected back. Each table name carries a random suffix.
 *
 * @ticket 21212
 */
function data_strip_invalid_text_from_query() {
	$prefix = 'strip_invalid_text_from_query_table';

	$cases = array(
		array(
			// binary tables don't get stripped
			"( a VARCHAR(50) CHARACTER SET utf8, b BINARY )", // create
			"('foo\xf0\x9f\x98\x88bar', 'foo')", // query
			"('foo\xf0\x9f\x98\x88bar', 'foo')" // expected result
		),
		array(
			// utf8/utf8mb4 tables default to utf8
			"( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )",
			"('foo\xf0\x9f\x98\x88bar', 'foo')",
			"('foobar', 'foo')"
		),
	);

	$datasets = array();
	foreach ( $cases as $case ) {
		list( $definition, $values, $expected_values ) = $case;
		$table = $prefix . '_' . rand_str( 5 );

		$datasets[] = array(
			"CREATE TABLE $table $definition",
			"INSERT INTO $table VALUES $values",
			"INSERT INTO $table VALUES $expected_values",
			"DROP TABLE IF EXISTS $table",
		);
	}

	return $datasets;
}
/**
 * strip_invalid_text_from_query() must return the provided query rewritten
 * (or left alone) as the data provider expects for the created table.
 *
 * @dataProvider data_strip_invalid_text_from_query
 * @ticket 21212
 *
 * @param string $create   SQL that creates the test table.
 * @param string $query    Query to pass to strip_invalid_text_from_query().
 * @param string $expected Query expected back.
 * @param string $drop     SQL that drops the test table if it exists.
 */
function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) {
	self::$_wpdb->query( $drop );

	if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) {
		// markTestSkipped() throws, so nothing after it runs.
		$this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." );
	}

	self::$_wpdb->query( $create );
	$return = self::$_wpdb->strip_invalid_text_from_query( $query );

	// Drop the table before asserting: a failed assertion aborts the method
	// and would otherwise leave the table behind in the test database.
	self::$_wpdb->query( $drop );

	$this->assertEquals( $expected, $return );
}
/**
 * query() on the global $wpdb must return false for an INSERT into the
 * posts table whose post_content value contains a 4-byte UTF-8 sequence.
 *
 * @ticket 21212
 */
function test_invalid_characters_in_query() {
	global $wpdb;

	$sql    = "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')";
	$result = $wpdb->query( $sql );

	$this->assertFalse( $result );
}
}