diff --git a/src/wp-includes/wp-db.php b/src/wp-includes/wp-db.php index 8e4d2a42e5..dc5a8db228 100644 --- a/src/wp-includes/wp-db.php +++ b/src/wp-includes/wp-db.php @@ -143,6 +143,33 @@ class wpdb { */ protected $result; + /** + * Cached column info, for sanity checking data before inserting + * + * @since 4.1.0 + * @access protected + * @var array + */ + protected $col_meta = array(); + + /** + * Calculated character sets on tables + * + * @since 4.1.0 + * @access protected + * @var array + */ + protected $table_charset = array(); + + /** + * Whether text fields in the current query need to be sanity checked. + * + * @since 4.1.0 + * @access protected + * @var bool + */ + protected $check_current_query = true; + /** * Saved info on the table column * @@ -647,6 +674,14 @@ class wpdb { * @param mixed $value The value to set */ public function __set( $name, $value ) { + $protected_members = array( + 'col_meta', + 'table_charset', + 'check_current_query', + ); + if ( in_array( $name, $protected_members, true ) ) { + return; + } $this->$name = $value; } @@ -1544,8 +1579,10 @@ class wpdb { * @return int|false Number of rows affected/selected or false on error */ public function query( $query ) { - if ( ! $this->ready ) + if ( ! $this->ready ) { + $this->check_current_query = true; return false; + } /** * Filter the database query. @@ -1564,6 +1601,17 @@ class wpdb { // Log how the function was called $this->func_call = "\$db->query(\"$query\")"; + // If we're writing to the database, make sure the query will write safely. + if ( $this->check_current_query && ! $this->check_ascii( $query ) ) { + $stripped_query = $this->strip_invalid_text_from_query( $query ); + if ( $stripped_query !== $query ) { + $this->insert_id = 0; + return false; + } + } + + $this->check_current_query = true; + // Keep track of the last query for debug.. $this->last_query = $query; @@ -1737,23 +1785,29 @@ class wpdb { * @return int|false The number of rows affected, or false on error. 
*/ function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) { - if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) + if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) { return false; - $this->insert_id = 0; - $formats = $format = (array) $format; - $fields = array_keys( $data ); - $formatted_fields = array(); - foreach ( $fields as $field ) { - if ( !empty( $format ) ) - $form = ( $form = array_shift( $formats ) ) ? $form : $format[0]; - elseif ( isset( $this->field_types[$field] ) ) - $form = $this->field_types[$field]; - else - $form = '%s'; - $formatted_fields[] = $form; } - $sql = "{$type} INTO `$table` (`" . implode( '`,`', $fields ) . "`) VALUES (" . implode( ",", $formatted_fields ) . ")"; - return $this->query( $this->prepare( $sql, $data ) ); + + $data = $this->process_fields( $table, $data, $format ); + if ( false === $data ) { + return false; + } + + $formats = $values = array(); + foreach ( $data as $value ) { + $formats[] = $value['format']; + $values[] = $value['value']; + } + + $fields = '`' . implode( '`, `', array_keys( $data ) ) . '`'; + $formats = implode( ', ', $formats ); + + $sql = "$type INTO `$table` ($fields) VALUES ($formats)"; + + $this->insert_id = 0; + $this->check_current_query = false; + return $this->query( $this->prepare( $sql, $values ) ); } /** @@ -1778,34 +1832,36 @@ class wpdb { * @return int|false The number of rows updated, or false on error. */ public function update( $table, $data, $where, $format = null, $where_format = null ) { - if ( ! is_array( $data ) || ! is_array( $where ) ) + if ( ! is_array( $data ) || ! is_array( $where ) ) { return false; - - $formats = $format = (array) $format; - $bits = $wheres = array(); - foreach ( (array) array_keys( $data ) as $field ) { - if ( !empty( $format ) ) - $form = ( $form = array_shift( $formats ) ) ? 
$form : $format[0]; - elseif ( isset($this->field_types[$field]) ) - $form = $this->field_types[$field]; - else - $form = '%s'; - $bits[] = "`$field` = {$form}"; } - $where_formats = $where_format = (array) $where_format; - foreach ( (array) array_keys( $where ) as $field ) { - if ( !empty( $where_format ) ) - $form = ( $form = array_shift( $where_formats ) ) ? $form : $where_format[0]; - elseif ( isset( $this->field_types[$field] ) ) - $form = $this->field_types[$field]; - else - $form = '%s'; - $wheres[] = "`$field` = {$form}"; + $data = $this->process_fields( $table, $data, $format ); + if ( false === $data ) { + return false; + } + $where = $this->process_fields( $table, $where, $where_format ); + if ( false === $where ) { + return false; } - $sql = "UPDATE `$table` SET " . implode( ', ', $bits ) . ' WHERE ' . implode( ' AND ', $wheres ); - return $this->query( $this->prepare( $sql, array_merge( array_values( $data ), array_values( $where ) ) ) ); + $fields = $conditions = $values = array(); + foreach ( $data as $field => $value ) { + $fields[] = "`$field` = " . $value['format']; + $values[] = $value['value']; + } + foreach ( $where as $field => $value ) { + $conditions[] = "`$field` = " . $value['format']; + $values[] = $value['value']; + } + + $fields = implode( ', ', $fields ); + $conditions = implode( ' AND ', $conditions ); + + $sql = "UPDATE `$table` SET $fields WHERE $conditions"; + + $this->check_current_query = false; + return $this->query( $this->prepare( $sql, $values ) ); } /** @@ -1827,29 +1883,133 @@ class wpdb { * @return int|false The number of rows updated, or false on error. */ public function delete( $table, $where, $where_format = null ) { - if ( ! is_array( $where ) ) + if ( ! is_array( $where ) ) { return false; - - $wheres = array(); - - $where_formats = $where_format = (array) $where_format; - - foreach ( array_keys( $where ) as $field ) { - if ( !empty( $where_format ) ) { - $form = ( $form = array_shift( $where_formats ) ) ? 
$form : $where_format[0]; - } elseif ( isset( $this->field_types[ $field ] ) ) { - $form = $this->field_types[ $field ]; - } else { - $form = '%s'; - } - - $wheres[] = "$field = $form"; } - $sql = "DELETE FROM $table WHERE " . implode( ' AND ', $wheres ); - return $this->query( $this->prepare( $sql, $where ) ); + $where = $this->process_fields( $table, $where, $where_format ); + if ( false === $where ) { + return false; + } + + $conditions = $values = array(); + foreach ( $where as $field => $value ) { + $conditions[] = "`$field` = " . $value['format']; + $values[] = $value['value']; + } + + $conditions = implode( ' AND ', $conditions ); + + $sql = "DELETE FROM `$table` WHERE $conditions"; + + $this->check_current_query = false; + return $this->query( $this->prepare( $sql, $values ) ); } + /** + * Processes arrays of field/value pairs and field formats. + * + * This is a helper method for wpdb's CRUD methods, which take field/value + * pairs for inserts, updates, and where clauses. This method first pairs + * each value with a format. Then it determines the charset of that field, + * using that to determine if any invalid text would be stripped. If text is + * stripped, then field processing is rejected and the query fails. + * + * @since 4.1.0 + * @access protected + * + * @param string $table Table name. + * @param array $data Field/value pair. + * @param mixed $format Format for each field. + * @return array|bool Returns an array of fields that contain paired values + * and formats. Returns false for invalid values. + */ + protected function process_fields( $table, $data, $format ) { + $data = $this->process_field_formats( $data, $format ); + $data = $this->process_field_charsets( $data, $table ); + if ( false === $data ) { + return false; + } + + $converted_data = $this->strip_invalid_text( $data ); + + if ( $data !== $converted_data ) { + return false; + } + + return $data; + } + + /** + * Prepares arrays of value/format pairs as passed to wpdb CRUD methods. 
+ * + * @since 4.1.0 + * @access protected + * + * @param array $data Array of fields to values. + * @param mixed $format Formats to be mapped to the values in $data. + * @return array Array, keyed by field names with values being an array + * of 'value' and 'format' keys. + */ + protected function process_field_formats( $data, $format ) { + $formats = $original_formats = (array) $format; + + foreach ( $data as $field => $value ) { + $value = array( + 'value' => $value, + 'format' => '%s', + ); + + if ( ! empty( $format ) ) { + $value['format'] = array_shift( $formats ); + if ( ! $value['format'] ) { + $value['format'] = reset( $original_formats ); + } + } elseif ( isset( $this->field_types[ $field ] ) ) { + $value['format'] = $this->field_types[ $field ]; + } + + $data[ $field ] = $value; + } + + return $data; + } + + /** + * Adds field charsets to field/value/format arrays + * generated by the process_field_formats() method. + * + * @since 4.1.0 + * @access protected + * + * @param array $data As it comes from the process_field_formats() method. + * @param string $table Table name. + * @return The same array as $data with additional 'charset' keys. + */ + protected function process_field_charsets( $data, $table ) { + foreach ( $data as $field => $value ) { + if ( '%d' === $value['format'] || '%f' === $value['format'] ) { + // We can skip this field if we know it isn't a string. + // This checks %d/%f versus ! %s because it's sprintf() could take more. + $value['charset'] = false; + } elseif ( $this->check_ascii( $value['value'] ) ) { + // If it's ASCII, then we don't need the charset. We can skip this field. + $value['charset'] = false; + } else { + $value['charset'] = $this->get_col_charset( $table, $field ); + if ( is_wp_error( $value['charset'] ) ) { + return false; + } + + // This isn't ASCII. Don't have strip_invalid_text() re-check. 
+ $value['ascii'] = false; + } + + $data[ $field ] = $value; + } + + return $data; + } /** * Retrieve one variable from the database. @@ -1867,8 +2027,11 @@ class wpdb { */ public function get_var( $query = null, $x = 0, $y = 0 ) { $this->func_call = "\$db->get_var(\"$query\", $x, $y)"; - if ( $query ) + + if ( $query ) { + $this->check_current_query = false; $this->query( $query ); + } // Extract var out of cached results based x,y vals if ( !empty( $this->last_result[$y] ) ) { @@ -1894,10 +2057,12 @@ class wpdb { */ public function get_row( $query = null, $output = OBJECT, $y = 0 ) { $this->func_call = "\$db->get_row(\"$query\",$output,$y)"; - if ( $query ) + if ( $query ) { + $this->check_current_query = false; $this->query( $query ); - else + } else { return null; + } if ( !isset( $this->last_result[$y] ) ) return null; @@ -1930,8 +2095,10 @@ class wpdb { * @return array Database query result. Array indexed from 0 by SQL result row number. */ public function get_col( $query = null , $x = 0 ) { - if ( $query ) + if ( $query ) { + $this->check_current_query = false; $this->query( $query ); + } $new_array = array(); // Extract the column values @@ -1957,10 +2124,12 @@ class wpdb { public function get_results( $query = null, $output = OBJECT ) { $this->func_call = "\$db->get_results(\"$query\", $output)"; - if ( $query ) + if ( $query ) { + $this->check_current_query = false; $this->query( $query ); - else + } else { return null; + } $new_array = array(); if ( $output == OBJECT ) { @@ -1997,6 +2166,453 @@ class wpdb { return null; } + /** + * Retrieves the character set for the given table. + * + * @since 4.1.0 + * @access protected + * + * @param string $table Table name. + * @return string|WP_Error Table character set, `WP_Error` object if it couldn't be found. + */ + protected function get_table_charset( $table ) { + $table = strtolower( $table ); + + /** + * Filter the table charset value before the DB is checked. 
+ * + * Passing a non-null value to the filter will effectively short-circuit + * checking the DB for the charset, returning that value instead. + * + * @since 4.1.0 + * + * @param string $charset The character set to use. Default null. + * @param string $table The name of the table being checked. + */ + $charset = apply_filters( 'pre_get_table_charset', null, $table ); + if ( null !== $charset ) { + return $charset; + } + + if ( isset( $this->table_charset[ $table ] ) ) { + return $this->table_charset[ $table ]; + } + + $charsets = $columns = array(); + $results = $this->get_results( "SHOW FULL COLUMNS FROM `$table`" ); + if ( ! $results ) { + return new WP_Error( 'wpdb_get_table_charset_failure' ); + } + + foreach ( $results as $column ) { + $columns[ strtolower( $column->Field ) ] = $column; + } + + $this->col_meta[ $table ] = $columns; + + foreach ( $columns as $column ) { + if ( $column->Collation ) { + list( $charset ) = explode( '_', $column->Collation ); + $charsets[ strtolower( $charset ) ] = true; + } + + list( $type ) = explode( '(', $column->Type ); + + // A binary/blob means the whole query gets treated like this. + if ( in_array( strtoupper( $type ), array( 'BINARY', 'VARBINARY', 'TINYBLOB', 'MEDIUMBLOB', 'BLOB', 'LONGBLOB' ) ) ) { + $this->table_charset[ $table ] = 'binary'; + return 'binary'; + } + } + + // utf8mb3 is an alias for utf8. + if ( isset( $charsets['utf8mb3'] ) ) { + $charsets['utf8'] = true; + unset( $charsets['utf8mb3'] ); + } + + // Check if we have more than one charset in play. + $count = count( $charsets ); + if ( 1 === $count ) { + $charset = key( $charsets ); + } elseif ( 0 === $count ) { + // No charsets, assume this table can store whatever. + $charset = 'latin1'; + } else { + // More than one charset. Remove latin1 if present and recalculate. + unset( $charsets['latin1'] ); + $count = count( $charsets ); + if ( 1 === $count ) { + // Only one charset (besides latin1). 
+ $charset = key( $charsets ); + } elseif ( 2 === $count && isset( $charsets['utf8'], $charsets['utf8mb4'] ) ) { + // Two charsets, but they're utf8 and utf8mb4, use utf8. + $charset = 'utf8'; + } else { + // Two mixed character sets. ascii. + $charset = 'ascii'; + } + } + + $this->table_charset[ $table ] = $charset; + return $charset; + } + + /** + * Retrieves the character set for the given column. + * + * @since 4.1.0 + * @access protected + * + * @param string $table Table name. + * @param string $column Column name. + * @return mixed Column character set as a string. + * False if the column has no character set. + * `WP_Error` object if there was an error. + */ + protected function get_col_charset( $table, $column ) { + $table = strtolower( $table ); + $column = strtolower( $column ); + + /** + * Filter the column charset value before the DB is checked. + * + * Passing a non-null value to the filter will short-circuit + * checking the DB for the charset, returning that value instead. + * + * @since 4.1.0 + * + * @param string $charset The character set to use. Default null. + * @param string $table The name of the table being checked. + * @param string $column The name of the column being checked. + */ + $charset = apply_filters( 'pre_get_col_charset', null, $table, $column ); + if ( null !== $charset ) { + return $charset; + } + + if ( empty( $this->table_charset[ $table ] ) ) { + // This primes column information for us. + $table_charset = $this->get_table_charset( $table ); + if ( is_wp_error( $table_charset ) ) { + return $table_charset; + } + } + + // If still no column information, return the table charset. + if ( empty( $this->col_meta[ $table ] ) ) { + return $this->table_charset[ $table ]; + } + + // If this column doesn't exist, return the table charset. + if ( empty( $this->col_meta[ $table ][ $column ] ) ) { + return $this->table_charset[ $table ]; + } + + // Return false when it's not a string column. 
+ if ( empty( $this->col_meta[ $table ][ $column ]->Collation ) ) { + return false; + } + + list( $charset ) = explode( '_', $this->col_meta[ $table ][ $column ]->Collation ); + return $charset; + } + + /** + * Check if a string is ASCII. + * + * The negative regex is faster for non-ASCII strings, as it allows + * the search to finish as soon as it encounters a non-ASCII character. + * + * @since 4.1.0 + * @access protected + * + * @param string $string String to check. + * @return bool True if ASCII, false if not. + */ + protected function check_ascii( $string ) { + if ( function_exists( 'mb_check_encoding' ) ) { + if ( mb_check_encoding( $string, 'ASCII' ) ) { + return true; + } + } elseif ( ! preg_match( '/[^\x00-\x7F]/', $string ) ) { + return true; + } + + return false; + } + + /** + * Strips any invalid characters based on value/charset pairs. + * + * @since 4.1.0 + * @access protected + * + * @param array $data Array of value arrays. Each value array has the keys + * 'value' and 'charset'. An optional 'ascii' key can be + * set to false to avoid redundant ASCII checks. + * @return array|WP_Error The $data parameter, with invalid characters removed from + * each value. This works as a passthrough: any additional keys + * such as 'field' are retained in each value array. If we cannot + * remove invalid characters, a `WP_Error` object is returned. + */ + protected function strip_invalid_text( $data ) { + // Some multibyte character sets that we can check in PHP. + $mb_charsets = array( + 'ascii' => 'ASCII', + 'big5' => 'BIG-5', + 'eucjpms' => 'eucJP-win', + 'gb2312' => 'EUC-CN', + 'ujis' => 'EUC-JP', + 'utf32' => 'UTF-32', + 'utf8mb4' => 'UTF-8', + ); + + $supported_charsets = array(); + if ( function_exists( 'mb_list_encodings' ) ) { + $supported_charsets = mb_list_encodings(); + } + + $db_check_string = false; + + foreach ( $data as &$value ) { + $charset = $value['charset']; + + // latin1 will happily store anything. 
+ if ( 'latin1' === $charset ) { + continue; + } + + // Column or value isn't a string. + if ( false === $charset || ! is_string( $value['value'] ) ) { + continue; + } + + // ASCII is always OK. + if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) { + continue; + } + + // Convert the text locally. + if ( $supported_charsets ) { + if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) { + $value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] ); + continue; + } + } + + // utf8(mb3) can be handled by regex, which is a bunch faster than a DB lookup. + if ( 'utf8' === $charset || 'utf8mb3' === $charset ) { + $regex = '/ + ( + (?: [\x00-\x7F] # single-byte sequences 0xxxxxxx + | [\xC2-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx + | \xE0[\xA0-\xBF][\x80-\xBF] # triple-byte sequences 1110xxxx 10xxxxxx * 2 + | [\xE1-\xEC][\x80-\xBF]{2} + | \xED[\x80-\x9F][\x80-\xBF] + | [\xEE-\xEF][\x80-\xBF]{2} + ){1,100} # ...one or more times + ) + | . # anything else + /x'; + $value['value'] = preg_replace( $regex, '$1', $value['value'] ); + continue; + } + + // We couldn't use any local conversions, send it to the DB. + $value['db'] = $db_check_string = true; + } + unset( $value ); // Remove by reference. + + if ( $db_check_string ) { + $queries = array(); + foreach ( $data as $col => $value ) { + if ( ! empty( $value['db'] ) ) { + if ( ! isset( $queries[ $value['charset'] ] ) ) { + $queries[ $value['charset'] ] = array(); + } + + // Split the CONVERT() calls by charset, so we can make sure the connection is right + $queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] ); + } + } + + $connection_charset = $this->charset; + foreach ( $queries as $charset => $query ) { + if ( ! 
$query ) { + continue; + } + + // Change the charset to match the string(s) we're converting + if ( $charset !== $this->charset ) { + $this->set_charset( $this->dbh, $charset ); + } + + $row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N ); + if ( ! $row ) { + $this->set_charset( $this->dbh, $connection_charset ); + return new WP_Error( 'wpdb_strip_invalid_text_failure' ); + } + + $cols = array_keys( $query ); + $col_count = count( $cols ); + for ( $ii = 0; $ii < $col_count; $ii++ ) { + $data[ $cols[ $ii ] ]['value'] = $row[ $ii ]; + } + } + + // Don't forget to change the charset back! + if ( $connection_charset !== $this->charset ) { + $this->set_charset( $this->dbh, $connection_charset ); + } + } + + return $data; + } + + /** + * Strips any invalid characters from the query. + * + * @since 4.1.0 + * @access protected + * + * @param string $query Query to convert. + * @return string|WP_Error The converted query, or a `WP_Error` object if the conversion fails. + */ + protected function strip_invalid_text_from_query( $query ) { + $table = $this->get_table_from_query( $query ); + if ( $table ) { + $charset = $this->get_table_charset( $table ); + if ( is_wp_error( $charset ) ) { + return $charset; + } + + // We can't reliably strip text from tables containing binary/blob columns + if ( 'binary' === $charset ) { + return $query; + } + } else { + $charset = $this->charset; + } + + $data = array( + 'value' => $query, + 'charset' => $charset, + 'ascii' => false, + ); + + $data = $this->strip_invalid_text( array( $data ) ); + if ( is_wp_error( $data ) ) { + return $data; + } + + return $data[0]['value']; + } + + /** + * Strips any invalid characters from the string for a given table and column. + * + * @since 4.1.0 + * @access public + * + * @param string $table Table name. + * @param string $column Column name. + * @param string $value The text to check. + * @return string|WP_Error The converted string, or a `WP_Error` object if the conversion fails. 
+ */ + public function strip_invalid_text_for_column( $table, $column, $value ) { + if ( $this->check_ascii( $value ) || ! is_string( $value ) ) { + return $value; + } + + $charset = $this->get_col_charset( $table, $column ); + if ( ! $charset ) { + // Not a string column. + return $value; + } elseif ( is_wp_error( $charset ) ) { + // Bail on real errors. + return $charset; + } + + $data = array( + $column => array( + 'value' => $value, + 'charset' => $charset, + 'ascii' => false, + ) + ); + + $data = $this->strip_invalid_text( $data ); + if ( is_wp_error( $data ) ) { + return $data; + } + + return $data[ $column ]['value']; + } + + /** + * Find the first table name referenced in a query. + * + * @since 4.1.0 + * @access protected + * + * @param string $query The query to search. + * @return string|bool $table The table name found, or false if a table couldn't be found. + */ + protected function get_table_from_query( $query ) { + // Remove characters that can legally trail the table name. + $query = rtrim( $query, ';/-#' ); + + // Allow (select...) union [...] style queries. Use the first query's table name. + $query = ltrim( $query, "\r\n\t (" ); + + /* + * Strip everything between parentheses except nested selects and use only 1,000 + * chars of the query. + */ + $query = preg_replace( '/\((?!\s*select)[^(]*?\)/is', '()', substr( $query, 0, 1000 ) ); + + // Quickly match most common queries. + if ( preg_match( '/^\s*(?:' + . 'SELECT.*?\s+FROM' + . '|INSERT(?:\s+LOW_PRIORITY|\s+DELAYED|\s+HIGH_PRIORITY)?(?:\s+IGNORE)?(?:\s+INTO)?' + . '|REPLACE(?:\s+LOW_PRIORITY|\s+DELAYED)?(?:\s+INTO)?' + . '|UPDATE(?:\s+LOW_PRIORITY)?(?:\s+IGNORE)?' + . '|DELETE(?:\s+LOW_PRIORITY|\s+QUICK|\s+IGNORE)*(?:\s+FROM)?' + . ')\s+`?([\w-]+)`?/is', $query, $maybe ) ) { + return $maybe[1]; + } + + // SHOW TABLE STATUS and SHOW TABLES + if ( preg_match( '/^\s*(?:' + . 'SHOW\s+TABLE\s+STATUS.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)' + . 
'|SHOW\s+(?:FULL\s+)?TABLES.+(?:LIKE\s+|WHERE\s+Name\s*=\s*)' + . ')\W([\w-]+)\W/is', $query, $maybe ) ) { + return $maybe[1]; + } + + // Big pattern for the rest of the table-related queries. + if ( preg_match( '/^\s*(?:' + . '(?:EXPLAIN\s+(?:EXTENDED\s+)?)?SELECT.*?\s+FROM' + . '|DESCRIBE|DESC|EXPLAIN|HANDLER' + . '|(?:LOCK|UNLOCK)\s+TABLE(?:S)?' + . '|(?:RENAME|OPTIMIZE|BACKUP|RESTORE|CHECK|CHECKSUM|ANALYZE|REPAIR).*\s+TABLE' + . '|TRUNCATE(?:\s+TABLE)?' + . '|CREATE(?:\s+TEMPORARY)?\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?' + . '|ALTER(?:\s+IGNORE)?\s+TABLE' + . '|DROP\s+TABLE(?:\s+IF\s+EXISTS)?' + . '|CREATE(?:\s+\w+)?\s+INDEX.*\s+ON' + . '|DROP\s+INDEX.*\s+ON' + . '|LOAD\s+DATA.*INFILE.*INTO\s+TABLE' + . '|(?:GRANT|REVOKE).*ON\s+TABLE' + . '|SHOW\s+(?:.*FROM|.*TABLE)' + . ')\s+\(*\s*`?([\w-]+)`?\s*\)*/is', $query, $maybe ) ) { + return $maybe[1]; + } + + return false; + } + /** * Load the column metadata from the last query. * @@ -2160,6 +2776,8 @@ class wpdb { return version_compare( $version, '4.1', '>=' ); case 'set_charset' : return version_compare( $version, '5.0.7', '>=' ); + case 'utf8mb4' : // @since 4.1.0 + return version_compare( $version, '5.5.3', '>=' ); } return false; diff --git a/tests/phpunit/tests/db.php b/tests/phpunit/tests/db.php index 62a056f05b..757fe9127b 100644 --- a/tests/phpunit/tests/db.php +++ b/tests/phpunit/tests/db.php @@ -13,6 +13,16 @@ class Tests_DB extends WP_UnitTestCase { */ protected $_queries = array(); + /** + * Our special WPDB + * @var resource + */ + protected static $_wpdb; + + public static function setUpBeforeClass() { + self::$_wpdb = new wpdb_exposed_methods_for_testing(); + } + /** * Set up the test fixture */ @@ -26,8 +36,8 @@ class Tests_DB extends WP_UnitTestCase { * Tear down the test fixture */ public function tearDown() { - parent::tearDown(); remove_filter( 'query', array( $this, 'query_filter' ) ); + parent::tearDown(); } /** @@ -223,6 +233,21 @@ class Tests_DB extends WP_UnitTestCase { $this->assertNotEmpty( 
$wpdb->dbh ); } + /** + * @ticket 21212 + */ + function test_wpdb_actually_protected_properties() { + global $wpdb; + + $new_meta = "HAHA I HOPE THIS DOESN'T WORK"; + + $col_meta = $wpdb->col_meta; + $wpdb->col_meta = $new_meta; + + $this->assertNotEquals( $col_meta, $new_meta ); + $this->assertEquals( $col_meta, $wpdb->col_meta ); + } + /** * @ticket 18510 */ @@ -493,4 +518,282 @@ class Tests_DB extends WP_UnitTestCase { $wpdb->query( 'DROP PROCEDURE IF EXISTS `test_mysqli_flush_sync_procedure`' ); $wpdb->suppress_errors( $suppress ); } + + /** + * @ticket 21212 + */ + function data_get_table_from_query() { + $table = 'a_test_table_name'; + + $queries = array( + // Basic + "SELECT * FROM $table", + "SELECT * FROM `$table`", + + "INSERT $table", + "INSERT IGNORE $table", + "INSERT IGNORE INTO $table", + "INSERT INTO $table", + "INSERT LOW_PRIORITY $table", + "INSERT DELAYED $table", + "INSERT HIGH_PRIORITY $table", + "INSERT LOW_PRIORITY IGNORE $table", + "INSERT LOW_PRIORITY INTO $table", + "INSERT LOW_PRIORITY IGNORE INTO $table", + + "REPLACE $table", + "REPLACE INTO $table", + "REPLACE LOW_PRIORITY $table", + "REPLACE DELAYED $table", + "REPLACE LOW_PRIORITY INTO $table", + + "UPDATE LOW_PRIORITY $table", + "UPDATE LOW_PRIORITY IGNORE $table", + + "DELETE $table", + "DELETE IGNORE $table", + "DELETE IGNORE FROM $table", + "DELETE FROM $table", + "DELETE LOW_PRIORITY $table", + "DELETE QUICK $table", + "DELETE IGNORE $table", + "DELETE LOW_PRIORITY FROM $table", + + // STATUS + "SHOW TABLE STATUS LIKE '$table'", + "SHOW TABLE STATUS WHERE NAME='$table'", + + "SHOW TABLES LIKE '$table'", + "SHOW FULL TABLES LIKE '$table'", + "SHOW TABLES WHERE NAME='$table'", + + // Extended + "EXPLAIN SELECT * FROM $table", + "EXPLAIN EXTENDED SELECT * FROM $table", + "EXPLAIN EXTENDED SELECT * FROM `$table`", + + "DESCRIBE $table", + "DESC $table", + "EXPLAIN $table", + "HANDLER $table", + + "LOCK TABLE $table", + "LOCK TABLES $table", + "UNLOCK TABLE $table", + + "RENAME TABLE 
$table", + "OPTIMIZE TABLE $table", + "BACKUP TABLE $table", + "RESTORE TABLE $table", + "CHECK TABLE $table", + "CHECKSUM TABLE $table", + "ANALYZE TABLE $table", + "REPAIR TABLE $table", + + "TRUNCATE $table", + "TRUNCATE TABLE $table", + + "CREATE TABLE $table", + "CREATE TEMPORARY TABLE $table", + "CREATE TABLE IF NOT EXISTS $table", + + "ALTER TABLE $table", + "ALTER IGNORE TABLE $table", + + "DROP TABLE $table", + "DROP TABLE IF EXISTS $table", + + "CREATE INDEX foo(bar(20)) ON $table", + "CREATE UNIQUE INDEX foo(bar(20)) ON $table", + "CREATE FULLTEXT INDEX foo(bar(20)) ON $table", + "CREATE SPATIAL INDEX foo(bar(20)) ON $table", + + "DROP INDEX foo ON $table", + + "LOAD DATA INFILE 'wp.txt' INTO TABLE $table", + "LOAD DATA LOW_PRIORITY INFILE 'wp.txt' INTO TABLE $table", + "LOAD DATA CONCURRENT INFILE 'wp.txt' INTO TABLE $table", + "LOAD DATA LOW_PRIORITY LOCAL INFILE 'wp.txt' INTO TABLE $table", + "LOAD DATA INFILE 'wp.txt' REPLACE INTO TABLE $table", + "LOAD DATA INFILE 'wp.txt' IGNORE INTO TABLE $table", + + "GRANT ALL ON TABLE $table", + "REVOKE ALL ON TABLE $table", + + "SHOW COLUMNS FROM $table", + "SHOW FULL COLUMNS FROM $table", + "SHOW CREATE TABLE $table", + "SHOW INDEX FROM $table", + ); + + foreach ( $queries as &$query ) { + $query = array( $query, $table ); + } + return $queries; + } + + /** + * @dataProvider data_get_table_from_query + * @ticket 21212 + */ + function test_get_table_from_query( $query, $table ) { + $this->assertEquals( $table, self::$_wpdb->get_table_from_query( $query ) ); + } + + function data_get_table_from_query_false() { + $table = 'a_test_table_name'; + return array( + array( "LOL THIS ISN'T EVEN A QUERY $table" ), + ); + } + + /** + * @dataProvider data_get_table_from_query_false + * @ticket 21212 + */ + function test_get_table_from_query_false( $query ) { + $this->assertFalse( self::$_wpdb->get_table_from_query( $query ) ); + } + + /** + * @ticket 21212 + */ + function data_process_field_formats() { + 
$core_db_fields_no_format_specified = array( + array( 'post_content' => 'foo', 'post_parent' => 0 ), + null, + array( + 'post_content' => array( 'value' => 'foo', 'format' => '%s' ), + 'post_parent' => array( 'value' => 0, 'format' => '%d' ), + ) + ); + + $core_db_fields_formats_specified = array( + array( 'post_content' => 'foo', 'post_parent' => 0 ), + array( '%d', '%s' ), // These override core field_types + array( + 'post_content' => array( 'value' => 'foo', 'format' => '%d' ), + 'post_parent' => array( 'value' => 0, 'format' => '%s' ), + ) + ); + + $misc_fields_no_format_specified = array( + array( 'this_is_not_a_core_field' => 'foo', 'this_is_not_either' => 0 ), + null, + array( + 'this_is_not_a_core_field' => array( 'value' => 'foo', 'format' => '%s' ), + 'this_is_not_either' => array( 'value' => 0, 'format' => '%s' ), + ) + ); + + $misc_fields_formats_specified = array( + array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 1.2 ), + array( '%d', '%f' ), + array( + 'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ), + 'this_is_not_either' => array( 'value' => 1.2, 'format' => '%f' ), + ) + ); + + $misc_fields_insufficient_formats_specified = array( + array( 'this_is_not_a_core_field' => 0, 'this_is_not_either' => 's', 'nor_this' => 1 ), + array( '%d', '%s' ), // The first format is used for the third + array( + 'this_is_not_a_core_field' => array( 'value' => 0, 'format' => '%d' ), + 'this_is_not_either' => array( 'value' => 's', 'format' => '%s' ), + 'nor_this' => array( 'value' => 1, 'format' => '%d' ), + ) + ); + + $vars = get_defined_vars(); + // Push the variable name onto the end for assertSame $message + foreach ( $vars as $var_name => $var ) { + $vars[ $var_name ][] = $var_name; + } + return array_values( $vars ); + } + + /** + * @dataProvider data_process_field_formats + * @ticket 21212 + */ + function test_process_field_formats( $data, $format, $expected, $message ) { + $actual = self::$_wpdb->process_field_formats( 
$data, $format ); + $this->assertSame( $expected, $actual, $message ); + } + + /** + * process_fields() should return each field's value together with its format, charset and ascii flag. + * + * @ticket 21212 + */ + function test_process_fields() { + global $wpdb; + $data = array( 'post_content' => '¡foo foo foo!' ); + $expected = array( + 'post_content' => array( + 'value' => '¡foo foo foo!', + 'format' => '%s', + 'charset' => $wpdb->charset, + 'ascii' => false, + ) + ); + + $this->assertSame( $expected, self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); + } + + /** + * process_fields() should return false when the table does not exist. + * + * @ticket 21212 + * @depends test_process_fields + */ + function test_process_fields_on_nonexistent_table( $data ) { + self::$_wpdb->suppress_errors( true ); + $data = array( 'post_content' => '¡foo foo foo!' ); + $this->assertFalse( self::$_wpdb->process_fields( 'nonexistent_table', $data, null ) ); + self::$_wpdb->suppress_errors( false ); + } + + /** + * A pre_get_table_charset filter should be able to supply the value returned by get_table_charset(). + * + * @ticket 21212 + */ + function test_pre_get_table_charset_filter() { + add_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10, 2 ); + $charset = self::$_wpdb->get_table_charset( 'some_table' ); + remove_filter( 'pre_get_table_charset', array( $this, 'filter_pre_get_table_charset' ), 10 ); + + $this->assertEquals( $charset, 'fake_charset' ); + } + function filter_pre_get_table_charset( $charset, $table ) { + return 'fake_charset'; + } + + /** + * A pre_get_col_charset filter should be able to supply the value returned by get_col_charset(). + * + * @ticket 21212 + */ + function test_pre_get_col_charset_filter() { + add_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10, 3 ); + $charset = self::$_wpdb->get_col_charset( 'some_table', 'some_col' ); + remove_filter( 'pre_get_col_charset', array( $this, 'filter_pre_get_col_charset' ), 10 ); + + $this->assertEquals( $charset, 'fake_col_charset' ); + } + function filter_pre_get_col_charset( $charset, $table, $column ) { + return 'fake_col_charset'; + } +} + +/** + * Special class for exposing protected wpdb methods we need to access + * + * The constructor copies the connection handle and settings from the global $wpdb, and __call() forwards method calls to this object. + */ +class wpdb_exposed_methods_for_testing extends wpdb { + public function __construct() { + global $wpdb; + 
$this->dbh = $wpdb->dbh; + $this->use_mysqli = $wpdb->use_mysqli; + $this->ready = true; + $this->field_types = $wpdb->field_types; + $this->charset = $wpdb->charset; + } + + public function __call( $name, $arguments ) { + return call_user_func_array( array( $this, $name ), $arguments ); + } } diff --git a/tests/phpunit/tests/db/charset.php b/tests/phpunit/tests/db/charset.php new file mode 100755 index 0000000000..fcad682e04 --- /dev/null +++ b/tests/phpunit/tests/db/charset.php @@ -0,0 +1,409 @@ array( // latin1. latin1 never changes. + 'charset' => 'latin1', + 'value' => "\xf0\x9f\x8e\xb7", + 'expected' => "\xf0\x9f\x8e\xb7" + ), + 'ascii' => array( + // ascii gets special treatment, make sure it's covered + 'charset' => 'ascii', + 'value' => 'Hello World', + 'expected' => 'Hello World' + ), + 'utf8' => array( + // utf8 only allows <= 3-byte chars + 'charset' => 'utf8', + 'value' => "H€llo\xf0\x9f\x98\x88World¢", + 'expected' => 'H€lloWorld¢' + ), + 'utf8mb3' => array( + // utf8mb3 should behave the same as utf8 + 'charset' => 'utf8mb3', + 'value' => "H€llo\xf0\x9f\x98\x88World¢", + 'expected' => 'H€lloWorld¢' + ), + 'utf8mb4' => array( + // utf8mb4 allows 4-byte characters, too + 'charset' => 'utf8mb4', + 'value' => "H€llo\xf0\x9f\x98\x88World¢", + 'expected' => "H€llo\xf0\x9f\x98\x88World¢" + ), + 'koi8r' => array( + // koi8r is a character set that needs to be checked in MySQL + 'charset' => 'koi8r', + 'value' => "\xfdord\xf2ress", + 'expected' => "\xfdord\xf2ress", + 'db' => true + ), + 'hebrew' => array( + // hebrew needs to be checked in MySQL, too + 'charset' => 'hebrew', + 'value' => "\xf9ord\xf7ress", + 'expected' => "\xf9ord\xf7ress", + 'db' => true + ), + 'false' => array( + // false is a column with no character set (ie, a number column) + 'charset' => false, + 'value' => 100, + 'expected' => 100 + ), + ); + + // big5 is a non-Unicode multibyte charset + $utf8 = "a\xe5\x85\xb1b"; // UTF-8 Character 20849 + $big5 = mb_convert_encoding( $utf8, 
'BIG-5', 'UTF-8' ); + $conv_utf8 = mb_convert_encoding( $big5, 'UTF-8', 'BIG-5' ); + // Make sure PHP's multibyte conversions are working correctly + $this->assertNotEquals( $utf8, $big5 ); + $this->assertEquals( $utf8, $conv_utf8 ); + + $fields['big5'] = array( + 'charset' => 'big5', + 'value' => $big5, + 'expected' => $big5 + ); + + // The data above is easy to edit. Now, prepare it for the data provider. + $data_provider = $multiple = $multiple_expected = array(); + foreach ( $fields as $test_case => $field ) { + $expected = $field; + $expected['value'] = $expected['expected']; + unset( $expected['expected'], $field['expected'] ); + + // We're keeping track of these for our multiple-field test. + $multiple[] = $field; + $multiple_expected[] = $expected; + + // strip_invalid_text() expects an array of fields. We're testing one field at a time. + $data = array( $field ); + $expected = array( $expected ); + + // First argument is field data. Second is expected. Third is the message. + $data_provider[] = array( $data, $expected, $test_case ); + } + + // Time for our test of multiple fields at once. 
+ $data_provider[] = array( $multiple, $multiple_expected, 'multiple fields/charsets' ); + + return $data_provider; + } + + /** + * strip_invalid_text() should return every field with its value equal to the provider's expected value. + * + * NOTE(review): the koi8r case runs SET NAMES koi8r and nothing in this method restores the connection charset afterwards; confirm later tests do not depend on it. + * + * @dataProvider data_strip_invalid_text + * @ticket 21212 + */ + function test_strip_invalid_text( $data, $expected, $message ) { + if ( $data[0]['charset'] === 'koi8r' ) { + self::$_wpdb->query( 'SET NAMES koi8r' ); + } + $actual = self::$_wpdb->strip_invalid_text( $data ); + $this->assertSame( $expected, $actual, $message ); + } + + /** + * process_fields() should return false for post_content containing the \xf0\x9f\x98\x88 byte sequence. + * + * @ticket 21212 + */ + function test_process_fields_failure() { + global $wpdb; + $data = array( 'post_content' => "H€llo\xf0\x9f\x98\x88World¢" ); + $this->assertFalse( self::$_wpdb->process_fields( $wpdb->posts, $data, null ) ); + } + + /** + * Data provider for test_process_field_charsets(): one data set per field-group variable defined below. + * + * @ticket 21212 + */ + function data_process_field_charsets() { + $charset = $GLOBALS['wpdb']->charset; // This is how all tables were installed + // 'value' and 'format' are $data, 'charset' ends up as part of $expected + + $no_string_fields = array( + 'post_parent' => array( 'value' => 10, 'format' => '%d', 'charset' => false ), + 'comment_count' => array( 'value' => 0, 'format' => '%d', 'charset' => false ), + ); + + $all_ascii_fields = array( + 'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ), + 'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ), + ); + + // This is the same data used in test_process_field_charsets_on_nonexistent_table() + $non_ascii_string_fields = array( + 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ), + 'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ), + ); + + $vars = get_defined_vars(); + unset( $vars['charset'] ); + foreach ( $vars as $var_name => $var ) { + $data = $expected = $var; + foreach ( $data as &$datum ) { + // 'charset' and 'ascii' are part of the expected return only. 
+ unset( $datum['charset'], $datum['ascii'] ); + } + + $vars[ $var_name ] = array( $data, $expected, $var_name ); + } + + return array_values( $vars ); + } + + /** + * process_field_charsets() should return the fields with the expected charset information when run against the posts table. + * + * @dataProvider data_process_field_charsets + * @ticket 21212 + */ + function test_process_field_charsets( $data, $expected, $message ) { + $actual = self::$_wpdb->process_field_charsets( $data, $GLOBALS['wpdb']->posts ); + $this->assertSame( $expected, $actual, $message ); + } + + /** + * The test this test depends on first verifies that this + * would normally work against the posts table. + * + * @ticket 21212 + * @depends test_process_field_charsets + */ + function test_process_field_charsets_on_nonexistent_table() { + $data = array( 'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s' ) ); + self::$_wpdb->suppress_errors( true ); + $this->assertFalse( self::$_wpdb->process_field_charsets( $data, 'nonexistent_table' ) ); + self::$_wpdb->suppress_errors( false ); + } + + /** + * check_ascii() should return true for a string made up only of ASCII characters. + * + * @ticket 21212 + */ + function test_check_ascii() { + $ascii = "\0\t\n\r '" . 
'!"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'; + $this->assertTrue( self::$_wpdb->check_ascii( $ascii ) ); + } + + /** + * check_ascii() should return false when the string contains non-ASCII characters. + * + * @ticket 21212 + */ + function test_check_ascii_false() { + $this->assertFalse( self::$_wpdb->check_ascii( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ¡©«' ) ); + } + + /** + * strip_invalid_text_for_column() should remove invalid byte sequences from a value for the posts table's post_content column. + * + * @ticket 21212 + */ + function test_strip_invalid_text_for_column() { + global $wpdb; + // Invalid 3-byte and 4-byte sequences + $value = "H€llo\xe0\x80\x80World\xf0\xff\xff\xff¢"; + $expected = "H€lloWorld¢"; + $actual = $wpdb->strip_invalid_text_for_column( $wpdb->posts, 'post_content', $value ); + $this->assertEquals( $expected, $actual ); + } + + /** + * Set of table definitions for testing wpdb::get_table_charset and wpdb::get_col_charset + * @var array + */ + protected $table_and_column_defs = array( + array( + 'definition' => '( a INT, b FLOAT )', + 'table_expected' => 'latin1', + 'column_expected' => array( 'a' => false, 'b' => false ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET big5 )', + 'table_expected' => 'big5', + 'column_expected' => array( 'a' => 'big5', 'b' => 'big5' ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b BINARY )', + 'table_expected' => 'binary', + 'column_expected' => array( 'a' => 'big5', 'b' => false ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b BLOB )', + 'table_expected' => 'binary', + 'column_expected' => array( 'a' => 'latin1', 'b' => false ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET latin1, b TEXT CHARACTER SET koi8r )', + 'table_expected' => 'koi8r', + 'column_expected' => array( 'a' => 'latin1', 'b' => 'koi8r' ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET utf8mb3, b TEXT CHARACTER SET utf8mb3 )', + 'table_expected' => 'utf8', + 'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8' ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET utf8, b TEXT 
CHARACTER SET utf8mb4 )', + 'table_expected' => 'utf8', + 'column_expected' => array( 'a' => 'utf8', 'b' => 'utf8mb4' ) + ), + array( + 'definition' => '( a VARCHAR(50) CHARACTER SET big5, b TEXT CHARACTER SET koi8r )', + 'table_expected' => 'ascii', + 'column_expected' => array( 'a' => 'big5', 'b' => 'koi8r' ) + ), + ); + + /** + * Data provider for test_get_table_charset(), built from $table_and_column_defs. + * + * @ticket 21212 + */ + function data_test_get_table_charset() { + $table_name = 'test_get_table_charset'; + + $vars = array(); + foreach( $this->table_and_column_defs as $value ) { + $this_table_name = $table_name . '_' . rand_str( 5 ); + $drop = "DROP TABLE IF EXISTS $this_table_name"; + $create = "CREATE TABLE $this_table_name {$value['definition']}"; + $vars[] = array( $drop, $create, $this_table_name, $value['table_expected'] ); + } + + return $vars; + } + + /** + * get_table_charset() should report the expected charset for a freshly created table, which is dropped again afterwards. + * + * @dataProvider data_test_get_table_charset + * @ticket 21212 + */ + function test_get_table_charset( $drop, $create, $table, $expected_charset ) { + self::$_wpdb->query( $drop ); + + if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) { + $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." ); + return; + } + + self::$_wpdb->query( $create ); + + $charset = self::$_wpdb->get_table_charset( $table ); + $this->assertEquals( $charset, $expected_charset ); + + self::$_wpdb->query( $drop ); + } + + /** + * Data provider for test_get_column_charset(), built from $table_and_column_defs. + * + * @ticket 21212 + */ + function data_test_get_column_charset() { + $table_name = 'test_get_column_charset'; + + $vars = array(); + foreach( $this->table_and_column_defs as $value ) { + $this_table_name = $table_name . '_' . 
rand_str( 5 ); + $drop = "DROP TABLE IF EXISTS $this_table_name"; + $create = "CREATE TABLE $this_table_name {$value['definition']}"; + $vars[] = array( $drop, $create, $this_table_name, $value['column_expected'] ); + } + + return $vars; + } + + /** + * get_col_charset() should report the expected charset for every column of a freshly created table. + * + * @dataProvider data_test_get_column_charset + * @ticket 21212 + */ + function test_get_column_charset( $drop, $create, $table, $expected_charset ) { + self::$_wpdb->query( $drop ); + + if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) { + $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." ); + return; + } + + self::$_wpdb->query( $create ); + + foreach ( $expected_charset as $column => $charset ) { + $this->assertEquals( $charset, self::$_wpdb->get_col_charset( $table, $column ) ); + } + + self::$_wpdb->query( $drop ); + } + + /** + * Data provider for test_strip_invalid_text_from_query(): CREATE, INSERT, expected INSERT and DROP statements per table. + * + * @ticket 21212 + */ + function data_strip_invalid_text_from_query() { + $table_name = 'strip_invalid_text_from_query_table'; + $data = array( + array( + // binary tables don't get stripped + "( a VARCHAR(50) CHARACTER SET utf8, b BINARY )", // create + "('foo\xf0\x9f\x98\x88bar', 'foo')", // query + "('foo\xf0\x9f\x98\x88bar', 'foo')" // expected result + ), + array( + // utf8/utf8mb4 tables default to utf8 + "( a VARCHAR(50) CHARACTER SET utf8, b VARCHAR(50) CHARACTER SET utf8mb4 )", + "('foo\xf0\x9f\x98\x88bar', 'foo')", + "('foobar', 'foo')" + ), + ); + + foreach( $data as &$value ) { + $this_table_name = $table_name . '_' . 
rand_str( 5 ); + + $value[0] = "CREATE TABLE $this_table_name {$value[0]}"; + $value[1] = "INSERT INTO $this_table_name VALUES {$value[1]}"; + $value[2] = "INSERT INTO $this_table_name VALUES {$value[2]}"; + $value[3] = "DROP TABLE IF EXISTS $this_table_name"; + } + unset( $value ); + + return $data; + } + + /** + * strip_invalid_text_from_query() should return the expected query text for an INSERT into a freshly created table. + * + * @dataProvider data_strip_invalid_text_from_query + * @ticket 21212 + */ + function test_strip_invalid_text_from_query( $create, $query, $expected, $drop ) { + self::$_wpdb->query( $drop ); + + if ( ! self::$_wpdb->has_cap( 'utf8mb4' ) && preg_match( '/utf8mb[34]/i', $create ) ) { + $this->markTestSkipped( "This version of MySQL doesn't support utf8mb4." ); + return; + } + + self::$_wpdb->query( $create ); + + $return = self::$_wpdb->strip_invalid_text_from_query( $query ); + $this->assertEquals( $expected, $return ); + + self::$_wpdb->query( $drop ); + } + + /** + * wpdb::query() should return false for an INSERT into the posts table whose post_content contains the \xf0\x9f\x98\x88 byte sequence. + * + * @ticket 21212 + */ + function test_invalid_characters_in_query() { + global $wpdb; + $this->assertFalse( $wpdb->query( "INSERT INTO {$wpdb->posts} (post_content) VALUES ('foo\xf0\x9f\x98\x88bar')" ) ); + } +}