WPDB: When checking that a string can be sent to MySQL, we shouldn't use mb_convert_encoding(), as it behaves differently to MySQL's character encoding conversion.

Props mdawaffe, pento, nbachiyski, jorbin, johnjamesjacoby, jeremyfelt.

See #32165.



git-svn-id: https://develop.svn.wordpress.org/trunk@32364 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2015-05-06 02:59:50 +00:00
parent 35db6d722b
commit 2ce97b2984
9 changed files with 622 additions and 106 deletions

View File

@ -527,7 +527,7 @@ function upgrade_all() {
if ( $wp_current_db_version < 31351 )
upgrade_420();
if ( $wp_current_db_version < 32308 )
if ( $wp_current_db_version < 32364 )
upgrade_430();
maybe_disable_link_manager();
@ -1446,17 +1446,33 @@ function upgrade_420() {
function upgrade_430() {
global $wp_current_db_version, $wpdb;
if ( $wp_current_db_version < 32308 ) {
if ( $wp_current_db_version < 32364 ) {
$content_length = $wpdb->get_col_length( $wpdb->comments, 'comment_content' );
if ( ! $content_length ) {
$content_length = 65535;
if ( false === $content_length ) {
$content_length = array(
'type' => 'byte',
'length' => 65535,
);
} elseif ( ! is_array( $content_length ) ) {
$length = (int) $content_length > 0 ? (int) $content_length : 65535;
$content_length = array(
'type' => 'byte',
'length' => $length
);
}
if ( 'byte' !== $content_length['type'] ) {
// Sites with malformed DB schemas are on their own.
return;
}
$allowed_length = intval( $content_length['length'] ) - 10;
$comments = $wpdb->get_results(
"SELECT comment_ID FROM $wpdb->comments
WHERE comment_date_gmt > '2015-04-26'
AND CHAR_LENGTH( comment_content ) >= $content_length
AND ( comment_content LIKE '%<%' OR comment_content LIKE '%>%' )"
"SELECT `comment_ID` FROM `{$wpdb->comments}`
WHERE `comment_date_gmt` > '2015-04-26'
AND LENGTH( `comment_content` ) >= {$allowed_length}
AND ( `comment_content` LIKE '%<%' OR `comment_content` LIKE '%>%' )"
);
foreach ( $comments as $comment ) {

View File

@ -2118,17 +2118,7 @@ function wp_insert_comment( $commentdata ) {
$compacted = compact( 'comment_post_ID', 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_author_IP', 'comment_date', 'comment_date_gmt', 'comment_content', 'comment_karma', 'comment_approved', 'comment_agent', 'comment_type', 'comment_parent', 'user_id' );
if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) {
$fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' );
foreach( $fields as $field ) {
if ( isset( $compacted[ $field ] ) ) {
$compacted[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $compacted[ $field ] );
}
}
if ( ! $wpdb->insert( $wpdb->comments, $compacted ) ) {
return false;
}
return false;
}
$id = (int) $wpdb->insert_id;
@ -2252,6 +2242,8 @@ function wp_throttle_comment_flood($block, $time_lastcomment, $time_newcomment)
* @return int|bool The ID of the comment on success, false on failure.
*/
function wp_new_comment( $commentdata ) {
global $wpdb;
if ( isset( $commentdata['user_ID'] ) ) {
$commentdata['user_id'] = $commentdata['user_ID'] = (int) $commentdata['user_ID'];
}
@ -2295,7 +2287,22 @@ function wp_new_comment( $commentdata ) {
$comment_ID = wp_insert_comment($commentdata);
if ( ! $comment_ID ) {
return false;
$fields = array( 'comment_author', 'comment_author_email', 'comment_author_url', 'comment_content' );
foreach( $fields as $field ) {
if ( isset( $commentdata[ $field ] ) ) {
$commentdata[ $field ] = $wpdb->strip_invalid_text_for_column( $wpdb->comments, $field, $commentdata[ $field ] );
}
}
$commentdata = wp_filter_comment( $commentdata );
$commentdata['comment_approved'] = wp_allow_comment( $commentdata );
$comment_ID = wp_insert_comment( $commentdata );
if ( ! $comment_ID ) {
return false;
}
}
/**

View File

@ -13,23 +13,85 @@ if ( !function_exists('_') ) {
}
}
/**
 * Reports whether the PCRE UTF-8 modifier (/u) is available to preg_* functions.
 *
 * The capability is probed once with preg_match() and remembered in a static
 * variable. Passing a non-null $set overwrites the remembered value, which is
 * how tests force the non-PCRE/u code paths.
 *
 * @ignore
 * @since 4.2.2
 * @access private
 *
 * @param mixed $set - Used for testing only
 *                     null   : default - get PCRE/u capability
 *                     false  : Used for testing - return false for future calls to this function
 *                     'reset': Used for testing - restore default behavior of this function
 * @return mixed The forced value while one is in effect, otherwise the result
 *               of the preg_match() probe (truthy when /u is usable).
 */
function _wp_can_use_pcre_u( $set = null ) {
	static $cached = 'reset';

	if ( null !== $set ) {
		$cached = $set;
	}

	// Anything other than the 'reset' sentinel is already a final answer.
	if ( 'reset' !== $cached ) {
		return $cached;
	}

	// Probe with a trivial /u pattern; the call is silenced with @
	// (presumably so a PCRE build without UTF-8 support stays quiet).
	$cached = @preg_match( '/^./u', 'a' );

	return $cached;
}
// Polyfill: declare mb_substr() only when no function of that name exists yet.
// It forwards every argument unchanged to _mb_substr().
if ( ! function_exists( 'mb_substr' ) ) {
	function mb_substr( $str, $start, $length = null, $encoding = null ) {
		return _mb_substr( $str, $start, $length, $encoding );
	}
}
/**
 * Character-aware substr() that backs the mb_substr() polyfill.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @param string      $str      The string to extract the substring from.
 * @param int         $start    Offset of the first character (byte, for 8bit) to keep.
 *                              Negative values count from the end.
 * @param int|null    $length   Maximum number of characters (bytes, for 8bit) to keep.
 *                              null keeps everything from $start onwards.
 * @param string|null $encoding Character encoding. null falls back to the
 *                              'blog_charset' option.
 * @return string The extracted substring.
 */
function _mb_substr( $str, $start, $length = null, $encoding = null ) {
	if ( null === $encoding ) {
		$encoding = get_option( 'blog_charset' );
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in substr()
	if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
		return is_null( $length ) ? substr( $str, $start ) : substr( $str, $start, $length );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Use the regex unicode support to separate the UTF-8 characters into an array
		preg_match_all( '/./us', $str, $match );
		$chars = is_null( $length ) ? array_slice( $match[0], $start ) : array_slice( $match[0], $start, $length );
		return implode( '', $chars );
	}

	// No PCRE/u: recognise well-formed UTF-8 sequences byte by byte instead.
	$regex = '/(
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	$chars = array( '' ); // Start with 1 element instead of 0 since the first thing we do is pop
	do {
		// We had some string left over from the last round, but we counted it in that last round.
		array_pop( $chars );

		// Split by UTF-8 character, limit to 1000 characters (last array element will contain the rest of the string)
		$pieces = preg_split( $regex, $str, 1000, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );

		$chars = array_merge( $chars, $pieces );
	} while ( count( $pieces ) > 1 && $str = array_pop( $pieces ) ); // If there's anything left over, repeat the loop.

	return join( '', array_slice( $chars, $start, $length ) );
}
if ( ! function_exists( 'mb_strlen' ) ) :
@ -38,16 +100,54 @@ if ( ! function_exists( 'mb_strlen' ) ) :
}
endif;
/**
 * Character-aware strlen() that backs the mb_strlen() polyfill.
 *
 * Only understands UTF-8 and 8bit. All other character sets will be treated as 8bit.
 * For $encoding === UTF-8, the $str input is expected to be a valid UTF-8 byte sequence.
 * The behavior of this function for invalid inputs is undefined.
 *
 * @param string      $str      The string to measure.
 * @param string|null $encoding Character encoding. null falls back to the
 *                              'blog_charset' option.
 * @return int Number of characters for UTF-8, number of bytes otherwise.
 */
function _mb_strlen( $str, $encoding = null ) {
	if ( null === $encoding ) {
		$encoding = get_option( 'blog_charset' );
	}

	// The solution below works only for UTF-8,
	// so in case of a different charset just use built-in strlen()
	if ( ! in_array( $encoding, array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ) ) {
		return strlen( $str );
	}

	if ( _wp_can_use_pcre_u() ) {
		// Use the regex unicode support to separate the UTF-8 characters into an array
		preg_match_all( '/./us', $str, $match );
		return count( $match[0] );
	}

	// No PCRE/u: recognise well-formed UTF-8 sequences byte by byte instead.
	$regex = '/(?:
		  [\x00-\x7F]                  # single-byte sequences   0xxxxxxx
		| [\xC2-\xDF][\x80-\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
		| \xE0[\xA0-\xBF][\x80-\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
		| [\xE1-\xEC][\x80-\xBF]{2}
		| \xED[\x80-\x9F][\x80-\xBF]
		| [\xEE-\xEF][\x80-\xBF]{2}
		| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
		| [\xF1-\xF3][\x80-\xBF]{3}
		| \xF4[\x80-\x8F][\x80-\xBF]{2}
	)/x';

	$count = 0;
	do {
		// Split by UTF-8 character, limit to 1000 pieces (last array element will contain the rest of the string)
		$pieces = preg_split( $regex, $str, 1000 );
		if ( false === $pieces ) {
			break;
		}

		// Fencepost: n pieces means n - 1 characters were consumed as delimiters.
		$piece_count = count( $pieces );
		$count      += $piece_count - 1;

		$str = array_pop( $pieces );

		// Only a split that hit the limit can have characters left in its
		// remainder. Comparing against '' (rather than truthiness) keeps a
		// remainder of "0" from ending the loop early, and the limit check
		// stops a remainder with no valid character from looping forever.
	} while ( 1000 === $piece_count && '' !== $str );

	return $count;
}
if ( !function_exists('hash_hmac') ):

View File

@ -11,7 +11,7 @@ $wp_version = '4.3-alpha-32280-src';
*
* @global int $wp_db_version
*/
$wp_db_version = 32308;
$wp_db_version = 32364;
/**
* Holds the TinyMCE version

View File

@ -1809,6 +1809,8 @@ class wpdb {
* @return int|false The number of rows affected, or false on error.
*/
function _insert_replace_helper( $table, $data, $format = null, $type = 'INSERT' ) {
$this->insert_id = 0;
if ( ! in_array( strtoupper( $type ), array( 'REPLACE', 'INSERT' ) ) ) {
return false;
}
@ -1829,7 +1831,6 @@ class wpdb {
$sql = "$type INTO `$table` ($fields) VALUES ($formats)";
$this->insert_id = 0;
$this->check_current_query = false;
return $this->query( $this->prepare( $sql, $values ) );
}
@ -2021,17 +2022,11 @@ class wpdb {
// We can skip this field if we know it isn't a string.
// This checks %d/%f versus ! %s because it's sprintf() could take more.
$value['charset'] = false;
} elseif ( $this->check_ascii( $value['value'] ) ) {
// If it's ASCII, then we don't need the charset. We can skip this field.
$value['charset'] = false;
} else {
$value['charset'] = $this->get_col_charset( $table, $field );
if ( is_wp_error( $value['charset'] ) ) {
return false;
}
// This isn't ASCII. Don't have strip_invalid_text() re-check.
$value['ascii'] = false;
}
$data[ $field ] = $value;
@ -2064,10 +2059,6 @@ class wpdb {
}
}
if ( false !== $value['length'] && mb_strlen( $value['value'] ) > $value['length'] ) {
return false;
}
$data[ $field ] = $value;
}
@ -2406,14 +2397,16 @@ class wpdb {
/**
* Retrieve the maximum string length allowed in a given column.
* The length may either be specified as a byte length or a character length.
*
* @since 4.2.1
* @access public
*
* @param string $table Table name.
* @param string $column Column name.
* @return mixed Max column length as an int. False if the column has no
* length. WP_Error object if there was an error.
* @return mixed array( 'length' => (int), 'type' => 'byte' | 'char' )
* false if the column has no length (for example, numeric column)
* WP_Error object if there was an error.
*/
public function get_col_length( $table, $column ) {
$tablekey = strtolower( $table );
@ -2446,27 +2439,47 @@ class wpdb {
}
switch( $type ) {
case 'binary':
case 'char':
case 'varbinary':
case 'varchar':
return $length;
return array(
'type' => 'char',
'length' => (int) $length,
);
break;
case 'binary':
case 'varbinary':
return array(
'type' => 'byte',
'length' => (int) $length,
);
break;
case 'tinyblob':
case 'tinytext':
return 255; // 2^8 - 1
return array(
'type' => 'byte',
'length' => 255, // 2^8 - 1
);
break;
case 'blob':
case 'text':
return 65535; // 2^16 - 1
return array(
'type' => 'byte',
'length' => 65535, // 2^16 - 1
);
break;
case 'mediumblob':
case 'mediumtext':
return 16777215; // 2^24 - 1
return array(
'type' => 'byte',
'length' => 16777215, // 2^24 - 1
);
break;
case 'longblob':
case 'longtext':
return 4294967295; // 2^32 - 1
return array(
'type' => 'byte',
'length' => 4294967295, // 2^32 - 1
);
break;
default:
return false;
@ -2572,50 +2585,55 @@ class wpdb {
* remove invalid characters, a WP_Error object is returned.
*/
protected function strip_invalid_text( $data ) {
// Some multibyte character sets that we can check in PHP.
$mb_charsets = array(
'ascii' => 'ASCII',
'big5' => 'BIG-5',
'eucjpms' => 'eucJP-win',
'gb2312' => 'EUC-CN',
'ujis' => 'EUC-JP',
'utf32' => 'UTF-32',
);
$supported_charsets = array();
if ( function_exists( 'mb_list_encodings' ) ) {
$supported_charsets = mb_list_encodings();
}
$db_check_string = false;
foreach ( $data as &$value ) {
$charset = $value['charset'];
// Column isn't a string, or is latin1, which will will happily store anything.
if ( false === $charset || 'latin1' === $charset ) {
if ( is_array( $value['length'] ) ) {
$length = $value['length']['length'];
} else {
$length = false;
}
// There's no charset to work with.
if ( false === $charset ) {
continue;
}
// Column isn't a string.
if ( ! is_string( $value['value'] ) ) {
continue;
}
// ASCII is always OK.
if ( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) ) {
continue;
$truncate_by_byte_length = 'byte' === $value['length']['type'];
$needs_validation = true;
if (
// latin1 can store any byte sequence
'latin1' === $charset
||
// ASCII is always OK.
( ! isset( $value['ascii'] ) && $this->check_ascii( $value['value'] ) )
) {
$truncate_by_byte_length = true;
$needs_validation = false;
}
// Convert the text locally.
if ( $supported_charsets ) {
if ( isset( $mb_charsets[ $charset ] ) && in_array( $mb_charsets[ $charset ], $supported_charsets ) ) {
$value['value'] = mb_convert_encoding( $value['value'], $mb_charsets[ $charset ], $mb_charsets[ $charset ] );
if ( $truncate_by_byte_length ) {
mbstring_binary_safe_encoding();
if ( false !== $length && strlen( $value['value'] ) > $length ) {
$value['value'] = substr( $value['value'], 0, $length );
}
reset_mbstring_encoding();
if ( ! $needs_validation ) {
continue;
}
}
// utf8 can be handled by regex, which is a bunch faster than a DB lookup.
if ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) {
if ( ( 'utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset ) && function_exists( 'mb_strlen' ) ) {
$regex = '/
(
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
@ -2625,7 +2643,7 @@ class wpdb {
| \xED[\x80-\x9F][\x80-\xBF]
| [\xEE-\xEF][\x80-\xBF]{2}';
if ( 'utf8mb4' === $charset) {
if ( 'utf8mb4' === $charset ) {
$regex .= '
| \xF0[\x90-\xBF][\x80-\xBF]{2} # four-byte sequences 11110xxx 10xxxxxx * 3
| [\xF1-\xF3][\x80-\xBF]{3}
@ -2638,6 +2656,11 @@ class wpdb {
| . # anything else
/x';
$value['value'] = preg_replace( $regex, '$1', $value['value'] );
if ( false !== $length && mb_strlen( $value['value'], 'UTF-8' ) > $length ) {
$value['value'] = mb_substr( $value['value'], 0, $length, 'UTF-8' );
}
continue;
}
@ -2654,8 +2677,14 @@ class wpdb {
$queries[ $value['charset'] ] = array();
}
// Split the CONVERT() calls by charset, so we can make sure the connection is right
$queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( %s USING {$value['charset']} )", $value['value'] );
// We're going to need to truncate by characters or bytes, depending on the length value we have.
if ( 'byte' === $value['length']['type'] ) {
// Split the CONVERT() calls by charset, so we can make sure the connection is right
$queries[ $value['charset'] ][ $col ] = $this->prepare( "CONVERT( LEFT( CONVERT( %s USING binary ), %d ) USING {$value['charset']} )", $value['value'], $value['length']['length'] );
} else {
$queries[ $value['charset'] ][ $col ] = $this->prepare( "LEFT( CONVERT( %s USING {$value['charset']} ), %d )", $value['value'], $value['length']['length'] );
}
unset( $data[ $col ]['db'] );
}
}
@ -2674,16 +2703,19 @@ class wpdb {
$this->check_current_query = false;
$row = $this->get_row( "SELECT " . implode( ', ', $query ), ARRAY_N );
$sql = array();
foreach ( $query as $column => $column_query ) {
$sql[] = $column_query . " AS x_$column";
}
$row = $this->get_row( "SELECT " . implode( ', ', $sql ), ARRAY_A );
if ( ! $row ) {
$this->set_charset( $this->dbh, $connection_charset );
return new WP_Error( 'wpdb_strip_invalid_text_failure' );
}
$cols = array_keys( $query );
$col_count = count( $cols );
for ( $ii = 0; $ii < $col_count; $ii++ ) {
$data[ $cols[ $ii ] ]['value'] = $row[ $ii ];
foreach ( array_keys( $query ) as $column ) {
$data[ $column ]['value'] = $row["x_$column"];
}
}
@ -2725,6 +2757,7 @@ class wpdb {
'value' => $query,
'charset' => $charset,
'ascii' => false,
'length' => false,
);
$data = $this->strip_invalid_text( array( $data ) );
@ -2747,7 +2780,7 @@ class wpdb {
* @return string|WP_Error The converted string, or a WP_Error object if the conversion fails.
*/
public function strip_invalid_text_for_column( $table, $column, $value ) {
if ( ! is_string( $value ) || $this->check_ascii( $value ) ) {
if ( ! is_string( $value ) ) {
return $value;
}
@ -2764,7 +2797,7 @@ class wpdb {
$column => array(
'value' => $value,
'charset' => $charset,
'ascii' => false,
'length' => $this->get_col_length( $table, $column ),
)
);

View File

@ -121,7 +121,8 @@ class Tests_Comment extends WP_UnitTestCase {
$_SERVER['REMOTE_ADDR'] = '';
}
$post_id = $this->factory->post->create();
$u = $this->factory->user->create();
$post_id = $this->factory->post->create( array( 'post_author' => $u ) );
$data = array(
'comment_post_ID' => $post_id,
@ -136,7 +137,9 @@ class Tests_Comment extends WP_UnitTestCase {
$id = wp_new_comment( $data );
$this->assertFalse( $id );
$comment = get_comment( $id );
$this->assertEquals( strlen( $comment->comment_content ), 65535 );
// Cleanup.
if ( isset( $remote_addr ) ) {

View File

@ -2,13 +2,166 @@
/**
* @group compat
* @group security-153
*/
class Tests_Compat extends WP_UnitTestCase {
function test_mb_substr() {
$this->assertEquals('баб', _mb_substr('баба', 0, 3));
$this->assertEquals('баб', _mb_substr('баба', 0, -1));
$this->assertEquals('баб', _mb_substr('баба', 0, -1));
$this->assertEquals('I am your б', _mb_substr('I am your баба', 0, 11));
/**
 * Data provider: UTF-8 strings with their expected character and byte lengths.
 *
 * Three-byte characters are written as byte escapes (U+FF13, "\xEF\xBC\x93")
 * so the rows stay correct even if the file is re-encoded.
 *
 * @return array[] Rows of array( string, character_length, byte_length ).
 */
function utf8_string_lengths() {
	return array(
		// string, character_length, byte_length
		array( 'баба', 4, 8 ),
		array( 'баб', 3, 6 ),
		array( 'I am your б', 11, 12 ),
		array( '1111111111', 10, 10 ),
		array( '²²²²²²²²²²', 10, 20 ),
		// Ten three-byte characters.
		array( str_repeat( "\xEF\xBC\x93", 10 ), 10, 30 ),
		array( '𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜𝟜', 10, 40 ),
		// One-, two-, three- and four-byte characters, repeated three times.
		array( str_repeat( "1²\xEF\xBC\x93𝟜", 3 ), 12, 30 ),
	);
}
/**
 * Data provider: substring requests against UTF-8 strings, with the result
 * expected when slicing by character and when slicing by byte.
 *
 * @return array[] Rows of array( string, start, length, character_substring, byte_substring ).
 */
function utf8_substrings() {
	$word     = 'баба';
	$sentence = 'I am your баба';

	$cases = array();

	// string, start, length, character_substring, byte_substring
	$cases[] = array( $word, 0, 3, 'баб', "б\xD0" );
	$cases[] = array( $word, 0, -1, 'баб', "баб\xD0" );
	$cases[] = array( $word, 1, null, 'аба', "\xB1аба" );
	$cases[] = array( $word, -3, null, 'аба', "\xB1а" );
	$cases[] = array( $word, -3, 2, 'аб', "\xB1\xD0" );
	$cases[] = array( $word, -1, 2, 'а', "\xB0" );
	$cases[] = array( $sentence, 0, 11, 'I am your б', "I am your \xD0" );

	return $cases;
}
/**
 * _mb_strlen() reports the character length of a string declared as UTF-8.
 *
 * @dataProvider utf8_string_lengths
 */
function test_mb_strlen( $string, $expected_character_length ) {
	$actual_length = _mb_strlen( $string, 'UTF-8' );

	$this->assertEquals( $expected_character_length, $actual_length );
}
/**
 * _mb_strlen() reports the character length through its regex fallback,
 * reached by making _wp_can_use_pcre_u() return false.
 *
 * @dataProvider utf8_string_lengths
 */
function test_mb_strlen_via_regex( $string, $expected_character_length ) {
	_wp_can_use_pcre_u( false );
	$actual_length = _mb_strlen( $string, 'UTF-8' );

	// Restore the real capability check before asserting: a failed assertion
	// aborts the test method, which would otherwise leave PCRE/u disabled for
	// every test that runs afterwards.
	_wp_can_use_pcre_u( 'reset' );

	$this->assertEquals( $expected_character_length, $actual_length );
}
/**
 * _mb_strlen() reports the byte length when the encoding is '8bit'.
 *
 * @dataProvider utf8_string_lengths
 */
function test_8bit_mb_strlen( $string, $expected_character_length, $expected_byte_length ) {
	$actual_length = _mb_strlen( $string, '8bit' );

	$this->assertEquals( $expected_byte_length, $actual_length );
}
/**
 * _mb_substr() slices by character for a string declared as UTF-8.
 *
 * @dataProvider utf8_substrings
 */
function test_mb_substr( $string, $start, $length, $expected_character_substring ) {
	$actual_substring = _mb_substr( $string, $start, $length, 'UTF-8' );

	$this->assertEquals( $expected_character_substring, $actual_substring );
}
/**
 * _mb_substr() slices by character through its regex fallback,
 * reached by making _wp_can_use_pcre_u() return false.
 *
 * @dataProvider utf8_substrings
 */
function test_mb_substr_via_regex( $string, $start, $length, $expected_character_substring ) {
	_wp_can_use_pcre_u( false );
	$actual_substring = _mb_substr( $string, $start, $length, 'UTF-8' );

	// Restore the real capability check before asserting: a failed assertion
	// aborts the test method, which would otherwise leave PCRE/u disabled for
	// every test that runs afterwards.
	_wp_can_use_pcre_u( 'reset' );

	$this->assertEquals( $expected_character_substring, $actual_substring );
}
/**
 * _mb_substr() slices by byte when the encoding is '8bit'.
 *
 * @dataProvider utf8_substrings
 */
function test_8bit_mb_substr( $string, $start, $length, $expected_character_substring, $expected_byte_substring ) {
	$actual_substring = _mb_substr( $string, $start, $length, '8bit' );

	$this->assertEquals( $expected_byte_substring, $actual_substring );
}
/**
 * Replays cases taken from PHP core's own mb_substr() tests against _mb_substr().
 */
function test_mb_substr_phpcore(){
	/* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_basic.phpt */
	$string_ascii = 'ABCDEF';
	$string_mb    = base64_decode('5pel5pys6Kqe44OG44Kt44K544OI44Gn44GZ44CCMDEyMzTvvJXvvJbvvJfvvJjvvJnjgII=');

	$this->assertEquals( 'DEF', _mb_substr($string_ascii, 3) );
	$this->assertEquals( 'DEF', _mb_substr($string_ascii, 3, 5, 'ISO-8859-1') );

	// Specify latin-1 explicitly, as that is the default the core PHP test operates under.
	$this->assertEquals( 'peacrOiqng==' , base64_encode( _mb_substr($string_mb, 2, 7, 'latin-1' ) ) );
	$this->assertEquals( '6Kqe44OG44Kt44K544OI44Gn44GZ', base64_encode( _mb_substr($string_mb, 2, 7, 'utf-8') ) );

	/* https://github.com/php/php-src/blob/php-5.6.8/ext/mbstring/tests/mb_substr_variation1.phpt */
	$start  = 0;
	$length = 5;

	// Create and immediately destroy a variable so it can be passed as "unset data" below.
	$unset_var = 10;
	unset ($unset_var);

	$heredoc = <<<EOT
hello world
EOT;

	// Values of many types passed as the string argument; $outputs lists, in
	// the same order, the first five characters expected for each of them.
	$inputs = array(
		/*1*/ 0,
		1,
		12345,
		-2345,
		// float data
		/*5*/ 10.5,
		-10.5,
		12.3456789000e10,
		12.3456789000E-10,
		.5,
		// null data
		/*10*/ NULL,
		null,
		// boolean data
		/*12*/ true,
		false,
		TRUE,
		FALSE,
		// empty data
		/*16*/ "",
		'',
		// string data
		/*18*/ "string",
		'string',
		$heredoc,
		// object data
		/*21*/ new classA(),
		// undefined data
		/*22*/ @$undefined_var,
		// unset data
		/*23*/ @$unset_var,
	);
	$outputs = array(
		"0",
		"1",
		"12345",
		"-2345",
		"10.5",
		"-10.5",
		"12345",
		"1.234",
		"0.5",
		"",
		"",
		"1",
		"",
		"1",
		"",
		"",
		"",
		"strin",
		"strin",
		"hello",
		"Class",
		"",
		"",
	);

	// $inputs is a plain list, so its keys line up with the positions in $outputs.
	foreach ( $inputs as $position => $input ) {
		$this->assertEquals( $outputs[ $position ] , _mb_substr($input, $start, $length) );
	}
}
function test_hash_hmac_simple() {
@ -34,3 +187,10 @@ class Tests_Compat extends WP_UnitTestCase {
$this->assertEquals( array( 'foo' ), $json->decode( '["foo"]' ) );
}
}
/*
 * Used in test_mb_substr_phpcore as the "object data" input: an object whose
 * string conversion yields a fixed label.
 */
class classA {
	public function __toString() {
		$label = 'Class A object';

		return $label;
	}
}

View File

@ -746,7 +746,6 @@ class Tests_DB extends WP_UnitTestCase {
'value' => '¡foo foo foo!',
'format' => '%s',
'charset' => $expected_charset,
'ascii' => false,
'length' => $wpdb->get_col_length( $wpdb->posts, 'post_content' ),
)
);

View File

@ -6,6 +6,7 @@ require_once dirname( dirname( __FILE__ ) ) . '/db.php';
* Test WPDB methods
*
* @group wpdb
* @group security-153
*/
class Tests_DB_Charset extends WP_UnitTestCase {
@ -28,57 +29,227 @@ class Tests_DB_Charset extends WP_UnitTestCase {
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => "\xf0\x9f\x8e\xb7",
'expected' => "\xf0\x9f\x8e\xb7"
'expected' => "\xf0\x9f\x8e\xb7",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'latin1_char_length' => array(
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'latin1_byte_length' => array(
// latin1. latin1 never changes.
'charset' => 'latin1',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'ascii' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => 'Hello World',
'expected' => 'Hello World'
'expected' => 'Hello World',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'ascii_char_length' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'ascii_byte_length' => array(
// ascii gets special treatment, make sure it's covered
'charset' => 'ascii',
'value' => str_repeat( 'A', 11 ),
'expected' => str_repeat( 'A', 10 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
'expected' => 'H€lloWorld¢',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8_23char_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "²3", 10 ),
'expected' => str_repeat( "²3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8_23byte_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "²3", 10 ),
'expected' => "²3²3",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8_3char_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "", 11 ),
'expected' => str_repeat( "", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8_3byte_length' => array(
// utf8 only allows <= 3-byte chars
'charset' => 'utf8',
'value' => str_repeat( "", 11 ),
'expected' => "",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb3' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => 'H€lloWorld¢'
'expected' => 'H€lloWorld¢',
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8mb3_23char_length' => array(
// utf8mb3 should behave the same as utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "²3", 10 ),
'expected' => str_repeat( "²3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb3_23byte_length' => array(
// utf8mb3 should behave the same an utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "²3", 10 ),
'expected' => "²3²3",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb3_3char_length' => array(
// utf8mb3 should behave the same an utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "", 11 ),
'expected' => str_repeat( "", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb3_3byte_length' => array(
// utf8mb3 should behave the same an utf8
'charset' => 'utf8mb3',
'value' => str_repeat( "", 10 ),
'expected' => "",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb4' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => "H€llo\xf0\x9f\x98\x88World¢",
'expected' => "H€llo\xf0\x9f\x98\x88World¢"
'expected' => "H€llo\xf0\x9f\x98\x88World¢",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'utf8mb4_234char_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "²3𝟜", 10 ),
'expected' => "²3𝟜²3𝟜²3𝟜²",
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb4_234byte_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "²3𝟜", 10 ),
'expected' => "²3𝟜",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'utf8mb4_4char_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "𝟜", 11 ),
'expected' => str_repeat( "𝟜", 10 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'utf8mb4_4byte_length' => array(
// utf8mb4 allows 4-byte characters, too
'charset' => 'utf8mb4',
'value' => str_repeat( "𝟜", 10 ),
'expected' => "𝟜𝟜",
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'koi8r' => array(
'charset' => 'koi8r',
'value' => "\xfdord\xf2ress",
'expected' => "\xfdord\xf2ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'koi8r_char_length' => array(
'charset' => 'koi8r',
'value' => str_repeat( "\xfd\xf2", 10 ),
'expected' => str_repeat( "\xfd\xf2", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'koi8r_byte_length' => array(
'charset' => 'koi8r',
'value' => str_repeat( "\xfd\xf2", 10 ),
'expected' => str_repeat( "\xfd\xf2", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'hebrew' => array(
'charset' => 'hebrew',
'value' => "\xf9ord\xf7ress",
'expected' => "\xf9ord\xf7ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'hebrew_char_length' => array(
'charset' => 'hebrew',
'value' => str_repeat( "\xf9\xf7", 10 ),
'expected' => str_repeat( "\xf9\xf7", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'hebrew_byte_length' => array(
'charset' => 'hebrew',
'value' => str_repeat( "\xf9\xf7", 10 ),
'expected' => str_repeat( "\xf9\xf7", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'cp1251' => array(
'charset' => 'cp1251',
'value' => "\xd8ord\xd0ress",
'expected' => "\xd8ord\xd0ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'cp1251_char_length' => array(
'charset' => 'cp1251',
'value' => str_repeat( "\xd8\xd0", 10 ),
'expected' => str_repeat( "\xd8\xd0", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'cp1251_byte_length' => array(
'charset' => 'cp1251',
'value' => str_repeat( "\xd8\xd0", 10 ),
'expected' => str_repeat( "\xd8\xd0", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'tis620' => array(
'charset' => 'tis620',
'value' => "\xccord\xe3ress",
'expected' => "\xccord\xe3ress",
'length' => array( 'type' => 'char', 'length' => 100 ),
),
'tis620_char_length' => array(
'charset' => 'tis620',
'value' => str_repeat( "\xcc\xe3", 10 ),
'expected' => str_repeat( "\xcc\xe3", 5 ),
'length' => array( 'type' => 'char', 'length' => 10 ),
),
'tis620_byte_length' => array(
'charset' => 'tis620',
'value' => str_repeat( "\xcc\xe3", 10 ),
'expected' => str_repeat( "\xcc\xe3", 5 ),
'length' => array( 'type' => 'byte', 'length' => 10 ),
),
'false' => array(
// false is a column with no character set (ie, a number column)
'charset' => false,
'value' => 100,
'expected' => 100
'expected' => 100,
'length' => false,
),
);
@ -94,7 +265,22 @@ class Tests_DB_Charset extends WP_UnitTestCase {
$fields['big5'] = array(
'charset' => 'big5',
'value' => $big5,
'expected' => $big5
'expected' => $big5,
'length' => array( 'type' => 'char', 'length' => 100 ),
);
$fields['big5_char_length'] = array(
'charset' => 'big5',
'value' => str_repeat( $big5, 10 ),
'expected' => str_repeat( $big5, 3 ) . 'a',
'length' => array( 'type' => 'char', 'length' => 10 ),
);
$fields['big5_byte_length'] = array(
'charset' => 'big5',
'value' => str_repeat( $big5, 10 ),
'expected' => str_repeat( $big5, 2 ) . 'a',
'length' => array( 'type' => 'byte', 'length' => 10 ),
);
}
@ -170,14 +356,14 @@ class Tests_DB_Charset extends WP_UnitTestCase {
);
$all_ascii_fields = array(
'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => false ),
'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => false ),
'post_content' => array( 'value' => 'foo foo foo!', 'format' => '%s', 'charset' => $charset ),
'post_excerpt' => array( 'value' => 'bar bar bar!', 'format' => '%s', 'charset' => $charset ),
);
// This is the same data used in process_field_charsets_for_nonexistent_table()
$non_ascii_string_fields = array(
'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset, 'ascii' => false ),
'post_content' => array( 'value' => '¡foo foo foo!', 'format' => '%s', 'charset' => $charset ),
'post_excerpt' => array( 'value' => '¡bar bar bar!', 'format' => '%s', 'charset' => $charset ),
);
$vars = get_defined_vars();
@ -544,4 +730,16 @@ class Tests_DB_Charset extends WP_UnitTestCase {
self::$_wpdb->query( $drop );
}
/**
 * Feeds strip_invalid_text_for_column() ASCII input one byte over 65535
 * (comment_content) and one over 255 (comment_agent), and expects results
 * of exactly 65535 and 255 bytes.
 */
function test_strip_invalid_test_for_column_bails_if_ascii_input_too_long() {
	global $wpdb;

	// TEXT column
	$oversized_text = str_repeat( 'A', 65536 );
	$stripped       = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_content', $oversized_text );
	$this->assertEquals( 65535, strlen( $stripped ) );

	// VARCHAR column
	$oversized_varchar = str_repeat( 'A', 256 );
	$stripped          = $wpdb->strip_invalid_text_for_column( $wpdb->comments, 'comment_agent', $oversized_varchar );
	$this->assertEquals( 255, strlen( $stripped ) );
}
}