diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
index 35ac30563a..130a2c36d1 100644
--- a/src/wp-includes/formatting.php
+++ b/src/wp-includes/formatting.php
@@ -2005,6 +2005,36 @@ function remove_accents( $string ) {
 function sanitize_file_name( $filename ) {
 	$filename_raw = $filename;
 	$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', chr( 0 ) );
+
+	// Check for support for utf8 in the installed PCRE library once and store the result in a static.
+	static $utf8_pcre = null;
+	if ( ! isset( $utf8_pcre ) ) {
+		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
+		$utf8_pcre = @preg_match( '/^./u', 'a' );
+	}
+
+	// A name that is not valid UTF-8 has its base name passed through sanitize_title_with_dashes(); the extension is kept as-is.
+	if ( ! seems_utf8( $filename ) ) {
+		$_ext     = pathinfo( $filename, PATHINFO_EXTENSION );
+		$_name    = pathinfo( $filename, PATHINFO_FILENAME );
+		$filename = sanitize_title_with_dashes( $_name );
+
+		// Only append the separator when an extension exists, so an extensionless name does not gain a trailing dot.
+		if ( '' !== $_ext ) {
+			$filename .= '.' . $_ext;
+		}
+	}
+
+	if ( $utf8_pcre ) {
+		// Replace non-breaking spaces (U+00A0) with regular spaces.
+		$_filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
+
+		// preg_replace() returns null on error (for example when the subject is not valid UTF-8); keep the filename unchanged in that case.
+		if ( null !== $_filename ) {
+			$filename = $_filename;
+		}
+	}
+
 	/**
 	 * Filters the list of characters to remove from a filename.
 	 *
@@ -2014,7 +2044,6 @@ function sanitize_file_name( $filename ) {
 	 * @param string $filename_raw The original filename to be sanitized.
 	 */
 	$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );
-	$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
 	$filename = str_replace( $special_chars, '', $filename );
 	$filename = str_replace( array( '%20', '+' ), '-', $filename );
 	$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
diff --git a/tests/phpunit/tests/formatting/SanitizeFileName.php b/tests/phpunit/tests/formatting/SanitizeFileName.php
index 1ae561a4ad..37ce764a12 100644
--- a/tests/phpunit/tests/formatting/SanitizeFileName.php
+++ b/tests/phpunit/tests/formatting/SanitizeFileName.php
@@ -68,4 +68,30 @@ class Tests_Formatting_SanitizeFileName extends WP_UnitTestCase {
 		// Test a filenames that becomes extensionless.
 		$this->assertEquals( 'no-extension', sanitize_file_name( '_.no-extension' ) );
 	}
+
+	/**
+	 * Tests that invalid UTF-8 bytes are dropped and that spaces, including U+00A0, become dashes.
+	 *
+	 * @dataProvider data_wp_filenames
+	 *
+	 * @param string $input    Raw filename passed to sanitize_file_name().
+	 * @param string $expected Expected sanitized filename.
+	 */
+	function test_replaces_invalid_utf8_characters( $input, $expected ) {
+		$this->assertEquals( $expected, sanitize_file_name( $input ) );
+	}
+
+	/**
+	 * Data provider for test_replaces_invalid_utf8_characters().
+	 *
+	 * @return array[] Each entry is array( raw filename, expected sanitized filename ).
+	 */
+	function data_wp_filenames() {
+		return array(
+			array( urldecode( '%B1myfile.png' ), 'myfile.png' ),
+			array( urldecode( '%B1myfile' ), 'myfile' ),
+			array( 'demo bar.png', 'demo-bar.png' ),
+			array( 'demo' . json_decode( '"\u00a0"' ) . 'bar.png', 'demo-bar.png' ),
+		);
+	}
 }