Formatting: Expand `sanitize_file_name()` to better support UTF-8 characters.

Props: xknown, peterwilsoncc.


git-svn-id: https://develop.svn.wordpress.org/trunk@47638 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Jake Spurlock 2020-04-29 15:38:43 +00:00
parent 935ab39e8e
commit 74d6f9613b
2 changed files with 34 additions and 1 deletions

View File

@ -2005,6 +2005,24 @@ function remove_accents( $string ) {
function sanitize_file_name( $filename ) {
$filename_raw = $filename;
$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', chr( 0 ) );
// Check once whether the installed PCRE library supports UTF-8 and cache the result in a static variable.
static $utf8_pcre = null;
if ( ! isset( $utf8_pcre ) ) {
// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
$utf8_pcre = @preg_match( '/^./u', 'a' );
}
if ( ! seems_utf8( $filename ) ) {
$_ext = pathinfo( $filename, PATHINFO_EXTENSION );
$_name = pathinfo( $filename, PATHINFO_FILENAME );
$filename = sanitize_title_with_dashes( $_name ) . '.' . $_ext;
}
if ( $utf8_pcre ) {
$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
}
/**
* Filters the list of characters to remove from a filename.
*
@ -2014,7 +2032,6 @@ function sanitize_file_name( $filename ) {
* @param string $filename_raw The original filename to be sanitized.
*/
$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );
$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
$filename = str_replace( $special_chars, '', $filename );
$filename = str_replace( array( '%20', '+' ), '-', $filename );
$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );

View File

@ -68,4 +68,20 @@ class Tests_Formatting_SanitizeFileName extends WP_UnitTestCase {
// Test a filename that becomes extensionless.
$this->assertEquals( 'no-extension', sanitize_file_name( '_.no-extension' ) );
}
/**
 * Checks that sanitize_file_name() produces the expected name for each dataset.
 *
 * @dataProvider data_wp_filenames
 *
 * @param string $input    Raw filename passed to sanitize_file_name().
 * @param string $expected Filename the sanitized result is compared against.
 */
function test_replaces_invalid_utf8_characters( $input, $expected ) {
	$sanitized = sanitize_file_name( $input );

	$this->assertEquals( $expected, $sanitized );
}
/**
 * Data provider for test_replaces_invalid_utf8_characters().
 *
 * @return array[] List of datasets, each holding the raw filename followed by
 *                 the expected sanitized filename.
 */
function data_wp_filenames() {
	// Names starting with a lone 0xB1 byte, which is not valid UTF-8 on its own.
	$with_extension    = urldecode( '%B1myfile.png' );
	$without_extension = urldecode( '%B1myfile' );

	// U+00A0 (no-break space), decoded from its JSON escape sequence.
	$nbsp = json_decode( '"\u00a0"' );

	return array(
		array( $with_extension, 'myfile.png' ),
		array( $without_extension, 'myfile' ),
		array( 'demo bar.png', 'demo-bar.png' ),
		array( 'demo' . $nbsp . 'bar.png', 'demo-bar.png' ),
	);
}
}