From 85eb52669daf51f34749cb1d339c4cc0b48ab5ae Mon Sep 17 00:00:00 2001 From: Gary Pendergast Date: Wed, 26 Oct 2016 05:16:09 +0000 Subject: [PATCH] General: Add a `sanitize_textarea_field()` function. Like its predecessor (`sanitize_text_field()`), `sanitize_textarea_field()` is a helper function to sanitise user input. As the name suggests, this function is for sanitising input from `textarea` fields - it strips tags and invalid UTF-8 characters, like `sanitize_text_field()`, but retains newlines and extra inline whitespace. Props ottok, nbachiyski, chriscct7, pento. Fixes #32257. git-svn-id: https://develop.svn.wordpress.org/trunk@38944 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/formatting.php | 76 ++++++++-- .../tests/formatting/SanitizeTextField.php | 139 +++++++++++++----- 2 files changed, 167 insertions(+), 48 deletions(-) diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index cfc2eaa432..d9884fb8fe 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -4653,6 +4653,7 @@ function wp_strip_all_tags($string, $remove_breaks = false) { * * @since 2.9.0 * + * @see sanitize_textarea_field() * @see wp_check_invalid_utf8() * @see wp_strip_all_tags() * @@ -4660,16 +4661,75 @@ function wp_strip_all_tags($string, $remove_breaks = false) { * @return string Sanitized string. */ function sanitize_text_field( $str ) { + $filtered = _sanitize_text_fields( $str, false ); + + /** + * Filters a sanitized text field string. + * + * @since 2.9.0 + * + * @param string $filtered The sanitized string. + * @param string $str The string prior to being sanitized. + */ + return apply_filters( 'sanitize_text_field', $filtered, $str ); +} + +/** + * Sanitizes a multiline string from user input or from the database. + * + * The function is like sanitize_text_field(), but preserves + * new lines (\n) and other whitespace, which are legitimate + * input in textarea elements. 
+ * + * @see sanitize_text_field() + * + * @since 4.7.0 + * + * @param string $str String to sanitize. + * @return string Sanitized string. + */ +function sanitize_textarea_field( $str ) { + $filtered = _sanitize_text_fields( $str, true ); + + /** + * Filters a sanitized textarea field string. + * + * @since 4.7.0 + * + * @param string $filtered The sanitized string. + * @param string $str The string prior to being sanitized. + */ + return apply_filters( 'sanitize_textarea_field', $filtered, $str ); +} + +/** + * Internal helper function to sanitize a string from user input or from the db + * + * @since 4.7.0 + * @access private + * + * @param string $str String to sanitize. + * @param bool $keep_newlines optional Whether to keep newlines. Default: false. + * @return string Sanitized string. + */ +function _sanitize_text_fields( $str, $keep_newlines = false ) { $filtered = wp_check_invalid_utf8( $str ); if ( strpos($filtered, '<') !== false ) { $filtered = wp_pre_kses_less_than( $filtered ); // This will strip extra whitespace for us. - $filtered = wp_strip_all_tags( $filtered, true ); - } else { - $filtered = trim( preg_replace('/[\r\n\t ]+/', ' ', $filtered) ); + $filtered = wp_strip_all_tags( $filtered, false ); + + // Use html entities in a special case to make sure no later + // newline stripping stage could lead to a functional tag + $filtered = str_replace("<\n", "&lt;\n", $filtered); } + if ( ! $keep_newlines ) { + $filtered = preg_replace( '/[\r\n\t ]+/', ' ', $filtered ); + } + $filtered = trim( $filtered ); + $found = false; while ( preg_match('/%[a-f0-9]{2}/i', $filtered, $match) ) { $filtered = str_replace($match[0], '', $filtered); @@ -4681,15 +4741,7 @@ function sanitize_text_field( $str ) { $filtered = trim( preg_replace('/ +/', ' ', $filtered) ); } - /** - * Filters a sanitized text field string. - * - * @since 2.9.0 - * - * @param string $filtered The sanitized string. - * @param string $str The string prior to being sanitized. 
- */ - return apply_filters( 'sanitize_text_field', $filtered, $str ); + return $filtered; } /** diff --git a/tests/phpunit/tests/formatting/SanitizeTextField.php b/tests/phpunit/tests/formatting/SanitizeTextField.php index 4cbcbbcdf2..be470ce2e8 100644 --- a/tests/phpunit/tests/formatting/SanitizeTextField.php +++ b/tests/phpunit/tests/formatting/SanitizeTextField.php @@ -4,44 +4,111 @@ * @group formatting */ class Tests_Formatting_SanitizeTextField extends WP_UnitTestCase { - // #11528 - function test_sanitize_text_field() { - $inputs = array( - 'оРангутанг', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space. - 'САПР', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space. - 'one is < two', - 'tags are not allowed here', - ' we should trim leading and trailing whitespace ', - 'we also trim extra internal whitespace', - 'tabs get removed too', - 'newlines are not welcome - here', - 'We also %AB remove %ab octets', - 'We don\'t need to wory about %A - B removing %a - b octets even when %a B they are obscured by whitespace', - '%AB%BC%DE', //Just octets - 'Invalid octects remain %II', - 'Nested octects %%%ABABAB %A%A%ABBB', - ); - $expected = array( - 'оРангутанг', - 'САПР', - 'one is < two', - 'tags are not allowed here', - 'we should trim leading and trailing whitespace', - 'we also trim extra internal whitespace', - 'tabs get removed too', - 'newlines are not welcome here', - 'We also remove octets', - 'We don\'t need to wory about %A B removing %a b octets even when %a B they are obscured by whitespace', - '', //Emtpy as we strip all the octets out - 'Invalid octects remain %II', - 'Nested octects', + function data_sanitize_text_field() { + return array( + array( + 'оРангутанг', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space. + 'оРангутанг', + ), + array( + 'САПР', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space. 
+ 'САПР', + ), + array( + 'one is < two', + 'one is < two', + ), + array( + "one is <\n two", + array( + 'oneline' => 'one is < two', + 'multiline' => "one is <\n two", + ), + ), + array( + "foo bar", + array( + 'oneline' => 'foo bar', + 'multiline' => "foo bar", + ), + ), + array( + "foo <\ndiv\n> bar", + array( + 'oneline' => 'foo < div > bar', + 'multiline' => "foo <\ndiv\n> bar", + ), + ), + array( + 'tags are not allowed here', + 'tags are not allowed here', + ), + array( + ' we should trim leading and trailing whitespace ', + 'we should trim leading and trailing whitespace', + ), + array( + 'we trim extra internal whitespace only in single line texts', + array( + 'oneline' => 'we trim extra internal whitespace only in single line texts', + 'multiline' => 'we trim extra internal whitespace only in single line texts', + ), + ), + array( + "tabs \tget removed in single line texts", + array( + 'oneline' => 'tabs get removed in single line texts', + 'multiline' => "tabs \tget removed in single line texts", + ), + ), + array( + "newlines are allowed only\n in multiline texts", + array( + 'oneline' => 'newlines are allowed only in multiline texts', + 'multiline' => "newlines are allowed only\n in multiline texts", + ), + ), + array( + 'We also %AB remove %ab octets', + 'We also remove octets', + ), + array( + 'We don\'t need to wory about %A + B removing %a + b octets even when %a B they are obscured by whitespace', + array ( + 'oneline' => 'We don\'t need to wory about %A B removing %a b octets even when %a B they are obscured by whitespace', + 'multiline' => "We don't need to wory about %A\n B removing %a\n b octets even when %a B they are obscured by whitespace", + ), + ), + array( + '%AB%BC%DE', //Just octets + '', //Emtpy as we strip all the octets out + ), + array( + 'Invalid octects remain %II', + 'Invalid octects remain %II', + ), + array( + 'Nested octects %%%ABABAB %A%A%ABBB', + 'Nested octects', + ), ); + } - foreach ($inputs as $key => $input) { - 
$this->assertEquals($expected[$key], sanitize_text_field($input)); + /** + * @ticket 32257 + * @dataProvider data_sanitize_text_field + */ + function test_sanitize_text_field( $string, $expected ) { + if ( is_array( $expected ) ) { + $expected_oneline = $expected['oneline']; + $expected_multiline = $expected['multiline']; + } else { + $expected_oneline = $expected_multiline = $expected; } + $this->assertEquals( $expected_oneline, sanitize_text_field( $string ) ); + $this->assertEquals( $expected_multiline, sanitize_textarea_field( $string ) ); + } }