General: Add a `sanitize_textarea_field()` function.

Like its predecessor (`sanitize_text_field()`), `sanitize_textarea_field()` is a helper function to sanitise user input. As the name suggests, this function is for sanitising input from `textarea` fields - it strips tags and invalid UTF-8 characters, like `sanitize_text_field()`, but retains newlines and extra inline whitespace.

Props ottok, nbachiyski, chriscct7, pento.
Fixes #32257.



git-svn-id: https://develop.svn.wordpress.org/trunk@38944 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2016-10-26 05:16:09 +00:00
parent e06b7111e5
commit 85eb52669d
2 changed files with 167 additions and 48 deletions

View File

@ -4653,6 +4653,7 @@ function wp_strip_all_tags($string, $remove_breaks = false) {
*
* @since 2.9.0
*
* @see sanitize_textarea_field()
* @see wp_check_invalid_utf8()
* @see wp_strip_all_tags()
*
@ -4660,16 +4661,75 @@ function wp_strip_all_tags($string, $remove_breaks = false) {
* @return string Sanitized string.
*/
function sanitize_text_field( $str ) {
	// Single-line mode: the shared helper is asked not to keep newlines,
	// and its result is handed straight to the filter below.

	/**
	 * Filters a sanitized text field string.
	 *
	 * @since 2.9.0
	 *
	 * @param string $filtered The sanitized string.
	 * @param string $str      The string prior to being sanitized.
	 */
	return apply_filters( 'sanitize_text_field', _sanitize_text_fields( $str, false ), $str );
}
/**
 * Sanitizes multi-line text, such as the contents of a textarea element,
 * coming from user input or from the database.
 *
 * Works like sanitize_text_field(), except that new lines (\n) and other
 * whitespace are kept, since they are legitimate input in a textarea.
 *
 * @see sanitize_text_field()
 *
 * @since 4.7.0
 *
 * @param string $str String to sanitize.
 * @return string Sanitized string.
 */
function sanitize_textarea_field( $str ) {
	// Multi-line mode: the shared helper is asked to keep newlines,
	// and its result is handed straight to the filter below.

	/**
	 * Filters a sanitized textarea field string.
	 *
	 * @since 4.7.0
	 *
	 * @param string $filtered The sanitized string.
	 * @param string $str      The string prior to being sanitized.
	 */
	return apply_filters( 'sanitize_textarea_field', _sanitize_text_fields( $str, true ), $str );
}
/**
* Internal helper function to sanitize a string from user input or from the database.
*
* @since 4.7.0
* @access private
*
* @param string $str String to sanitize.
* @param bool $keep_newlines Optional. Whether to keep newlines. Default false.
* @return string Sanitized string.
*/
function _sanitize_text_fields( $str, $keep_newlines = false ) {
$filtered = wp_check_invalid_utf8( $str );
if ( strpos($filtered, '<') !== false ) {
$filtered = wp_pre_kses_less_than( $filtered );
// This will strip extra whitespace for us.
$filtered = wp_strip_all_tags( $filtered, true );
} else {
$filtered = trim( preg_replace('/[\r\n\t ]+/', ' ', $filtered) );
$filtered = wp_strip_all_tags( $filtered, false );
// Use html entities in a special case to make sure no later
// newline stripping stage could lead to a functional tag
$filtered = str_replace("<\n", "&lt;\n", $filtered);
}
if ( ! $keep_newlines ) {
$filtered = preg_replace( '/[\r\n\t ]+/', ' ', $filtered );
}
$filtered = trim( $filtered );
$found = false;
while ( preg_match('/%[a-f0-9]{2}/i', $filtered, $match) ) {
$filtered = str_replace($match[0], '', $filtered);
@ -4681,15 +4741,7 @@ function sanitize_text_field( $str ) {
$filtered = trim( preg_replace('/ +/', ' ', $filtered) );
}
/**
* Filters a sanitized text field string.
*
* @since 2.9.0
*
* @param string $filtered The sanitized string.
* @param string $str The string prior to being sanitized.
*/
return apply_filters( 'sanitize_text_field', $filtered, $str );
return $filtered;
}
/**

View File

@ -4,44 +4,111 @@
* @group formatting
*/
class Tests_Formatting_SanitizeTextField extends WP_UnitTestCase {
// #11528
function test_sanitize_text_field() {
$inputs = array(
'оРангутанг', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space.
'САПР', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space.
'one is < two',
'tags <span>are</span> <em>not allowed</em> here',
' we should trim leading and trailing whitespace ',
'we also trim extra internal whitespace',
'tabs get removed too',
'newlines are not welcome
here',
'We also %AB remove %ab octets',
'We don\'t need to wory about %A
B removing %a
b octets even when %a B they are obscured by whitespace',
'%AB%BC%DE', //Just octets
'Invalid octects remain %II',
'Nested octects %%%ABABAB %A%A%ABBB',
);
$expected = array(
'оРангутанг',
'САПР',
'one is &lt; two',
'tags are not allowed here',
'we should trim leading and trailing whitespace',
'we also trim extra internal whitespace',
'tabs get removed too',
'newlines are not welcome here',
'We also remove octets',
'We don\'t need to wory about %A B removing %a b octets even when %a B they are obscured by whitespace',
'', //Emtpy as we strip all the octets out
'Invalid octects remain %II',
'Nested octects',
function data_sanitize_text_field() {
return array(
array(
'оРангутанг', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space.
'оРангутанг',
),
array(
'САПР', //Ensure UTF8 text is safe the Р is D0 A0 and A0 is the non-breaking space.
'САПР',
),
array(
'one is < two',
'one is &lt; two',
),
array(
"one is <\n two",
array(
'oneline' => 'one is &lt; two',
'multiline' => "one is &lt;\n two",
),
),
array(
"foo <div\n> bar",
array(
'oneline' => 'foo bar',
'multiline' => "foo bar",
),
),
array(
"foo <\ndiv\n> bar",
array(
'oneline' => 'foo &lt; div > bar',
'multiline' => "foo &lt;\ndiv\n> bar",
),
),
array(
'tags <span>are</span> <em>not allowed</em> here',
'tags are not allowed here',
),
array(
' we should trim leading and trailing whitespace ',
'we should trim leading and trailing whitespace',
),
array(
'we trim extra internal whitespace only in single line texts',
array(
'oneline' => 'we trim extra internal whitespace only in single line texts',
'multiline' => 'we trim extra internal whitespace only in single line texts',
),
),
array(
"tabs \tget removed in single line texts",
array(
'oneline' => 'tabs get removed in single line texts',
'multiline' => "tabs \tget removed in single line texts",
),
),
array(
"newlines are allowed only\n in multiline texts",
array(
'oneline' => 'newlines are allowed only in multiline texts',
'multiline' => "newlines are allowed only\n in multiline texts",
),
),
array(
'We also %AB remove %ab octets',
'We also remove octets',
),
array(
'We don\'t need to wory about %A
B removing %a
b octets even when %a B they are obscured by whitespace',
array (
'oneline' => 'We don\'t need to wory about %A B removing %a b octets even when %a B they are obscured by whitespace',
'multiline' => "We don't need to wory about %A\n B removing %a\n b octets even when %a B they are obscured by whitespace",
),
),
array(
'%AB%BC%DE', //Just octets
'', //Emtpy as we strip all the octets out
),
array(
'Invalid octects remain %II',
'Invalid octects remain %II',
),
array(
'Nested octects %%%ABABAB %A%A%ABBB',
'Nested octects',
),
);
}
foreach ($inputs as $key => $input) {
$this->assertEquals($expected[$key], sanitize_text_field($input));
/**
* @ticket 32257
* @dataProvider data_sanitize_text_field
*/
function test_sanitize_text_field( $string, $expected ) {
if ( is_array( $expected ) ) {
$expected_oneline = $expected['oneline'];
$expected_multiline = $expected['multiline'];
} else {
$expected_oneline = $expected_multiline = $expected;
}
$this->assertEquals( $expected_oneline, sanitize_text_field( $string ) );
$this->assertEquals( $expected_multiline, sanitize_textarea_field( $string ) );
}
}