From 821c052473eab0a983ce12701f4d88b4779df6d3 Mon Sep 17 00:00:00 2001 From: Scott Taylor Date: Tue, 10 Jun 2014 14:33:16 +0000 Subject: [PATCH] In `wptexturize()`, ensure that texturization does not corrupt contents of HTML elements, HTML comments, and shortcode attributes. Adds a variety of unit tests/assertions. Props miqrogroove. Fixes #12690, #8912, #27602. git-svn-id: https://develop.svn.wordpress.org/trunk@28727 602fd350-edb4-49c9-b593-d223f7449a82 --- src/wp-includes/formatting.php | 56 +++++-- .../phpunit/tests/formatting/WPTexturize.php | 151 ++++++++++++++++++ 2 files changed, 194 insertions(+), 13 deletions(-) diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php index 12dc1868a3..9551a61c65 100644 --- a/src/wp-includes/formatting.php +++ b/src/wp-includes/formatting.php @@ -158,6 +158,11 @@ function wptexturize($text) { $dynamic_replacements = array_values( $dynamic ); } + // If there's nothing to do, just stop. + if ( empty( $text ) ) { + return $text; + } + // Transform into regexp sub-expression used in _wptexturize_pushpop_element // Must do this every time in case plugins use these filters in a context sensitive manner /** @@ -180,25 +185,50 @@ function wptexturize($text) { $no_texturize_tags_stack = array(); $no_texturize_shortcodes_stack = array(); - $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); + // Look for shortcodes and HTML elements. + + $regex = '/(' // Capture the entire match. + . '<' // Find start of element. + . '(?(?=!--)' // Is this a comment? + . '.+?--\s*>' // Find end of comment + . '|' + . '.+?>' // Find end of element + . ')' + . '|' + . '\[' // Find start of shortcode. + . '\[?' // Shortcodes may begin with [[ + . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes or HTML elements. + . '\]' // Find end of shortcode. + . '\]?' // Shortcodes may end with ]] + . 
')/s'; + + $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); foreach ( $textarr as &$curl ) { - if ( empty( $curl ) ) { - continue; - } - - // Only call _wptexturize_pushpop_element if first char is correct tag opening + // Only call _wptexturize_pushpop_element if $curl is a delimiter. $first = $curl[0]; - if ( '<' === $first ) { - _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); - } elseif ( '[' === $first ) { - _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); + if ( '<' === $first && '>' === substr( $curl, -1 ) ) { + // This is an HTML delimiter. + + if ( '', + '', + ), + array( + '', + '', + ), + array( + '', + '', + ), + array( + '......', + '……', + ), + array( + '[gallery ...]......
', + '[gallery ...]…
', + ), + array( + '', + '', + ), + ); + } }