The joys of `wptexturize()`:
* Revert parts of [28773] and [28727] and [29748].
* Do not crash PHP. Make the shortcode quantifier possessive to avoid backtracking.
* Reduce backtracking in long HTML comments by 100x.
* Do not ignore unclosed HTML comments.
* Do not break unregistered shortcodes, e.g. `[hello attr="value"]`.
* Do not break HTML in shortcode attributes, e.g. `[hello attr="<"]`.
* Do not match shortcodes when there is extra whitespace, e.g. `[ hello ]`.
* Add unit tests to show #12690 was not fully resolved.
* Tested PHP 5.2.4, 5.2.13, 5.4.32, and 5.5.8.

Adds/modifies unit tests.
Props miqrogroove.
See #29557.

git-svn-id: https://develop.svn.wordpress.org/trunk@29781 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
18adbb6439
commit
23f7f53be2
|
@ -28,7 +28,7 @@
|
||||||
* @return string The string replaced with html entities
|
* @return string The string replaced with html entities
|
||||||
*/
|
*/
|
||||||
function wptexturize($text, $reset = false) {
|
function wptexturize($text, $reset = false) {
|
||||||
global $wp_cockneyreplace, $shortcode_tags;
|
global $wp_cockneyreplace;
|
||||||
static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
|
static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
|
||||||
$default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
|
$default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
|
||||||
|
|
||||||
|
@ -205,45 +205,55 @@ function wptexturize($text, $reset = false) {
|
||||||
|
|
||||||
// Look for shortcodes and HTML elements.
|
// Look for shortcodes and HTML elements.
|
||||||
|
|
||||||
$tagnames = array_keys( $shortcode_tags );
|
$comment_regex =
|
||||||
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
|
'!' // Start of comment, after the <.
|
||||||
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
|
. '(?:' // Unroll the loop: Consume everything until --> is found.
|
||||||
|
. '-(?!->)' // Dash not followed by end of comment.
|
||||||
|
. '[^\-]*+' // Consume non-dashes.
|
||||||
|
. ')*+' // Loop possessively.
|
||||||
|
. '(?:-->)?'; // End of comment. If not found, match all input.
|
||||||
|
|
||||||
$regex = '/(' // Capture the entire match.
|
$shortcode_regex =
|
||||||
. '<' // Find start of element.
|
'\[' // Find start of shortcode.
|
||||||
. '(?(?=!--)' // Is this a comment?
|
. '[\/\[]?' // Shortcodes may begin with [/ or [[
|
||||||
. '.+?--\s*>' // Find end of comment
|
. '[^\s\/\[\]]' // No whitespace before name.
|
||||||
|
. '[^\[\]]*+' // Shortcodes do not contain other shortcodes. Possessive critical.
|
||||||
|
. '\]' // Find end of shortcode.
|
||||||
|
. '\]?'; // Shortcodes may end with ]]
|
||||||
|
|
||||||
|
$regex =
|
||||||
|
'/(' // Capture the entire match.
|
||||||
|
. '<' // Find start of element.
|
||||||
|
. '(?(?=!--)' // Is this a comment?
|
||||||
|
. $comment_regex // Find end of comment.
|
||||||
|
. '|'
|
||||||
|
. '[^>]+>' // Find end of element.
|
||||||
|
. ')'
|
||||||
. '|'
|
. '|'
|
||||||
. '[^>]+>' // Find end of element
|
. $shortcode_regex // Find shortcodes.
|
||||||
. ')'
|
. ')/s';
|
||||||
. '|'
|
|
||||||
. '\[' // Find start of shortcode.
|
|
||||||
. '\[?' // Shortcodes may begin with [[
|
|
||||||
. '\/?' // Closing slash may precede name.
|
|
||||||
. $tagregexp // Only match registered shortcodes, because performance.
|
|
||||||
. '[^\[\]]*' // Shortcodes do not contain other shortcodes.
|
|
||||||
. '\]' // Find end of shortcode.
|
|
||||||
. '\]?' // Shortcodes may end with ]]
|
|
||||||
. ')/s';
|
|
||||||
|
|
||||||
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
|
||||||
|
|
||||||
foreach ( $textarr as &$curl ) {
|
foreach ( $textarr as &$curl ) {
|
||||||
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
|
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
|
||||||
$first = $curl[0];
|
$first = $curl[0];
|
||||||
if ( '<' === $first && '>' === substr( $curl, -1 ) ) {
|
if ( '<' === $first && '<!--' === substr( $curl, 0, 4 ) ) {
|
||||||
// This is an HTML delimiter.
|
// This is an HTML comment delimiter.
|
||||||
|
|
||||||
if ( '<!--' !== substr( $curl, 0, 4 ) ) {
|
continue;
|
||||||
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
|
||||||
}
|
} elseif ( '<' === $first && '>' === substr( $curl, -1 ) ) {
|
||||||
|
// This is an HTML element delimiter.
|
||||||
|
|
||||||
|
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags );
|
||||||
|
|
||||||
} elseif ( '' === trim( $curl ) ) {
|
} elseif ( '' === trim( $curl ) ) {
|
||||||
// This is a newline between delimiters. Performance improves when we check this.
|
// This is a newline between delimiters. Performance improves when we check this.
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?\/?' . $tagregexp . '[^\[\]]*\]\]?$/', $curl ) ) {
|
} elseif ( '[' === $first && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) {
|
||||||
// This is a shortcode delimiter.
|
// This is a shortcode delimiter.
|
||||||
|
|
||||||
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
|
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
|
||||||
|
|
|
@ -231,7 +231,7 @@ function get_shortcode_regex() {
|
||||||
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
|
||||||
|
|
||||||
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
|
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
|
||||||
// Also, see shortcode_unautop() and shortcode.js and wptexturize().
|
// Also, see shortcode_unautop() and shortcode.js.
|
||||||
return
|
return
|
||||||
'\\[' // Opening bracket
|
'\\[' // Opening bracket
|
||||||
. '(\\[?)' // 1: Optional second opening bracket for escaping shortcodes: [[tag]]
|
. '(\\[?)' // 1: Optional second opening bracket for escaping shortcodes: [[tag]]
|
||||||
|
|
|
@ -1187,14 +1187,30 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||||
|
|
||||||
function data_tag_avoidance() {
|
function data_tag_avoidance() {
|
||||||
return array(
|
return array(
|
||||||
|
array(
|
||||||
|
'[ ... ]',
|
||||||
|
'[ … ]',
|
||||||
|
),
|
||||||
array(
|
array(
|
||||||
'[ is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
|
'[ is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
|
||||||
'[ is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
|
'[ is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
|
||||||
),
|
),
|
||||||
|
array(
|
||||||
|
'[is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]', // HTML corruption is a known bug. See tickets #12690 and #29557.
|
||||||
|
'[is it wise to <a title="allow user content ] here? hmm”> maybe </a> ]',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'[caption - is it wise to <a title="allow user content ] here? hmm"> maybe </a> ]',
|
||||||
|
'[caption - is it wise to <a title="allow user content ] here? hmm”> maybe </a> ]',
|
||||||
|
),
|
||||||
array(
|
array(
|
||||||
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a> ]',
|
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a> ]',
|
||||||
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a> ]',
|
'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a> ]',
|
||||||
),
|
),
|
||||||
|
array(
|
||||||
|
'[photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a>]',
|
||||||
|
'[photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a>]',
|
||||||
|
),
|
||||||
array(
|
array(
|
||||||
'[gallery ...]',
|
'[gallery ...]',
|
||||||
'[gallery ...]',
|
'[gallery ...]',
|
||||||
|
@ -1211,10 +1227,6 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||||
'[/gallery ...]', // This would actually be ignored by the shortcode system. The decision to not texturize it is intentional, if not correct.
|
'[/gallery ...]', // This would actually be ignored by the shortcode system. The decision to not texturize it is intentional, if not correct.
|
||||||
'[/gallery ...]',
|
'[/gallery ...]',
|
||||||
),
|
),
|
||||||
array(
|
|
||||||
'[...]...[/...]', // These are potentially usable shortcodes.
|
|
||||||
'[…]…[/…]',
|
|
||||||
),
|
|
||||||
array(
|
array(
|
||||||
'[[gallery]]...[[/gallery]]', // Shortcode parsing will ignore the inner ]...[ part and treat this as a single escaped shortcode.
|
'[[gallery]]...[[/gallery]]', // Shortcode parsing will ignore the inner ]...[ part and treat this as a single escaped shortcode.
|
||||||
'[[gallery]]…[[/gallery]]',
|
'[[gallery]]…[[/gallery]]',
|
||||||
|
@ -1223,10 +1235,6 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||||
'[[[gallery]]]...[[[/gallery]]]', // Again, shortcode parsing matches, but only the [[gallery] and [/gallery]] parts.
|
'[[[gallery]]]...[[[/gallery]]]', // Again, shortcode parsing matches, but only the [[gallery] and [/gallery]] parts.
|
||||||
'[[[gallery]]]…[[[/gallery]]]',
|
'[[[gallery]]]…[[[/gallery]]]',
|
||||||
),
|
),
|
||||||
array(
|
|
||||||
'[gal>ery ...]',
|
|
||||||
'[gal>ery …]',
|
|
||||||
),
|
|
||||||
array(
|
array(
|
||||||
'[gallery ...',
|
'[gallery ...',
|
||||||
'[gallery …',
|
'[gallery …',
|
||||||
|
@ -1300,8 +1308,40 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||||
'<!--...-->',
|
'<!--...-->',
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'<!-- ... -- >',
|
'<!-- ... -- > ...',
|
||||||
'<!-- ... -- >',
|
'<!-- ... -- > ...',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'<!-- ...', // An unclosed comment is still a comment.
|
||||||
|
'<!-- ...',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!-->b', // Browsers seem to allow this.
|
||||||
|
'a<!-->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!--->b',
|
||||||
|
'a<!--->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!---->b',
|
||||||
|
'a<!---->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!----->b',
|
||||||
|
'a<!----->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!-- c --->b',
|
||||||
|
'a<!-- c --->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!-- c -- d -->b',
|
||||||
|
'a<!-- c -- d -->b',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'a<!-- <!-- c --> -->b<!-- close -->',
|
||||||
|
'a<!-- <!-- c --> –>b<!-- close -->',
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'<!-- <br /> [gallery] ... -->',
|
'<!-- <br /> [gallery] ... -->',
|
||||||
|
@ -1727,11 +1767,23 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'[code ...]...[/code]', // code is not a registered shortcode.
|
'[code ...]...[/code]', // code is not a registered shortcode.
|
||||||
'[code …]…[/code]',
|
'[code ...]...[/code]',
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'[hello ...]...[/hello]', // hello is not a registered shortcode.
|
'[hello ...]...[/hello]', // hello is not a registered shortcode.
|
||||||
'[hello …]…[/hello]',
|
'[hello ...]…[/hello]',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'[...]...[/...]', // These are potentially usable shortcodes.
|
||||||
|
'[...]…[/...]',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'[gal>ery ...]',
|
||||||
|
'[gal>ery ...]',
|
||||||
|
),
|
||||||
|
array(
|
||||||
|
'[randomthing param="test"]',
|
||||||
|
'[randomthing param="test"]',
|
||||||
),
|
),
|
||||||
array(
|
array(
|
||||||
'[[audio]...[/audio]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [[audio] is ambiguous unless we run the entire shortcode regexp.
|
'[[audio]...[/audio]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [[audio] is ambiguous unless we run the entire shortcode regexp.
|
||||||
|
|
Loading…
Reference in New Issue