In wptexturize(), ensure that texturization does not corrupt contents of HTML elements, HTML comments, and shortcode attributes.

Adds a variety of unit tests/assertions.

Props miqrogroove.
Fixes #12690, #8912, #27602.


git-svn-id: https://develop.svn.wordpress.org/trunk@28727 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2014-06-10 14:33:16 +00:00
parent 9e347a40c2
commit 821c052473
2 changed files with 194 additions and 13 deletions

View File

@ -158,6 +158,11 @@ function wptexturize($text) {
$dynamic_replacements = array_values( $dynamic );
}
// If there's nothing to do, just stop.
if ( empty( $text ) ) {
return $text;
}
// Transform into regexp sub-expression used in _wptexturize_pushpop_element
// Must do this every time in case plugins use these filters in a context sensitive manner
/**
@ -180,25 +185,50 @@ function wptexturize($text) {
$no_texturize_tags_stack = array();
$no_texturize_shortcodes_stack = array();
$textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
// Look for shortcodes and HTML elements.
$regex = '/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. '.+?--\s*>' // Find end of comment
. '|'
. '.+?>' // Find end of element
. ')'
. '|'
. '\[' // Find start of shortcode.
. '\[?' // Shortcodes may begin with [[
. '[^\[\]<>]+' // Shortcodes do not contain other shortcodes or HTML elements.
. '\]' // Find end of shortcode.
. '\]?' // Shortcodes may end with ]]
. ')/s';
$textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
foreach ( $textarr as &$curl ) {
if ( empty( $curl ) ) {
continue;
}
// Only call _wptexturize_pushpop_element if first char is correct tag opening
// Only call _wptexturize_pushpop_element if $curl is a delimiter.
$first = $curl[0];
if ( '<' === $first ) {
_wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
} elseif ( '[' === $first ) {
_wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
if ( '<' === $first && '>' === substr( $curl, -1 ) ) {
// This is an HTML delimiter.
if ( '<!--' !== substr( $curl, 0, 4 ) ) {
_wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>' );
}
} elseif ( '[' === $first && 1 === preg_match( '/^\[[^\[\]<>]+\]$/', $curl ) ) {
// This is a shortcode delimiter.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']' );
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?[^\[\]<>]+\]\]?$/', $curl ) ) {
// This is an escaped shortcode delimiter.
// Do not texturize.
// Do not push to the shortcodes stack.
} elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
// This is not a tag, nor is the texturization disabled static strings
$curl = str_replace($static_characters, $static_replacements, $curl);
// regular expressions
$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
// 9x9 (times), but never 0x9999

View File

@ -1128,4 +1128,155 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
),
);
}
/**
 * Test HTML and shortcode avoidance.
 *
 * Runs each dataset from data_tag_avoidance() through wptexturize() and
 * checks that the result is exactly the expected string.
 *
 * @ticket 12690
 * @dataProvider data_tag_avoidance
 *
 * @param string $input  Text handed to wptexturize().
 * @param string $output Expected return value of wptexturize() for $input.
 */
function test_tag_avoidance( $input, $output ) {
	// Assertions produce no value, so there is nothing meaningful to return here.
	// assertSame() compares type and value, which avoids loose string matches.
	$this->assertSame( $output, wptexturize( $input ) );
}
/**
 * Data provider for test_tag_avoidance().
 *
 * Each dataset is a two-element list: the text passed to wptexturize()
 * followed by the output the test expects for it.
 *
 * @return array List of array( input, expected output ) pairs.
 */
function data_tag_avoidance() {
	$cases = array();

	// Square brackets around, and inside, an HTML element.
	$cases[] = array(
		'[ photos by <a href="http://example.com/?a[]=1&a[]=2"> this guy </a> ]',
		'[ photos by <a href="http://example.com/?a[]=1&#038;a[]=2"> this guy </a> ]',
	);

	// Single shortcode-like tags.
	$cases[] = array( '[gallery ...]', '[gallery ...]' );
	// This tag is still valid.
	$cases[] = array( '[[gallery ...]', '[[gallery ...]' );
	// This tag is also valid.
	$cases[] = array( '[gallery ...]]', '[gallery ...]]' );
	// This would actually be ignored by the shortcode system.
	// The decision to not texturize it is intentional, if not correct.
	$cases[] = array( '[/...]', '[/...]' );

	// These are potentially usable shortcodes.
	$cases[] = array( '[...]...[/...]', '[...]&#8230;[/...]' );
	// Shortcode parsing will ignore the inner ]...[ part and treat this as a single escaped shortcode.
	$cases[] = array( '[[...]]...[[/...]]', '[[...]]&#8230;[[/...]]' );
	// Again, shortcode parsing matches, but only the [[...] and [/...]] parts.
	$cases[] = array( '[[[...]]]...[[[/...]]]', '[[[...]]]&#8230;[[[/...]]]' );

	// These are potentially usable shortcodes. Unfortunately, the meaning of [[/code]
	// is ambiguous unless we run the entire shortcode regexp.
	// Expected output: same behavior as 3.9 due to buggy logic in
	// _wptexturize_pushpop_element(). See ticket #28483.
	$cases[] = array( '[[code]...[/code]...', '[[code]&#8230;[/code]...' );
	// These are potentially usable shortcodes. Unfortunately, the meaning of [/code]]
	// is ambiguous unless we run the entire shortcode regexp.
	// This test would not pass in 3.9 because the extra brace was always ignored by texturize.
	$cases[] = array( '[code]...[/code]]...', '[code]...[/code]]...' );

	// Malformed or unterminated tags, and tags mixed with HTML elements.
	$cases[] = array( '[gal>ery ...]', '[gal>ery &#8230;]' );
	$cases[] = array( '[gallery ...', '[gallery &#8230;' );
	$cases[] = array( '[gallery <br ... /> ...]', '[gallery <br ... /> &#8230;]' );
	$cases[] = array( '<br [gallery ...] ... />', '<br [gallery ...] ... />' );
	$cases[] = array( '<br [gallery ...] ... /', '<br [gallery ...] &#8230; /' );
	$cases[] = array( '<br ... />', '<br ... />' );
	$cases[] = array( '<br ... />...<br ... />', '<br ... />&#8230;<br ... />' );
	$cases[] = array( '[gallery ...]...[gallery ...]', '[gallery ...]&#8230;[gallery ...]' );

	// The same patterns using doubled square brackets.
	$cases[] = array( '[[gallery ...]]', '[[gallery ...]]' );
	$cases[] = array( '[[gallery ...]', '[[gallery ...]' );
	$cases[] = array( '[gallery ...]]', '[gallery ...]]' );
	$cases[] = array( '[/gallery ...]]', '[/gallery ...]]' );
	$cases[] = array( '[[gallery <br ... /> ...]]', '[[gallery <br ... /> &#8230;]]' );
	$cases[] = array( '<br [[gallery ...]] ... />', '<br [[gallery ...]] ... />' );
	$cases[] = array( '<br [[gallery ...]] ... /', '<br [[gallery ...]] &#8230; /' );
	$cases[] = array( '[[gallery ...]]...[[gallery ...]]', '[[gallery ...]]&#8230;[[gallery ...]]' );
	$cases[] = array( '[[gallery ...]...[/gallery]]', '[[gallery ...]&#8230;[/gallery]]' );

	// HTML comments.
	$cases[] = array( '<!-- ... -->', '<!-- ... -->' );
	$cases[] = array( '<!--...-->', '<!--...-->' );
	$cases[] = array( '<!-- ... -- >', '<!-- ... -- >' );
	$cases[] = array( '<!-- <br /> [gallery] ... -->', '<!-- <br /> [gallery] ... -->' );
	$cases[] = array( '...<!-- ... -->...', '&#8230;<!-- ... -->&#8230;' );
	$cases[] = array(
		'[gallery ...]...<!-- ... -->...<br ... />',
		'[gallery ...]&#8230;<!-- ... -->&#8230;<br ... />',
	);
	$cases[] = array(
		'<ul><li>Hello.</li><!--<li>Goodbye.</li>--></ul>',
		'<ul><li>Hello.</li><!--<li>Goodbye.</li>--></ul>',
	);

	return $cases;
}
}