wptexturize() improvements:

* Expand the `wptexturize()` RegEx to include the list of registered shortcodes.
* Avoid backtracking after `[` chars by not filtering params in registered shortcodes. As a side effect, escaped shortcodes that are not registered will now be texturized, along with their params.
* Registered shortcode params will never be texturized, even when escaped.
* Move all tests involving unregistered shortcodes to a new and improved unit test.
* Update one test involving HTML within shortcode params.

Props miqrogroove.
See #29557.


git-svn-id: https://develop.svn.wordpress.org/trunk@29748 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2014-09-17 15:13:24 +00:00
parent fbf974a962
commit 606cd967f6
3 changed files with 96 additions and 50 deletions

View File

@ -28,7 +28,7 @@
* @return string The string replaced with html entities
*/
function wptexturize($text, $reset = false) {
global $wp_cockneyreplace;
global $wp_cockneyreplace, $shortcode_tags;
static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
$default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true;
@ -205,6 +205,10 @@ function wptexturize($text, $reset = false) {
// Look for shortcodes and HTML elements.
$tagnames = array_keys( $shortcode_tags );
$tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) );
$tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex().
$regex = '/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
@ -215,11 +219,9 @@ function wptexturize($text, $reset = false) {
. '|'
. '\[' // Find start of shortcode.
. '\[?' // Shortcodes may begin with [[
. '(?:'
. '[^\[\]<>]' // Shortcodes do not contain other shortcodes.
. '|'
. '<[^>]+>' // HTML elements permitted. Prevents matching ] before >.
. ')++'
. '\/?' // Closing slash may precede name.
. $tagregexp // Only match registered shortcodes, because performance.
. '[^\[\]]*' // Shortcodes do not contain other shortcodes.
. '\]' // Find end of shortcode.
. '\]?' // Shortcodes may end with ]]
. ')/s';
@ -241,18 +243,18 @@ function wptexturize($text, $reset = false) {
continue;
} elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<[^>]+>)++\]$/', $curl ) ) {
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?\/?' . $tagregexp . '[^\[\]]*\]\]?$/', $curl ) ) {
// This is a shortcode delimiter.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<[^>]+>)++\]\]?$/', $curl ) ) {
// This is an escaped shortcode delimiter.
// Do not texturize.
// Do not push to the shortcodes stack.
continue;
if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) {
// Looks like a normal shortcode.
_wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes );
} else {
// Looks like an escaped shortcode.
// Do not texturize.
// Do not push to the shortcodes stack.
continue;
}
} elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
@ -313,7 +315,7 @@ function _wptexturize_pushpop_element($text, &$stack, $disabled_elements) {
// Parse out the tag name.
$space = strpos( $text, ' ' );
if ( FALSE === $space ) {
if ( false === $space ) {
$space = -1;
} else {
$space -= $name_offset;

View File

@ -231,7 +231,7 @@ function get_shortcode_regex() {
$tagregexp = join( '|', array_map('preg_quote', $tagnames) );
// WARNING! Do not change this regex without changing do_shortcode_tag() and strip_shortcode_tag()
// Also, see shortcode_unautop() and shortcode.js.
// Also, see shortcode_unautop() and shortcode.js and wptexturize().
return
'\\[' // Opening bracket
. '(\\[?)' // 1: Optional second opening bracket for escaping shortcodes: [[tag]]

View File

@ -11,7 +11,6 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
function test_disable() {
$this->assertEquals('<pre>---</pre>', wptexturize('<pre>---</pre>'));
$this->assertEquals('[a]a&#8211;b[code]---[/code]a&#8211;b[/a]', wptexturize('[a]a--b[code]---[/code]a--b[/a]'));
$this->assertEquals('<pre><code></code>--</pre>', wptexturize('<pre><code></code>--</pre>'));
$this->assertEquals( '<code>---</code>', wptexturize( '<code>---</code>' ) );
@ -1209,28 +1208,20 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
'[gallery ...]]',
),
array(
'[/...]', // This would actually be ignored by the shortcode system. The decision to not texturize it is intentional, if not correct.
'[/...]',
'[/gallery ...]', // This would actually be ignored by the shortcode system. The decision to not texturize it is intentional, if not correct.
'[/gallery ...]',
),
array(
'[...]...[/...]', // These are potentially usable shortcodes.
'[...]&#8230;[/...]',
'[&#8230;]&#8230;[/&#8230;]',
),
array(
'[[...]]...[[/...]]', // Shortcode parsing will ignore the inner ]...[ part and treat this as a single escaped shortcode.
'[[...]]&#8230;[[/...]]',
'[[gallery]]...[[/gallery]]', // Shortcode parsing will ignore the inner ]...[ part and treat this as a single escaped shortcode.
'[[gallery]]&#8230;[[/gallery]]',
),
array(
'[[[...]]]...[[[/...]]]', // Again, shortcode parsing matches, but only the [[...] and [/...]] parts.
'[[[...]]]&#8230;[[[/...]]]',
),
array(
'[[code]...[/code]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [[code] is ambiguous unless we run the entire shortcode regexp.
'[[code]&#8230;[/code]&#8230;',
),
array(
'[code]...[/code]]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [/code]] is ambiguous unless we run the entire shortcode regexp.
'[code]...[/code]]...', // This test would not pass in 3.9 because the extra brace was always ignored by texturize.
'[[[gallery]]]...[[[/gallery]]]', // Again, shortcode parsing matches, but only the [[gallery] and [/gallery]] parts.
'[[[gallery]]]&#8230;[[[/gallery]]]',
),
array(
'[gal>ery ...]',
@ -1345,8 +1336,8 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
'[ but also catches the <b>styled &#8220;[quote]&#8221; here</b> ]',
),
array(
'[Let\'s get crazy<input>[plugin code="<a href=\'?a[]=100\'>hello</a>"]</input>world]',
'[Let&#8217;s get crazy<input>[plugin code="<a href=\'?a[]=100\'>hello</a>"]</input>world]',
'[Let\'s get crazy<input>[caption code="<a href=\'?a[]=100\'>hello</a>"]</input>world]', // caption shortcode is invalid here because it contains [] chars.
'[Let&#8217;s get crazy<input>[caption code=&#8221;<a href=\'?a[]=100\'>hello</a>&#8220;]</input>world]',
),
);
}
@ -1698,32 +1689,85 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
'<code>hello</span>---</span>',
),
array(
'<span>hello[/code]---</span>',
'<span>hello[/code]&#8212;</span>',
'<span><code>hello</code>---</span>',
'<span><code>hello</code>&#8212;</span>',
),
array(
'[/code]hello<span>---</span>',
'[/code]hello<span>&#8212;</span>',
'<code>hello</code>world<span>---</span>',
'<code>hello</code>world<span>&#8212;</span>',
),
);
}
/**
 * Test wptexturize() output while 'audio' is added to the
 * 'no_texturize_shortcodes' list via filter_shortcodes().
 *
 * @ticket 29557
 * @dataProvider data_unregistered_shortcodes
 *
 * @param string $input  Text passed to wptexturize().
 * @param string $output Expected return value of wptexturize().
 */
function test_unregistered_shortcodes( $input, $output ) {
	add_filter( 'no_texturize_shortcodes', array( $this, 'filter_shortcodes' ), 10, 1 );

	// Run the function under test while the filter is active, but do not
	// assert yet: a failed assertion throws, and the filter must not leak
	// into the tests that run afterwards.
	$actual = wptexturize( $input );

	remove_filter( 'no_texturize_shortcodes', array( $this, 'filter_shortcodes' ), 10, 1 );

	// assertEquals() returns nothing, so there is no value worth returning.
	$this->assertEquals( $output, $actual );
}
/**
 * Callback hooked to 'no_texturize_shortcodes' by test_unregistered_shortcodes().
 *
 * Appends 'audio' to the list it receives.
 *
 * @param array $disabled List passed in by the filter (presumably shortcode tag
 *                        names that wptexturize() should skip; confirm against
 *                        wptexturize()).
 * @return array The same list with 'audio' appended.
 */
function filter_shortcodes( $disabled ) {
$disabled[] = 'audio';
return $disabled;
}
function data_unregistered_shortcodes() {
return array(
array(
'[a]a--b[audio]---[/audio]a--b[/a]',
'[a]a&#8211;b[audio]---[/audio]a&#8211;b[/a]',
),
array(
'[code]hello[/code]---</span>',
'[code]hello[/code]&#8212;</span>',
'[code ...]...[/code]', // code is not a registered shortcode.
'[code &#8230;]&#8230;[/code]',
),
array(
'<span>hello</span>---[code]',
'<span>hello</span>&#8212;[code]',
'[hello ...]...[/hello]', // hello is not a registered shortcode.
'[hello &#8230;]&#8230;[/hello]',
),
array(
'<span>hello[code]---</span>',
'<span>hello[code]---</span>',
'[[audio]...[/audio]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [[audio] is ambiguous unless we run the entire shortcode regexp.
'[[audio]&#8230;[/audio]&#8230;',
),
array(
'[code]hello<span>---</span>',
'[code]hello<span>---</span>',
'[audio]...[/audio]]...', // These are potentially usable shortcodes. Unfortunately, the meaning of [/audio]] is ambiguous unless we run the entire shortcode regexp.
'[audio]...[/audio]]...', // This test would not pass in 3.9 because the extra brace was always ignored by texturize.
),
array(
'[code]hello</span>---</span>',
'[code]hello</span>---</span>',
'<span>hello[/audio]---</span>',
'<span>hello[/audio]&#8212;</span>',
),
array(
'[/audio]hello<span>---</span>',
'[/audio]hello<span>&#8212;</span>',
),
array(
'[audio]hello[/audio]---</span>',
'[audio]hello[/audio]&#8212;</span>',
),
array(
'<span>hello</span>---[audio]',
'<span>hello</span>&#8212;[audio]',
),
array(
'<span>hello[audio]---</span>',
'<span>hello[audio]---</span>',
),
array(
'[audio]hello<span>---</span>',
'[audio]hello<span>---</span>',
),
array(
'[audio]hello</span>---</span>',
'[audio]hello</span>---</span>',
),
);
}