Protect newlines inside `CDATA` sections. Stripping them was breaking content, notably inline JS that wraps its code in comments for HTML standards compatibility.

* Replace newlines inside HTML elements with placeholders in `WP_Embed::autoembed()` before running `->autoembed_callback()`, then restore them
* Replace newlines inside HTML elements with placeholders in `wpautop()`, then restore them before returning
* Introduce `wp_html_split()` to DRY up the regex shared by `wp_replace_in_html_tags()` and `do_shortcodes_in_html_tags()`

Adds unit tests.

Props miqrogroove, kitchin, azaozz.
Fixes #33106.


git-svn-id: https://develop.svn.wordpress.org/trunk@33469 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2015-07-28 23:02:04 +00:00
parent 1558be9dfa
commit 4f814ec9ae
5 changed files with 225 additions and 46 deletions

View File

@ -129,6 +129,12 @@ class WP_Embed {
* `->maybe_make_link()` can return false on failure.
*/
public function shortcode( $attr, $url = '' ) {
// This filter can be used to output custom HTML instead of allowing oEmbed to run.
$custom = apply_filters( 'wp_embed_shortcode_custom', false, $attr, $url );
if ( false !== $custom ) {
return $custom;
}
$post = get_post();
if ( empty( $url ) && ! empty( $attr['src'] ) ) {
@ -318,11 +324,14 @@ class WP_Embed {
* @return string Potentially modified $content.
*/
public function autoembed( $content ) {
// Strip newlines from all elements.
$content = wp_replace_in_html_tags( $content, array( "\n" => " " ) );
// Replace line breaks from all HTML elements with placeholders.
$content = wp_replace_in_html_tags( $content, array( "\n" => '<!-- wp-line-break -->' ) );
// Find URLs that are on their own line.
return preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content );
$content = preg_replace_callback( '|^(\s*)(https?://[^\s"]+)(\s*)$|im', array( $this, 'autoembed_callback' ), $content );
// Put the line breaks back.
return str_replace( '<!-- wp-line-break -->', "\n", $content );
}
/**

View File

@ -504,8 +504,8 @@ function wpautop( $pee, $br = true ) {
// Standardize newline characters to "\n".
$pee = str_replace(array("\r\n", "\r"), "\n", $pee);
// Strip newlines from all elements.
$pee = wp_replace_in_html_tags( $pee, array( "\n" => " " ) );
// Find newlines in all elements and add placeholders.
$pee = wp_replace_in_html_tags( $pee, array( "\n" => " <!-- wpnl --> " ) );
// Collapse line breaks before and after <option> elements so they don't get autop'd.
if ( strpos( $pee, '<option' ) !== false ) {
@ -592,9 +592,59 @@ function wpautop( $pee, $br = true ) {
if ( !empty($pre_tags) )
$pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
// Restore newlines in all elements.
$pee = str_replace( " <!-- wpnl --> ", "\n", $pee );
return $pee;
}
/**
 * Split a string into plain text and HTML-like pieces.
 *
 * The input is cut at every element, comment (`<!-- ... -->`) and CDATA section
 * (`<![CDATA[ ... ]]>`). The delimiters are captured, so the returned list
 * alternates: even indexes hold the text between pieces (possibly an empty
 * string) and odd indexes hold the pieces themselves, each starting with `<`.
 * An unterminated element, comment or CDATA section extends to the end of the input.
 *
 * @since 4.2.4
 *
 * @param string $input The text to split.
 * @return array Text chunks and HTML pieces, in document order.
 */
function wp_html_split( $input ) {
	static $pattern = null;

	// Assemble the expression on first use only; later calls reuse it.
	if ( null === $pattern ) {
		$comment_body = implode( '', array(
			'!',        // Start of comment, after the <.
			'(?:',      // Unrolled loop: consume everything until --> is found.
			'-(?!->)',  // Dash not followed by end of comment.
			'[^\-]*+',  // Consume non-dashes.
			')*+',      // Loop possessively.
			'(?:-->)?', // End of comment. If not found, match all input.
		) );

		$cdata_body = implode( '', array(
			'!\[CDATA\[', // Start of CDATA section, after the <.
			'[^\]]*+',    // Consume non-].
			'(?:',        // Unrolled loop: consume everything until ]]> is found.
			'](?!]>)',    // One ] not followed by end of CDATA section.
			'[^\]]*+',    // Consume non-].
			')*+',        // Loop possessively.
			'(?:]]>)?',   // End of CDATA section. If not found, match all input.
		) );

		$pattern = implode( '', array(
			'/(',               // Capture the entire match.
			'<',                // Find start of element.
			'(?(?=!--)',        // Is this a comment?
			$comment_body,      // Find end of comment.
			'|',
			'(?(?=!\[CDATA\[)', // Is this a CDATA section?
			$cdata_body,        // Find end of CDATA section.
			'|',
			'[^>]*>?',          // Find end of element. If not found, match all input.
			')',
			')',
			')/s',
		) );
	}

	return preg_split( $pattern, $input, -1, PREG_SPLIT_DELIM_CAPTURE );
}
/**
* Replace characters or phrases within HTML elements only.
*
@ -606,25 +656,7 @@ function wpautop( $pee, $br = true ) {
*/
function wp_replace_in_html_tags( $haystack, $replace_pairs ) {
// Find all elements.
$comments =
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '(?:-->)?'; // End of comment. If not found, match all input.
$regex =
'/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. $comments // Find end of comment.
. '|'
. '[^>]*>?' // Find end of element. If not found, match all input.
. ')'
. ')/s';
$textarr = preg_split( $regex, $haystack, -1, PREG_SPLIT_DELIM_CAPTURE );
$textarr = wp_html_split( $haystack );
$changed = false;
// Optimize when searching for one item.

View File

@ -333,29 +333,10 @@ function do_shortcodes_in_html_tags( $content, $ignore_html ) {
$trans = array( '[' => '&#91;', ']' => '&#93;' );
$pattern = get_shortcode_regex();
$comment_regex =
'!' // Start of comment, after the <.
. '(?:' // Unroll the loop: Consume everything until --> is found.
. '-(?!->)' // Dash not followed by end of comment.
. '[^\-]*+' // Consume non-dashes.
. ')*+' // Loop possessively.
. '(?:-->)?'; // End of comment. If not found, match all input.
$regex =
'/(' // Capture the entire match.
. '<' // Find start of element.
. '(?(?=!--)' // Is this a comment?
. $comment_regex // Find end of comment.
. '|'
. '[^>]*>?' // Find end of element. If not found, match all input.
. ')'
. ')/s';
$textarr = preg_split( $regex, $content, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
$textarr = wp_html_split( $content );
foreach ( $textarr as &$element ) {
if ( '<' !== $element[0] ) {
if ( '' == $element || '<' !== $element[0] ) {
continue;
}
@ -370,7 +351,7 @@ function do_shortcodes_in_html_tags( $content, $ignore_html ) {
continue;
}
if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) ) {
if ( $ignore_html || '<!--' === substr( $element, 0, 4 ) || '<![CDATA[' === substr( $element, 0, 9 ) ) {
// Encode all [ and ] chars.
$element = strtr( $element, $trans );
continue;

View File

@ -399,4 +399,50 @@ Paragraph two.';
$this->assertEquals( $expected, trim( wpautop( $content ) ) );
}
/**
 * Newlines inside HTML elements, comments and CDATA sections must survive wpautop() unmangled.
 *
 * @ticket 33106
 * @dataProvider data_element_sanity
 *
 * @param string $input  Content handed to wpautop().
 * @param string $output Markup wpautop() is expected to produce.
 */
function test_element_sanity( $input, $output ) {
	$actual = wpautop( $input );

	$this->assertEquals( $output, $actual );
}
/**
 * Data provider for test_element_sanity().
 *
 * @return array Each entry is array( input for wpautop(), expected output ).
 */
function data_element_sanity() {
return array(
array(
"Hello <a\nhref='world'>",
"<p>Hello <a\nhref='world'></p>\n",
),
array(
"Hello <!-- a\nhref='world' -->",
"<p>Hello <!-- a\nhref='world' --></p>\n",
),
/* Disabled: block elements (here <hr>) inside a comment or CDATA section fail this test
in all versions, so a failure of these cases is not a regression.
array(
"Hello <!-- <hr> a\nhref='world' -->",
"<p>Hello <!-- <hr> a\nhref='world' --></p>\n",
),
array(
"Hello <![CDATA[ <hr> a\nhttps://youtu.be/jgz0uSaOZbE\n ]]>",
"<p>Hello <![CDATA[ <hr> a\nhttps://youtu.be/jgz0uSaOZbE\n ]]></p>\n",
),
*/
array(
"Hello <![CDATA[ a\nhttps://youtu.be/jgz0uSaOZbE\n ]]>",
"<p>Hello <![CDATA[ a\nhttps://youtu.be/jgz0uSaOZbE\n ]]></p>\n",
),
array(
"Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 ]]> -->",
"<p>Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 ]]> --></p>\n",
),
array(
"Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 --> a\n9 ]]>",
"<p>Hello <![CDATA[ <!-- a\nhttps://youtu.be/jgz0uSaOZbE\n a\n9 --> a\n9 ]]></p>\n",
),
);
}
}

View File

@ -585,4 +585,115 @@ VIDEO;
$this->assertEquals( 'This is a comment. / Это комментарий. / Βλέπετε ένα σχόλιο.', $post->post_excerpt );
}
/**
 * autoembed() must return a script block containing a multi-line CDATA section unchanged.
 *
 * @ticket 33106
 */
function test_multiline_cdata() {
global $wp_embed;
$content = <<<EOF
<script>// <![CDATA[
_my_function('data');
// ]]>
</script>
EOF;
$result = $wp_embed->autoembed( $content );
$this->assertEquals( $content, $result );
}
/**
 * autoembed() must return a script block containing a multi-line HTML comment unchanged.
 *
 * @ticket 33106
 */
function test_multiline_comment() {
global $wp_embed;
$content = <<<EOF
<script><!--
my_function();
// --> </script>
EOF;
$result = $wp_embed->autoembed( $content );
$this->assertEquals( $content, $result );
}
/**
 * Run the full 'the_content' filter over text that mixes [embed] shortcodes, bare URLs
 * and a script block holding a multi-line HTML comment.
 *
 * The expected output keeps the line breaks inside the comment as plain newlines
 * (no <br /> is inserted within it) while the surrounding lines are formatted.
 *
 * @ticket 33106
 */
function test_multiline_comment_with_embeds() {
$content = <<<EOF
Start.
[embed]http://www.youtube.com/embed/TEST01YRHA0[/embed]
<script><!--
my_function();
// --> </script>
http://www.youtube.com/embed/TEST02YRHA0
[embed]http://www.example.com/embed/TEST03YRHA0[/embed]
http://www.example.com/embed/TEST04YRHA0
Stop.
EOF;
$expected = <<<EOF
<p>Start.<br />
https://youtube.com/watch?v=TEST01YRHA0<br />
<script><!--
my_function();
// --> </script><br />
https://youtube.com/watch?v=TEST02YRHA0<br />
<a href="http://www.example.com/embed/TEST03YRHA0">http://www.example.com/embed/TEST03YRHA0</a><br />
http://www.example.com/embed/TEST04YRHA0<br />
Stop.</p>
EOF;
$result = apply_filters( 'the_content', $content );
$this->assertEquals( $expected, $result );
}
/**
 * Callback for the 'wp_embed_shortcode_custom' filter used by test_oembed_explicit_media_link().
 *
 * Supplies iframe markup for the one test video URL and passes every other URL through.
 *
 * @ticket 33106
 *
 * @param mixed  $custom Value received from the filter; returned untouched for any other URL.
 * @param mixed  $attr   Shortcode attributes; not used here.
 * @param string $url    URL being embedded.
 * @return mixed Iframe HTML for the test URL, otherwise $custom.
 */
function filter_wp_embed_shortcode_custom( $custom, $attr, $url ) {
	if ( 'https://www.example.com/?video=1' != $url ) {
		return $custom;
	}

	return "<iframe src='$url'></iframe>";
}
/**
 * With a custom embed handler hooked in, autoembed() must replace a URL that stands alone
 * on a line, but leave the same URL alone inside a link and inside a multi-line CDATA block.
 *
 * @ticket 33106
 */
function test_oembed_explicit_media_link() {
global $wp_embed;
// Hook in the callback that returns iframe markup for the test URL.
add_filter( 'wp_embed_shortcode_custom', array( $this, 'filter_wp_embed_shortcode_custom' ), 10, 3 );
// Case 1: the URL is the only thing on its line, so it is swapped for the iframe.
$content = <<<EOF
https://www.example.com/?video=1
EOF;
$expected = <<<EOF
<iframe src='https://www.example.com/?video=1'></iframe>
EOF;
$result = $wp_embed->autoembed( $content );
$this->assertEquals( $expected, $result );
// Case 2: the URL appears in an anchor and on its own line within a CDATA section;
// the content must come back byte-for-byte unchanged.
$content = <<<EOF
<a href="https://www.example.com/?video=1">https://www.example.com/?video=1</a>
<script>// <![CDATA[
_my_function('data');
myvar = 'Hello world
https://www.example.com/?video=1
don't break this';
// ]]>
</script>
EOF;
$result = $wp_embed->autoembed( $content );
$this->assertEquals( $content, $result );
// Unhook the callback so it does not affect other tests.
remove_filter( 'wp_embed_shortcode_custom', array( $this, 'filter_wp_embed_shortcode_custom' ), 10 );
}
}