/**
 * Convert plaintext URIs to HTML links.
 *
 * Converts full URIs (scheme://...), bare www./ftp. host names and email
 * addresses found outside of HTML tags, then collapses any anchor that ended
 * up nested directly inside another anchor.
 *
 * @since 0.71
 *
 * @param string $text Content to convert URIs.
 * @return string Content with converted URIs.
 */
function make_clickable( $text ) {
	// Built once per call rather than once per text piece: the pattern never changes.
	$url_clickable = '~
		([\\s(<.,;:!?])                                        # 1: Leading whitespace, or punctuation
		(                                                      # 2: URL
			[\\w]{1,20}+://                                # Scheme and hier-part prefix
			(?=\S{1,2000}\s)                               # Limit to URLs less than about 2000 characters long
			[\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+         # Non-punctuation URL character
			(?:                                            # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
				[\'.,;:!?)]                            # Punctuation URL character
				[\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++ # Non-punctuation URL character
			)*
		)
		(\)?)                                                  # 3: Trailing closing parenthesis (for parethesis balancing post processing)
	~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
	      // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.

	$r       = '';
	$textarr = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Split out HTML tags.

	foreach ( $textarr as $piece ) {
		// Empty pieces and HTML tags pass through untouched. A piece that merely
		// looks like a tag but is really "<scheme://..." is still processed below.
		if ( '' === $piece || ( '<' === $piece[0] && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) {
			$r .= $piece;
			continue;
		}

		// Long strings might contain expensive edge cases ...
		if ( 10000 < strlen( $piece ) ) {
			// ... break it up.
			foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing parentheses.
				if ( 2101 < strlen( $chunk ) ) {
					$r .= $chunk; // Too big, no whitespace: bail.
				} else {
					$r .= make_clickable( $chunk );
				}
			}
			continue;
		}

		$ret = " $piece "; // Pad with whitespace to simplify the regexes.

		$ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
		$ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret );
		$ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret );

		$r .= substr( $ret, 1, -1 ); // Remove our whitespace padding.
	}

	// Cleanup of accidental links within links: keep the outer opening tag and
	// the link text, and drop the inner anchor.
	$r = preg_replace( '#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', '$1$3</a>', $r );

	return $r;
}