Optimize regexp usage in `wptexturize()` for a "3x Performance Boost."

Props miqrogroove.
See #28724.


git-svn-id: https://develop.svn.wordpress.org/trunk@28986 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2014-07-04 01:14:08 +00:00
parent 4b58f47a5c
commit 4e8afc6caa
1 changed file with 41 additions and 20 deletions

View File

@ -96,11 +96,13 @@ function wptexturize($text, $reset = false) {
$static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney );
$static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace );
$spaces = wp_spaces_regexp();
// Pattern-based replacements of characters. // Pattern-based replacements of characters.
// Sort the remaining patterns into several arrays for performance tuning.
$dynamic_characters = array( 'apos' => array(), 'quote' => array(), 'dash' => array() );
$dynamic_replacements = array( 'apos' => array(), 'quote' => array(), 'dash' => array() );
$dynamic = array(); $dynamic = array();
$spaces = wp_spaces_regexp();
// '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
if ( "'" !== $apos || "'" !== $closing_single_quote ) { if ( "'" !== $apos || "'" !== $closing_single_quote ) {
@ -115,10 +117,7 @@ function wptexturize($text, $reset = false) {
$dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos; $dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos;
} }
// Quoted Numbers like "42" or '42.00' // Quoted Numbers like '0.42'
if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
}
if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote; $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote;
} }
@ -133,16 +132,30 @@ function wptexturize($text, $reset = false) {
$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos; $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos;
} }
// 9" (double prime)
if ( '"' !== $double_prime ) {
$dynamic[ '/(?<=\d)"/' ] = $double_prime;
}
// 9' (prime) // 9' (prime)
if ( "'" !== $prime ) { if ( "'" !== $prime ) {
$dynamic[ '/(?<=\d)\'/' ] = $prime; $dynamic[ '/(?<=\d)\'/' ] = $prime;
} }
// Single quotes followed by spaces or ending punctuation.
if ( "'" !== $closing_single_quote ) {
$dynamic[ '/\'(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $closing_single_quote;
}
$dynamic_characters['apos'] = array_keys( $dynamic );
$dynamic_replacements['apos'] = array_values( $dynamic );
$dynamic = array();
// Quoted Numbers like "42"
if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
}
// 9" (double prime)
if ( '"' !== $double_prime ) {
$dynamic[ '/(?<=\d)"/' ] = $double_prime;
}
// Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
if ( '"' !== $opening_quote ) { if ( '"' !== $opening_quote ) {
$dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote; $dynamic[ '/(?<=\A|[([{\-]|&lt;|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
@ -152,20 +165,19 @@ function wptexturize($text, $reset = false) {
if ( '"' !== $closing_quote ) { if ( '"' !== $closing_quote ) {
$dynamic[ '/"/' ] = $closing_quote; $dynamic[ '/"/' ] = $closing_quote;
} }
// Single quotes followed by spaces or ending punctuation. $dynamic_characters['quote'] = array_keys( $dynamic );
if ( "'" !== $closing_single_quote ) { $dynamic_replacements['quote'] = array_values( $dynamic );
$dynamic[ '/\'(?=\Z|[.,)}\-\]]|&gt;|' . $spaces . ')/' ] = $closing_single_quote; $dynamic = array();
}
// Dashes and spaces // Dashes and spaces
$dynamic[ '/---/' ] = $em_dash; $dynamic[ '/---/' ] = $em_dash;
$dynamic[ '/(?<=' . $spaces . ')--(?=' . $spaces . ')/' ] = $em_dash; $dynamic[ '/(?<=' . $spaces . ')--(?=' . $spaces . ')/' ] = $em_dash;
$dynamic[ '/(?<!xn)--/' ] = $en_dash; $dynamic[ '/(?<!xn)--/' ] = $en_dash;
$dynamic[ '/(?<=' . $spaces . ')-(?=' . $spaces . ')/' ] = $en_dash; $dynamic[ '/(?<=' . $spaces . ')-(?=' . $spaces . ')/' ] = $en_dash;
$dynamic_characters = array_keys( $dynamic ); $dynamic_characters['dash'] = array_keys( $dynamic );
$dynamic_replacements = array_values( $dynamic ); $dynamic_replacements['dash'] = array_values( $dynamic );
} }
// Must do this every time in case plugins use these filters in a context sensitive manner // Must do this every time in case plugins use these filters in a context sensitive manner
@ -237,7 +249,16 @@ function wptexturize($text, $reset = false) {
// This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize.
$curl = str_replace( $static_characters, $static_replacements, $curl ); $curl = str_replace( $static_characters, $static_replacements, $curl );
$curl = preg_replace( $dynamic_characters, $dynamic_replacements, $curl );
if ( false !== strpos( $curl, "'" ) ) {
$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
}
if ( false !== strpos( $curl, '"' ) ) {
$curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
}
if ( false !== strpos( $curl, '-' ) ) {
$curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
}
// 9x9 (times), but never 0x9999 // 9x9 (times), but never 0x9999
if ( 1 === preg_match( '/(?<=\d)x-?\d/', $curl ) ) { if ( 1 === preg_match( '/(?<=\d)x-?\d/', $curl ) ) {