wptexturize()
improvements:
* Make sure that strings ending with a number and quotation mark get the proper smart quotes.
* Introduce `wptexturize_primes()`, a logic tree to determine whether or not "7'." represents seven feet, which then converts the special char into either a prime char or a closing quote char.

Adds unit tests.

Props miqrogroove.
Fixes #29256.

git-svn-id: https://develop.svn.wordpress.org/trunk@32863 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
da826f59a5
commit
ed8b9a8d27
@ -46,7 +46,17 @@ function wptexturize( $text, $reset = false ) {
|
||||
$dynamic_replacements = null,
|
||||
$default_no_texturize_tags = null,
|
||||
$default_no_texturize_shortcodes = null,
|
||||
$run_texturize = true;
|
||||
$run_texturize = true,
|
||||
$apos = null,
|
||||
$prime = null,
|
||||
$double_prime = null,
|
||||
$opening_quote = null,
|
||||
$closing_quote = null,
|
||||
$opening_single_quote = null,
|
||||
$closing_single_quote = null,
|
||||
$open_q_flag = '<!--oq-->',
|
||||
$open_sq_flag = '<!--osq-->',
|
||||
$apos_flag = '<!--apos-->';
|
||||
|
||||
// If there's nothing to do, just stop.
|
||||
if ( empty( $text ) || false === $run_texturize ) {
|
||||
@ -129,40 +139,30 @@ function wptexturize( $text, $reset = false ) {
|
||||
|
||||
// '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation.
|
||||
if ( "'" !== $apos || "'" !== $closing_single_quote ) {
|
||||
$dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_single_quote;
|
||||
$dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote;
|
||||
}
|
||||
if ( "'" !== $apos || '"' !== $closing_quote ) {
|
||||
$dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos . '$1' . $closing_quote;
|
||||
$dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote;
|
||||
}
|
||||
|
||||
// '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0.
|
||||
if ( "'" !== $apos ) {
|
||||
$dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos;
|
||||
$dynamic[ '/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/' ] = $apos_flag;
|
||||
}
|
||||
|
||||
// Quoted Numbers like '0.42'
|
||||
if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) {
|
||||
$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $opening_single_quote . '$1' . $closing_single_quote;
|
||||
$dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . $closing_single_quote;
|
||||
}
|
||||
|
||||
// Single quote at start, or preceded by (, {, <, [, ", -, or spaces.
|
||||
if ( "'" !== $opening_single_quote ) {
|
||||
$dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $opening_single_quote;
|
||||
$dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag;
|
||||
}
|
||||
|
||||
// Apostrophe in a word. No spaces, double apostrophes, or other punctuation.
|
||||
if ( "'" !== $apos ) {
|
||||
$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos;
|
||||
}
|
||||
|
||||
// 9' (prime)
|
||||
if ( "'" !== $prime ) {
|
||||
$dynamic[ '/(?<=\d)\'/' ] = $prime;
|
||||
}
|
||||
|
||||
// Single quotes followed by spaces or ending punctuation.
|
||||
if ( "'" !== $closing_single_quote ) {
|
||||
$dynamic[ '/\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $closing_single_quote;
|
||||
$dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag;
|
||||
}
|
||||
|
||||
$dynamic_characters['apos'] = array_keys( $dynamic );
|
||||
@ -171,22 +171,12 @@ function wptexturize( $text, $reset = false ) {
|
||||
|
||||
// Quoted Numbers like "42"
|
||||
if ( '"' !== $opening_quote && '"' !== $closing_quote ) {
|
||||
$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $opening_quote . '$1' . $closing_quote;
|
||||
}
|
||||
|
||||
// 9" (double prime)
|
||||
if ( '"' !== $double_prime ) {
|
||||
$dynamic[ '/(?<=\d)"/' ] = $double_prime;
|
||||
$dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote;
|
||||
}
|
||||
|
||||
// Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces.
|
||||
if ( '"' !== $opening_quote ) {
|
||||
$dynamic[ '/(?<=\A|[([{\-]|<|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
|
||||
}
|
||||
|
||||
// Any remaining double quotes.
|
||||
if ( '"' !== $closing_quote ) {
|
||||
$dynamic[ '/"/' ] = $closing_quote;
|
||||
$dynamic[ '/(?<=\A|[([{\-]|<|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag;
|
||||
}
|
||||
|
||||
$dynamic_characters['quote'] = array_keys( $dynamic );
|
||||
@ -300,9 +290,14 @@ function wptexturize( $text, $reset = false ) {
|
||||
|
||||
if ( false !== strpos( $curl, "'" ) ) {
|
||||
$curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl );
|
||||
$curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote );
|
||||
$curl = str_replace( $apos_flag, $apos, $curl );
|
||||
$curl = str_replace( $open_sq_flag, $opening_single_quote, $curl );
|
||||
}
|
||||
if ( false !== strpos( $curl, '"' ) ) {
|
||||
$curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl );
|
||||
$curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote );
|
||||
$curl = str_replace( $open_q_flag, $opening_quote, $curl );
|
||||
}
|
||||
if ( false !== strpos( $curl, '-' ) ) {
|
||||
$curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl );
|
||||
@ -321,6 +316,74 @@ function wptexturize( $text, $reset = false ) {
|
||||
return preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $text );
|
||||
}
|
||||
|
||||
/**
 * Implements a logic tree to determine whether or not "7'." represents seven feet,
 * then converts the special char into either a prime char or a closing quote char.
 *
 * The text is split on $open_quote, so every piece after the first starts just
 * inside a quotation. Inside such a piece, each $needle that could close the
 * quotation is weighed against the possibility that it is a prime following a digit.
 *
 * @since 4.3.0
 *
 * @param string $haystack    The plain text to be searched.
 * @param string $needle      The character to search for such as ' or ".
 * @param string $prime       The prime char to use for replacement.
 * @param string $open_quote  The opening quote char. Opening quote replacement must be accomplished already.
 * @param string $close_quote The closing quote char to use for replacement.
 * @return string The $haystack value after primes and quotes replacements.
 */
function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) {
	$spaces = wp_spaces_regexp();
	$flag   = '<!--wp-prime-or-quote-->';

	// Escape the needle so it is always matched literally inside the patterns below.
	$needle_re = preg_quote( $needle, '/' );

	// Needle at the end of the text, or directly before closing punctuation, ">" or a space.
	$quote_pattern = "/$needle_re(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ")/";
	// Needle directly after a digit.
	$prime_pattern = "/(?<=\\d)$needle_re/";
	// Temporary flag directly after a digit / not after a digit.
	$flag_after_digit = "/(?<=\\d)$flag/";
	$flag_no_digit    = "/(?<!\\d)$flag/";

	$sentences = explode( $open_quote, $haystack );

	foreach ( $sentences as $key => &$sentence ) {
		if ( false === strpos( $sentence, $needle ) ) {
			// Nothing to convert in this piece.
			continue;
		} elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) {
			// This piece follows an opening quote and has no closing quote yet.
			// Mark every closing-quote candidate with the temporary flag first.
			$sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count );

			if ( $count > 1 ) {
				// This sentence appears to have multiple closing quotes. Attempt Vulcan logic.
				// Candidates that do not follow a digit cannot be primes, so they are quotes.
				$sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 );

				if ( 0 === $count2 ) {
					// Every candidate follows a digit. Try looking for a quote followed by a period.
					$count2 = substr_count( $sentence, "$flag." );

					if ( $count2 > 0 ) {
						// Assume the rightmost quote-period match is the end of quotation.
						$pos = strrpos( $sentence, "$flag." );
					} else {
						// When all else fails, make the rightmost candidate a closing quote.
						// This is most likely to be problematic in the context of bug #18549.
						$pos = strrpos( $sentence, $flag );
					}

					$sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) );
				}

				// Use conventional replacement on any remaining primes and quotes.
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
				$sentence = preg_replace( $flag_after_digit, $prime, $sentence );
				$sentence = str_replace( $flag, $close_quote, $sentence );
			} elseif ( 1 === $count ) {
				// Found only one closing quote candidate, so give it priority over primes.
				$sentence = str_replace( $flag, $close_quote, $sentence );
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			} else {
				// No closing quotes found. Just run primes pattern.
				$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			}
		} else {
			// Either the text before the first opening quote, or a piece that already
			// holds a closing quote: primes take priority, then closing quotes.
			$sentence = preg_replace( $prime_pattern, $prime, $sentence );
			$sentence = preg_replace( $quote_pattern, $close_quote, $sentence );
		}

		// Any straight double quote still left over is treated as a closing quote.
		if ( '"' === $needle && false !== strpos( $sentence, '"' ) ) {
			$sentence = str_replace( '"', $close_quote, $sentence );
		}
	}

	// Break the reference so later use of the variable name cannot modify the last element.
	unset( $sentence );

	return implode( $open_quote, $sentences );
}
|
||||
|
||||
/**
|
||||
* Search for disabled element tags. Push element to stack on tag open and pop
|
||||
* on tag close.
|
||||
|
@ -90,8 +90,8 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||
//$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”… and ellipses.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"... and ellipses.'));
|
||||
//$this->assertEquals('Here is “a test <a href="http://example.com">with a link</a>”.', wptexturize('Here is "a test <a href="http://example.com">with a link</a>".'));
|
||||
//$this->assertEquals('Here is “<a href="http://example.com">a test with a link</a>”and a work stuck to the end.', wptexturize('Here is "<a href="http://example.com">a test with a link</a>"and a work stuck to the end.'));
|
||||
//$this->assertEquals('A test with a finishing number, “like 23”.', wptexturize('A test with a finishing number, "like 23".'));
|
||||
//$this->assertEquals('A test with a number, “like 62”, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
|
||||
$this->assertEquals('A test with a finishing number, “like 23”.', wptexturize('A test with a finishing number, "like 23".'));
|
||||
$this->assertEquals('A test with a number, “like 62”, is nice to have.', wptexturize('A test with a number, "like 62", is nice to have.'));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -121,7 +121,7 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||
$this->assertEquals('‘Class of ’99’?', wptexturize("'Class of '99'?"));
|
||||
$this->assertEquals('‘Class of ’99’s’', wptexturize("'Class of '99's'"));
|
||||
$this->assertEquals('‘Class of ’99’s’', wptexturize("'Class of '99’s'"));
|
||||
//$this->assertEquals('“Class of 99”', wptexturize("\"Class of 99\""));
|
||||
$this->assertEquals('“Class of 99”', wptexturize("\"Class of 99\""));
|
||||
$this->assertEquals('“Class of ’99”', wptexturize("\"Class of '99\""));
|
||||
$this->assertEquals('{“Class of ’99”}', wptexturize("{\"Class of '99\"}"));
|
||||
$this->assertEquals(' “Class of ’99” ', wptexturize(" \"Class of '99\" "));
|
||||
@ -1900,4 +1900,152 @@ class Tests_Formatting_WPTexturize extends WP_UnitTestCase {
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Ensure primes logic is not too greedy at the end of a quotation.
 *
 * Each data set is run through wptexturize() and compared with the expected text.
 *
 * @ticket 29256
 * @dataProvider data_primes_vs_quotes
 *
 * @param string $input  Raw text passed to wptexturize().
 * @param string $output Text that wptexturize() is expected to produce.
 */
function test_primes_vs_quotes( $input, $output ) {
	$texturized = wptexturize( $input );

	$this->assertEquals( $output, $texturized );
}
|
||||
|
||||
/**
 * Data provider for test_primes_vs_quotes().
 *
 * @return array[] List of array( input, expected output ) pairs.
 */
function data_primes_vs_quotes() {
	$cases = array();

	// Apostrophe inside a word plus a prime after a number.
	$cases[] = array(
		"George's porch is 99' long.",
		"George’s porch is 99′ long.",
	);

	// Double-quoted passage that ends with a number.
	$cases[] = array(
		'The best year "was that time in 2012" when everyone partied, he said.',
		'The best year “was that time in 2012” when everyone partied, he said.',
	);

	// Two primes and no quotation.
	$cases[] = array(
		"I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
		"I need 4 x 20′ = 80′ of trim.",
	);

	// Whole string double-quoted, ending with a number.
	$cases[] = array(
		'"Lorem ipsum dolor sit amet 1234"',
		'“Lorem ipsum dolor sit amet 1234”',
	);

	// Whole string single-quoted, ending with a number.
	$cases[] = array(
		"'Etiam eu egestas dui 1234'",
		"‘Etiam eu egestas dui 1234’",
	);

	// Quotation that both starts and ends with a number.
	$cases[] = array(
		'according to our source, "33% of all students scored less than 50" on the test.',
		'according to our source, “33% of all students scored less than 50” on the test.',
	);

	// Primes inside and after a single-quoted passage that itself ends with a number.
	$cases[] = array(
		"The doctor said, 'An average height is between 5' and 6' in study group 7'. He then produced a 6' chart of averages. A man of 7', incredibly, is very possible.",
		"The doctor said, ‘An average height is between 5′ and 6′ in study group 7’. He then produced a 6′ chart of averages. A man of 7′, incredibly, is very possible.",
	);

	// Multi-paragraph text with many quoted titles that end with a number.
	$cases[] = array(
		'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.

As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.

That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.

String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
		'Pirates have voted on “The Expendables 3” with their clicks — and it turns out the Sylvester Stallone-starrer hasn’t been astoundingly popular among digital thieves, relatively speaking.

As of Sunday, 5.12 million people worldwide had pirated “Expendables 3” since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.

That likely contributed to the action movie’s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after “Captain America: The Winter Soldier” (7.31 million), “Divergent” (6.29 million) and “The Amazing Spider-Man 2” (5.88 million). Moreover, that’s despite “Expendables 3” becoming available more than three weeks prior to the film’s U.S. theatrical debut.

String with a number followed by a single quote ‘Expendables 3’ vestibulum in arcu mi.',
	);

	return $cases;
}
|
||||
|
||||
/**
 * Make sure translation actually works.
 *
 * Also make sure opening and closing quotes are allowed to be identical.
 *
 * The typographic characters are swapped for marker strings through the
 * 'gettext_with_context' filter while wptexturize() runs, and the filter is
 * removed again before the result is checked.
 *
 * @ticket 29256
 * @dataProvider data_primes_quotes_translation
 *
 * @param string $input  Raw text passed to wptexturize().
 * @param string $output Text that wptexturize() is expected to produce.
 */
function test_primes_quotes_translation( $input, $output ) {
	$callback = array( $this, 'filter_translate2' );

	add_filter( 'gettext_with_context', $callback, 10, 4 );

	// The second argument is wptexturize()'s $reset flag.
	// NOTE(review): presumably it forces the translated characters to be looked up again - confirm.
	$result = wptexturize( $input, true );

	// remove_filter() identifies a callback by hook, callback and priority only;
	// the accepted-arguments count belongs to add_filter() alone.
	remove_filter( 'gettext_with_context', $callback, 10 );
	wptexturize( 'reset', true );

	$this->assertEquals( $output, $result );
}
|
||||
|
||||
/**
 * Filter callback for 'gettext_with_context' that replaces typographic
 * characters with easily recognisable marker strings.
 *
 * Opening and closing quotes deliberately map to the same marker, while the
 * right single quote maps to a separate marker in the 'apostrophe' context.
 *
 * @param string $translations The translated text.
 * @param string $text         The text to translate.
 * @param string $context      The translation context.
 * @param string $domain       The text domain. Unused.
 * @return string The marker for a known character, otherwise $translations unchanged.
 */
function filter_translate2( $translations, $text, $context, $domain ) {
	switch ( $text ) {
		case '–':
			$marker = '!endash!';
			break;

		case '—':
			$marker = '!emdash!';
			break;

		case '’':
			// The same character serves as apostrophe and as closing single quote.
			$marker = ( 'apostrophe' == $context ) ? '!apos!' : '!q1!';
			break;

		case '‘':
			$marker = '!q1!';
			break;

		case '“':
		case '”':
			$marker = '!q2!';
			break;

		case '′':
			$marker = '!prime1!';
			break;

		case '″':
			$marker = '!prime2!';
			break;

		default:
			$marker = $translations;
	}

	return $marker;
}
|
||||
|
||||
/**
 * Data provider for test_primes_quotes_translation().
 *
 * The expected strings use the markers produced by filter_translate2()
 * in place of the real typographic characters.
 *
 * @return array[] List of array( input, expected output ) pairs.
 */
function data_primes_quotes_translation() {
	$cases = array();

	// Apostrophe inside a word plus a prime after a number.
	$cases[] = array(
		"George's porch is 99' long.",
		"George!apos!s porch is 99!prime1! long.",
	);

	// Double-quoted passage that ends with a number.
	$cases[] = array(
		'The best year "was that time in 2012" when everyone partied, he said.',
		'The best year !q2!was that time in 2012!q2! when everyone partied, he said.',
	);

	// Two primes and no quotation.
	$cases[] = array(
		"I need 4 x 20' = 80' of trim.", // Works only with a space before the = char.
		"I need 4 x 20!prime1! = 80!prime1! of trim.",
	);

	// Whole string double-quoted, ending with a number.
	$cases[] = array(
		'"Lorem ipsum dolor sit amet 1234"',
		'!q2!Lorem ipsum dolor sit amet 1234!q2!',
	);

	// Whole string single-quoted, ending with a number.
	$cases[] = array(
		"'Etiam eu egestas dui 1234'",
		"!q1!Etiam eu egestas dui 1234!q1!",
	);

	// Quotation that both starts and ends with a number.
	$cases[] = array(
		'according to our source, "33% of all students scored less than 50" on the test.',
		'according to our source, !q2!33% of all students scored less than 50!q2! on the test.',
	);

	// Primes inside and after a single-quoted passage that itself ends with a number.
	$cases[] = array(
		"The doctor said, 'An average height is between 5' and 6' in study group 7'. He then produced a 6' chart of averages. A man of 7', incredibly, is very possible.",
		"The doctor said, !q1!An average height is between 5!prime1! and 6!prime1! in study group 7!q1!. He then produced a 6!prime1! chart of averages. A man of 7!prime1!, incredibly, is very possible.",
	);

	// Multi-paragraph text with many quoted titles that end with a number.
	$cases[] = array(
		'Pirates have voted on "The Expendables 3" with their clicks -- and it turns out the Sylvester Stallone-starrer hasn\'t been astoundingly popular among digital thieves, relatively speaking.

As of Sunday, 5.12 million people worldwide had pirated "Expendables 3" since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.

That likely contributed to the action movie\'s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after "Captain America: The Winter Soldier" (7.31 million), "Divergent" (6.29 million) and "The Amazing Spider-Man 2" (5.88 million). Moreover, that\'s despite "Expendables 3" becoming available more than three weeks prior to the film\'s U.S. theatrical debut.

String with a number followed by a single quote \'Expendables 3\' vestibulum in arcu mi.',
		'Pirates have voted on !q2!The Expendables 3!q2! with their clicks !emdash! and it turns out the Sylvester Stallone-starrer hasn!apos!t been astoundingly popular among digital thieves, relatively speaking.

As of Sunday, 5.12 million people worldwide had pirated !q2!Expendables 3!q2! since a high-quality copy hit torrent-sharing sites July 23, according to piracy-tracking firm Excipio.

That likely contributed to the action movie!apos!s dismal box-office debut this weekend. But over the same July 23-Aug. 18 time period, the movie was No. 4 in downloads, after !q2!Captain America: The Winter Soldier!q2! (7.31 million), !q2!Divergent!q2! (6.29 million) and !q2!The Amazing Spider-Man 2!q2! (5.88 million). Moreover, that!apos!s despite !q2!Expendables 3!q2! becoming available more than three weeks prior to the film!apos!s U.S. theatrical debut.

String with a number followed by a single quote !q1!Expendables 3!q1! vestibulum in arcu mi.',
	);

	return $cases;
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user