I18N: Introduce the `Plural_Forms` class.

Historically, we've evaluated the plural forms for each language using `create_function()`. That function is deprecated as of PHP 7.2, so it needs to be replaced.

The `Plural_Forms` class parses the `Plural-Forms` header from the PO file, and internally caches the result of all subsequent plural form tests, allowing it to match the performance of the existing code.

Props rmccue.
Fixes #41562.



git-svn-id: https://develop.svn.wordpress.org/trunk@41722 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Gary Pendergast 2017-10-04 01:29:59 +00:00
parent 3fcfefd05c
commit f3a52234ea
8 changed files with 600 additions and 6 deletions

View File

@ -0,0 +1,343 @@
<?php
/**
 * A gettext Plural-Forms parser.
 *
 * The expression is converted to Reverse Polish Notation once, when the
 * object is constructed, and is then evaluated against a number on demand.
 * Results are memoised per number.
 *
 * @since 4.9.0
 */
class Plural_Forms {
	/**
	 * Operator characters.
	 *
	 * Every character that may appear in an operator. A run of these
	 * characters in the input is read as a single operator token.
	 *
	 * @since 4.9.0
	 * @var string OP_CHARS Operator characters.
	 */
	const OP_CHARS = '|&><!=%?:';

	/**
	 * Valid number characters.
	 *
	 * @since 4.9.0
	 * @var string NUM_CHARS Valid number characters.
	 */
	const NUM_CHARS = '0123456789';

	/**
	 * Operator precedence.
	 *
	 * Operator precedence from highest to lowest. Higher numbers indicate
	 * higher precedence, and are executed first.
	 *
	 * Parentheses carry the lowest value so that no operator is ever popped
	 * past an open parenthesis while parsing.
	 *
	 * @see https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B#Operator_precedence
	 *
	 * @since 4.9.0
	 * @var array $op_precedence Operator precedence from highest to lowest.
	 */
	protected static $op_precedence = array(
		'%'  => 6,

		'<'  => 5,
		'<=' => 5,
		'>'  => 5,
		'>=' => 5,

		'==' => 4,
		'!=' => 4,

		'&&' => 3,

		'||' => 2,

		'?:' => 1,
		'?'  => 1,

		'('  => 0,
		')'  => 0,
	);

	/**
	 * Tokens generated from the string.
	 *
	 * Stored in Reverse Polish Notation. Each token is one of
	 * `array( 'var' )`, `array( 'value', int )` or `array( 'op', string )`.
	 *
	 * @since 4.9.0
	 * @var array $tokens List of tokens.
	 */
	protected $tokens = array();

	/**
	 * Cache for repeated calls to the function.
	 *
	 * @since 4.9.0
	 * @var array $cache Map of $n => $result
	 */
	protected $cache = array();

	/**
	 * Constructor.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If the expression cannot be parsed.
	 *
	 * @param string $str Plural function (just the bit after `plural=` from Plural-Forms)
	 */
	public function __construct( $str ) {
		$this->parse( $str );
	}

	/**
	 * Parse a Plural-Forms string into tokens.
	 *
	 * Uses the shunting-yard algorithm to convert the string to Reverse Polish
	 * Notation tokens, which are stored in `$this->tokens`.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception On an unknown symbol or operator, mismatched
	 *                   parentheses, or a ":" without a preceding "?".
	 *
	 * @param string $str String to parse.
	 */
	protected function parse( $str ) {
		$pos = 0;
		$len = strlen( $str );

		// Convert infix operators to postfix using the shunting-yard algorithm.
		$output = array();
		$stack  = array();

		while ( $pos < $len ) {
			$next = substr( $str, $pos, 1 );

			switch ( $next ) {
				// Ignore whitespace.
				case ' ':
				case "\t":
					$pos++;
					break;

				// Variable (n).
				case 'n':
					$output[] = array( 'var' );
					$pos++;
					break;

				// Parentheses.
				case '(':
					$stack[] = $next;
					$pos++;
					break;

				case ')':
					$found = false;
					while ( ! empty( $stack ) ) {
						$o2 = $stack[ count( $stack ) - 1 ];
						if ( '(' !== $o2 ) {
							// Flush operators until the matching open paren.
							$output[] = array( 'op', array_pop( $stack ) );
							continue;
						}

						// Discard open paren.
						array_pop( $stack );
						$found = true;
						break;
					}

					if ( ! $found ) {
						throw new Exception( 'Mismatched parentheses' );
					}

					$pos++;
					break;

				// Operators.
				case '|':
				case '&':
				case '>':
				case '<':
				case '!':
				case '=':
				case '%':
				case '?':
					// Consume the whole run of operator characters as one token.
					$end_operator = strspn( $str, self::OP_CHARS, $pos );
					$operator     = substr( $str, $pos, $end_operator );
					if ( ! array_key_exists( $operator, self::$op_precedence ) ) {
						throw new Exception( sprintf( 'Unknown operator "%s"', $operator ) );
					}

					while ( ! empty( $stack ) ) {
						$o2 = $stack[ count( $stack ) - 1 ];

						// Ternary is right-associative in C.
						if ( '?:' === $operator || '?' === $operator ) {
							if ( self::$op_precedence[ $operator ] >= self::$op_precedence[ $o2 ] ) {
								break;
							}
						} elseif ( self::$op_precedence[ $operator ] > self::$op_precedence[ $o2 ] ) {
							break;
						}

						$output[] = array( 'op', array_pop( $stack ) );
					}
					$stack[] = $operator;

					$pos += $end_operator;
					break;

				// Ternary "else".
				case ':':
					$found = false;
					$s_pos = count( $stack ) - 1;
					while ( $s_pos >= 0 ) {
						$o2 = $stack[ $s_pos ];
						if ( '?' !== $o2 ) {
							$output[] = array( 'op', array_pop( $stack ) );
							$s_pos--;
							continue;
						}

						// Replace the pending "?" with a completed "?:".
						$stack[ $s_pos ] = '?:';
						$found           = true;
						break;
					}

					if ( ! $found ) {
						throw new Exception( 'Missing starting "?" ternary operator' );
					}
					$pos++;
					break;

				// Default - number or invalid.
				default:
					if ( $next >= '0' && $next <= '9' ) {
						$span     = strspn( $str, self::NUM_CHARS, $pos );
						$output[] = array( 'value', intval( substr( $str, $pos, $span ) ) );
						$pos     += $span;
						// `break`, not `continue`: `continue` inside a switch acts
						// as `break` and raises a warning on PHP 7.3+.
						break;
					}

					throw new Exception( sprintf( 'Unknown symbol "%s"', $next ) );
			}
		}

		// Flush whatever operators are still pending.
		while ( ! empty( $stack ) ) {
			$o2 = array_pop( $stack );
			if ( '(' === $o2 || ')' === $o2 ) {
				throw new Exception( 'Mismatched parentheses' );
			}

			$output[] = array( 'op', $o2 );
		}

		$this->tokens = $output;
	}

	/**
	 * Get the plural form for a number.
	 *
	 * Caches the value for repeated calls.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If the parsed expression cannot be evaluated.
	 *
	 * @param int $num Number to get plural form for.
	 * @return int Plural form value.
	 */
	public function get( $num ) {
		if ( isset( $this->cache[ $num ] ) ) {
			return $this->cache[ $num ];
		}

		$result              = $this->execute( $num );
		$this->cache[ $num ] = $result;

		return $result;
	}

	/**
	 * Execute the plural form function.
	 *
	 * Evaluates the stored Reverse Polish Notation tokens with a value stack.
	 * The result is not cached here; use get() for cached access.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If an operator is not recognised (for example a "?"
	 *                   that never received its ":"), or if the stack does
	 *                   not hold exactly one value once all tokens are consumed.
	 *
	 * @param int $n Variable "n" to substitute.
	 * @return int Plural form value.
	 */
	public function execute( $n ) {
		$stack = array();

		foreach ( $this->tokens as $token ) {
			if ( 'var' === $token[0] ) {
				$stack[] = $n;
				continue;
			}

			if ( 'value' === $token[0] ) {
				$stack[] = $token[1];
				continue;
			}

			// Only operators left.
			switch ( $token[1] ) {
				case '%':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 % $v2;
					break;

				case '||':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 || $v2;
					break;

				case '&&':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 && $v2;
					break;

				case '<':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 < $v2;
					break;

				case '<=':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 <= $v2;
					break;

				case '>':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 > $v2;
					break;

				case '>=':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 >= $v2;
					break;

				case '!=':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 != $v2;
					break;

				case '==':
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 == $v2;
					break;

				case '?:':
					// Operands were pushed as condition, "then" value, "else" value.
					$v3      = array_pop( $stack );
					$v2      = array_pop( $stack );
					$v1      = array_pop( $stack );
					$stack[] = $v1 ? $v2 : $v3;
					break;

				default:
					throw new Exception( sprintf( 'Unknown operator "%s"', $token[1] ) );
			}
		}

		if ( count( $stack ) !== 1 ) {
			throw new Exception( 'Too many values remaining on the stack' );
		}

		// Comparison and logical operators leave booleans; normalise to int.
		return (int) $stack[0];
	}
}

View File

@ -7,6 +7,7 @@
* @subpackage translations
*/
require_once dirname(__FILE__) . '/plural-forms.php';
require_once dirname(__FILE__) . '/entry.php';
if ( ! class_exists( 'Translations', false ) ):
@ -187,7 +188,7 @@ class Gettext_Translations extends Translations {
function nplurals_and_expression_from_header($header) {
if (preg_match('/^\s*nplurals\s*=\s*(\d+)\s*;\s+plural\s*=\s*(.+)$/', $header, $matches)) {
$nplurals = (int)$matches[1];
$expression = trim($this->parenthesize_plural_exression($matches[2]));
$expression = trim( $matches[2] );
return array($nplurals, $expression);
} else {
return array(2, 'n != 1');
@ -201,11 +202,13 @@ class Gettext_Translations extends Translations {
* @param string $expression
*/
function make_plural_form_function($nplurals, $expression) {
$expression = str_replace('n', '$n', $expression);
$func_body = "
\$index = (int)($expression);
return (\$index < $nplurals)? \$index : $nplurals - 1;";
return create_function('$n', $func_body);
try {
$handler = new Plural_Forms( rtrim( $expression, ';' ) );
return array( $handler, 'get' );
} catch ( Exception $e ) {
// Fall back to default plural-form function.
return $this->make_plural_form_function( 2, 'n != 1' );
}
}
/**

View File

@ -1,5 +1,8 @@
<?php
/**
* @group pomo
*/
class Tests_POMO_MO extends WP_UnitTestCase {
function test_mo_simple() {

View File

@ -1,5 +1,8 @@
<?php
/**
* @group pomo
*/
class Tests_POMO_NOOPTranslations extends WP_UnitTestCase {
function setUp() {
parent::setUp();

View File

@ -0,0 +1,232 @@
<?php
/**
* @group pomo
*/
class PluralFormsTest extends WP_UnitTestCase {
	/**
	 * Legacy plural form function.
	 *
	 * Builds the evaluator the way the pre-Plural_Forms code did, by
	 * generating PHP source and handing it to create_function(). Only used
	 * as the reference implementation in test_regression().
	 *
	 * @param int $nplurals
	 * @param string $expression
	 */
	protected static function make_plural_form_function($nplurals, $expression) {
		$expression = str_replace('n', '$n', $expression);
		$func_body = "
			\$index = (int)($expression);
			return (\$index < $nplurals)? \$index : $nplurals - 1;";
		return create_function('$n', $func_body);
	}

	/**
	 * Parenthesize plural expression.
	 *
	 * Legacy workaround for PHP's flipped precedence order for ternary.
	 *
	 * @param string $expression the expression without parentheses
	 * @return string the expression with parentheses added
	 */
	protected static function parenthesize_plural_expression($expression) {
		$expression .= ';';
		$res = '';
		$depth = 0;
		for ($i = 0; $i < strlen($expression); ++$i) {
			$char = $expression[$i];
			switch ($char) {
				case '?':
					$res .= ' ? (';
					$depth++;
					break;
				case ':':
					$res .= ') : (';
					break;
				case ';':
					$res .= str_repeat(')', $depth) . ';';
					$depth= 0;
					break;
				default:
					$res .= $char;
			}
		}
		return rtrim($res, ';');
	}

	/**
	 * Data provider: one dataset per GlotPress locale whose plural
	 * expression differs from the default `n != 1`.
	 *
	 * Downloads the GlotPress locales file when GP_Locales is not already
	 * loaded, and skips when the download fails.
	 *
	 * @return array Datasets of ( locale slug, nplurals, plural expression ).
	 */
	public static function locales_provider() {
		if ( ! class_exists( 'GP_Locales' ) ) {
			$filename = download_url( 'https://raw.githubusercontent.com/GlotPress/GlotPress-WP/develop/locales/locales.php' );
			if ( is_wp_error( $filename ) ) {
				self::markTestSkipped( 'Unable to retrieve GP_Locales file' );
			}
			require_once $filename;
		}

		$locales = GP_Locales::locales();
		$plural_expressions = array();
		foreach ( $locales as $slug => $locale ) {
			$plural_expression = $locale->plural_expression;
			if ( $plural_expression !== 'n != 1' ) {
				$plural_expressions[] = array( $slug, $locale->nplurals, $plural_expression );
			}
		}

		return $plural_expressions;
	}

	/**
	 * Compares Plural_Forms against the legacy create_function() evaluator
	 * for every number from 0 to 200.
	 *
	 * @dataProvider locales_provider
	 * @group external-http
	 */
	public function test_regression( $lang, $nplurals, $expression ) {
		// The reference implementation relies on create_function(), which is
		// deprecated as of PHP 7.2 and no longer exists in PHP 8.0.
		if ( version_compare( PHP_VERSION, '7.2', '>=' ) ) {
			$this->markTestSkipped( 'create_function() is deprecated as of PHP 7.2 and removed in PHP 8.0.' );
		}

		$parenthesized = self::parenthesize_plural_expression( $expression );
		$old_style = self::make_plural_form_function( $nplurals, $parenthesized );
		$pluralForms = new Plural_Forms( $expression );

		$generated_old = array();
		$generated_new = array();

		foreach ( range( 0, 200 ) as $i ) {
			$generated_old[] = $old_style( $i );
			$generated_new[] = $pluralForms->get( $i );
		}

		$this->assertSame( $generated_old, $generated_new );
	}

	/**
	 * Data provider: hand-written expressions with a map of input number to
	 * expected plural form.
	 *
	 * @return array Datasets of ( expression, array of $n => expected ).
	 */
	public static function simple_provider() {
		return array(
			array(
				// Simple equivalence.
				'n != 1',
				array(
					-1 => 1,
					0 => 1,
					1 => 0,
					2 => 1,
					5 => 1,
					10 => 1,
				),
			),
			array(
				// Ternary
				'n ? 1 : 2',
				array(
					-1 => 1,
					0 => 2,
					1 => 1,
					2 => 1,
				),
			),
			array(
				// Comparison
				'n > 1 ? 1 : 2',
				array(
					-2 => 2,
					-1 => 2,
					0 => 2,
					1 => 2,
					2 => 1,
					3 => 1,
				),
			),
			array(
				// Nested ternary
				'n > 1 ? n > 2 ? 1 : 2 : 3',
				array(
					-2 => 3,
					-1 => 3,
					0 => 3,
					1 => 3,
					2 => 2,
					3 => 1,
					4 => 1,
				),
			),
		);
	}

	/**
	 * @dataProvider simple_provider
	 */
	public function test_simple( $expression, $expected ) {
		$pluralForms = new Plural_Forms( $expression );
		$actual = array();
		foreach ( array_keys( $expected ) as $num ) {
			$actual[ $num ] = $pluralForms->get( $num );
		}

		$this->assertSame( $expected, $actual );
	}

	/**
	 * @expectedException Exception
	 * @expectedExceptionMessage Unknown symbol "#"
	 */
	public function test_invalid_operator() {
		// Parsing happens in the constructor, so construction alone must throw.
		new Plural_Forms( 'n # 2' );
	}

	/**
	 * @expectedException Exception
	 * @expectedExceptionMessage Unknown operator "&"
	 */
	public function test_partial_operator() {
		new Plural_Forms( 'n & 1' );
	}

	/**
	 * @expectedException Exception
	 * @expectedExceptionMessage Mismatched parentheses
	 */
	public function test_mismatched_open_paren() {
		new Plural_Forms( '((n)' );
	}

	/**
	 * @expectedException Exception
	 * @expectedExceptionMessage Mismatched parentheses
	 */
	public function test_mismatched_close_paren() {
		new Plural_Forms( '(n))' );
	}

	/**
	 * @expectedException Exception
	 * @expectedExceptionMessage Missing starting "?" ternary operator
	 */
	public function test_missing_ternary_operator() {
		new Plural_Forms( 'n : 2' );
	}

	/**
	 * A "?" without its ":" parses, but fails once it is evaluated.
	 *
	 * @expectedException Exception
	 * @expectedExceptionMessage Unknown operator "?"
	 */
	public function test_missing_ternary_else() {
		$pluralForms = new Plural_Forms( 'n ? 1' );
		$pluralForms->get( 1 );
	}

	/**
	 * Two operands with no operator parse, but fail once evaluated.
	 *
	 * @expectedException Exception
	 * @expectedExceptionMessage Too many values remaining on the stack
	 */
	public function test_overflow_stack() {
		$pluralForms = new Plural_Forms( 'n n' );
		$pluralForms->get( 1 );
	}

	/**
	 * Two get() calls with the same number must run execute() only once.
	 */
	public function test_cache() {
		$mock = $this->getMockBuilder( 'Plural_Forms' )
			->setMethods(array('execute'))
			->setConstructorArgs(array('n != 1'))
			->getMock();

		$mock->expects($this->once())
			->method('execute')
			->with($this->identicalTo(2))
			->willReturn(1);

		$first = $mock->get( 2 );
		$second = $mock->get( 2 );
		$this->assertEquals( $first, $second );
	}
}

View File

@ -1,5 +1,8 @@
<?php
/**
* @group pomo
*/
class Tests_POMO_PO extends WP_UnitTestCase {
function setUp() {
require_once ABSPATH . '/wp-includes/pomo/po.php';

View File

@ -1,5 +1,8 @@
<?php
/**
* @group pomo
*/
class Tests_POMO_TranslationEntry extends WP_UnitTestCase {
function test_create_entry() {

View File

@ -1,4 +1,8 @@
<?php
/**
* @group pomo
*/
class Tests_POMO_Translations extends WP_UnitTestCase {
function test_add_entry() {