Privacy: add helper function for anonymizing data in a standardized way.

Props jesperher, allendav, iandunn, birgire, azaozz.
Fixes #43545.

git-svn-id: https://develop.svn.wordpress.org/trunk@42971 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Andrew Ozz 2018-04-12 21:19:24 +00:00
parent 1d0963afbd
commit 43b5e9ac20
4 changed files with 389 additions and 175 deletions

View File

@ -234,7 +234,6 @@ class WP_Community_Events {
*/
public static function get_unsafe_client_ip() {
$client_ip = false;
$ip_prefix = '';
// In order of preference, with the best ones for this purpose first.
$address_headers = array(
@ -265,57 +264,13 @@ class WP_Community_Events {
return false;
}
// Detect what kind of IP address this is.
$is_ipv6 = substr_count( $client_ip, ':' ) > 1;
$is_ipv4 = ( 3 === substr_count( $client_ip, '.' ) );
$anon_ip = wp_privacy_anonymize_ip( $client_ip, true );
if ( $is_ipv6 && $is_ipv4 ) {
// IPv6 compatibility mode, temporarily strip the IPv6 part, and treat it like IPv4.
$ip_prefix = '::ffff:';
$client_ip = preg_replace( '/^\[?[0-9a-f:]*:/i', '', $client_ip );
$client_ip = str_replace( ']', '', $client_ip );
$is_ipv6 = false;
}
if ( $is_ipv6 ) {
// IPv6 addresses will always be enclosed in [] if there's a port.
$left_bracket = strpos( $client_ip, '[' );
$right_bracket = strpos( $client_ip, ']' );
$percent = strpos( $client_ip, '%' );
$netmask = 'ffff:ffff:ffff:ffff:0000:0000:0000:0000';
// Strip the port (and [] from IPv6 addresses), if they exist.
if ( false !== $left_bracket && false !== $right_bracket ) {
$client_ip = substr( $client_ip, $left_bracket + 1, $right_bracket - $left_bracket - 1 );
} elseif ( false !== $left_bracket || false !== $right_bracket ) {
// The IP has one bracket, but not both, so it's malformed.
return false;
}
// Strip the reachability scope.
if ( false !== $percent ) {
$client_ip = substr( $client_ip, 0, $percent );
}
// No invalid characters should be left.
if ( preg_match( '/[^0-9a-f:]/i', $client_ip ) ) {
return false;
}
// Partially anonymize the IP by reducing it to the corresponding network ID.
if ( function_exists( 'inet_pton' ) && function_exists( 'inet_ntop' ) ) {
$client_ip = inet_ntop( inet_pton( $client_ip ) & inet_pton( $netmask ) );
}
} elseif ( $is_ipv4 ) {
// Strip any port and partially anonymize the IP.
$last_octet_position = strrpos( $client_ip, '.' );
$client_ip = substr( $client_ip, 0, $last_octet_position ) . '.0';
} else {
if ( '0.0.0.0' === $anon_ip || '::' === $anon_ip ) {
return false;
}
// Restore the IPv6 prefix to compatibility mode addresses.
return $ip_prefix . $client_ip;
return $anon_ip;
}
/**

View File

@ -6127,3 +6127,121 @@ All at ###SITENAME###
), $email_change_email['message'], $email_change_email['headers']
);
}
/**
 * Return an anonymized IPv4 or IPv6 address.
 *
 * IPv4 addresses have their last octet replaced with `0`. IPv6 addresses are
 * reduced to their first 64 bits. Any port, enclosing brackets and reachability
 * scope (e.g. `%eth0`) are stripped. Addresses that contain both IPv6 and IPv4
 * notation ("compatibility mode") are anonymized as IPv4 and returned with a
 * `::ffff:` prefix.
 *
 * @since 5.0.0 Abstracted from `WP_Community_Events::get_unsafe_client_ip()`.
 *
 * @param string $ip_addr       The IPv4 or IPv6 address to be anonymized.
 * @param bool   $ipv6_fallback Optional. Whether to return the (cleaned up, but not anonymized) IPv6 address
 *                              if the functions needed to anonymize it are not present. Default false,
 *                              return `::` (unspecified address).
 * @return string The anonymized IP address. `0.0.0.0` when the input is empty or looks like neither
 *                IPv4 nor IPv6; `::` when the input looks like IPv6 but is malformed.
 */
function wp_privacy_anonymize_ip( $ip_addr, $ipv6_fallback = false ) {
	// Nothing to anonymize. This also keeps `null` away from the string functions below.
	if ( empty( $ip_addr ) ) {
		return '0.0.0.0';
	}

	// Detect what kind of IP address this is.
	$ip_prefix = '';
	$is_ipv6   = substr_count( $ip_addr, ':' ) > 1;
	$is_ipv4   = ( 3 === substr_count( $ip_addr, '.' ) );

	if ( $is_ipv6 && $is_ipv4 ) {
		// IPv6 compatibility mode, temporarily strip the IPv6 part, and treat it like IPv4.
		$ip_prefix = '::ffff:';
		$ip_addr   = preg_replace( '/^\[?[0-9a-f:]*:/i', '', $ip_addr );
		$ip_addr   = str_replace( ']', '', $ip_addr );
		$is_ipv6   = false;
	}

	if ( $is_ipv6 ) {
		// IPv6 addresses will always be enclosed in [] if there's a port.
		$left_bracket  = strpos( $ip_addr, '[' );
		$right_bracket = strpos( $ip_addr, ']' );
		$netmask       = 'ffff:ffff:ffff:ffff:0000:0000:0000:0000';

		// Strip the port (and [] from IPv6 addresses), if they exist.
		if ( false !== $left_bracket && false !== $right_bracket ) {
			$ip_addr = substr( $ip_addr, $left_bracket + 1, $right_bracket - $left_bracket - 1 );
		} elseif ( false !== $left_bracket || false !== $right_bracket ) {
			// The IP has one bracket, but not both, so it's malformed.
			return '::';
		}

		/*
		 * Strip the reachability scope. The offset must be looked up after the
		 * brackets are gone, otherwise it points at the wrong character of the
		 * shortened string and the scope survives.
		 */
		$percent = strpos( $ip_addr, '%' );
		if ( false !== $percent ) {
			$ip_addr = substr( $ip_addr, 0, $percent );
		}

		// No invalid characters should be left.
		if ( preg_match( '/[^0-9a-f:]/i', $ip_addr ) ) {
			return '::';
		}

		// Partially anonymize the IP by reducing it to the corresponding network ID.
		if ( function_exists( 'inet_pton' ) && function_exists( 'inet_ntop' ) ) {
			$packed_addr = inet_pton( $ip_addr );
			$packed_mask = inet_pton( $netmask );

			// inet_pton() returns false for unparsable input; `false & string` is not a valid operation.
			if ( false === $packed_addr || false === $packed_mask ) {
				return '::';
			}

			$ip_addr = inet_ntop( $packed_addr & $packed_mask );
			if ( false === $ip_addr ) {
				return '::';
			}
		} elseif ( ! $ipv6_fallback ) {
			return '::';
		}
	} elseif ( $is_ipv4 ) {
		// Strip any port and partially anonymize the IP.
		$last_octet_position = strrpos( $ip_addr, '.' );
		$ip_addr             = substr( $ip_addr, 0, $last_octet_position ) . '.0';
	} else {
		return '0.0.0.0';
	}

	// Restore the IPv6 prefix to compatibility mode addresses.
	return $ip_prefix . $ip_addr;
}
/**
 * Produce the uniform placeholder that stands in for a piece of personal data.
 *
 * Each recognized type maps to a fixed replacement value, except `ip`, which is
 * passed through `wp_privacy_anonymize_ip()`. Unrecognized types yield an empty
 * string. The result is always run through the `wp_privacy_anonymize_data` filter.
 *
 * @since 5.0.0
 *
 * @param string $type The type of data to be anonymized.
 * @param string $data Optional. The data to be anonymized. Only consulted for the `ip` type
 *                     and passed along to the filter.
 * @return string The anonymous data for the requested type.
 */
function wp_privacy_anonymize_data( $type, $data = '' ) {
	// Anything that is not matched below is replaced by an empty string.
	$placeholder = '';

	switch ( $type ) {
		case 'email':
			$placeholder = 'deleted@site.invalid';
			break;

		case 'url':
			$placeholder = 'https://site.invalid';
			break;

		case 'ip':
			// The only type whose replacement is derived from the original value.
			$placeholder = wp_privacy_anonymize_ip( $data );
			break;

		case 'date':
			$placeholder = '0000-00-00 00:00:00';
			break;

		case 'text':
			/* translators: deleted text */
			$placeholder = __( '[deleted]' );
			break;

		case 'longtext':
			/* translators: deleted long text */
			$placeholder = __( 'This content was deleted by the author.' );
			break;
	}

	/**
	 * Filters the anonymous data for each type.
	 *
	 * @since 5.0.0
	 *
	 * @param string $placeholder Anonymized data.
	 * @param string $type        Type of the data.
	 * @param string $data        Original data.
	 */
	$placeholder = apply_filters( 'wp_privacy_anonymize_data', $placeholder, $type, $data );

	return $placeholder;
}

View File

@ -479,154 +479,48 @@ class Test_WP_Community_Events extends WP_UnitTestCase {
/**
* Test that get_unsafe_client_ip() properly anonymizes all possible address formats
*
* @dataProvider data_get_unsafe_client_ip_anonymization
* @dataProvider data_get_unsafe_client_ip
*
* @ticket 41083
*/
public function test_get_unsafe_client_ip_anonymization( $raw_ip, $expected_result ) {
$_SERVER['REMOTE_ADDR'] = $raw_ip;
$actual_result = WP_Community_Events::get_unsafe_client_ip();
public function test_get_unsafe_client_ip( $raw_ip, $expected_result ) {
$_SERVER['REMOTE_ADDR'] = 'this should not be used';
$_SERVER['HTTP_CLIENT_IP'] = $raw_ip;
$actual_result = WP_Community_Events::get_unsafe_client_ip();
$this->assertEquals( $expected_result, $actual_result );
}
public function data_get_unsafe_client_ip_anonymization() {
/**
* Provide test cases for `test_get_unsafe_client_ip()`.
*
* @return array
*/
public function data_get_unsafe_client_ip() {
return array(
// Invalid IP.
array(
'', // Raw IP address
false, // Expected result
),
// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
array(
'unknown',
false,
),
// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
// Handle '::' returned from `wp_privacy_anonymize_ip()`.
array(
'or=\"[1000:0000:0000:0000:0000:0000:0000:0001',
false,
),
// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
// Handle '0.0.0.0' returned from `wp_privacy_anonymize_ip()`.
array(
'or=\"1000:0000:0000:0000:0000:0000:0000:0001',
'unknown',
false,
),
// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
// Valid IPv4.
array(
'1000:0000:0000:0000:0000:0000:0000:0001or=\"',
false,
'198.143.164.252',
'198.143.164.0',
),
// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
array(
'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]:400',
'1000::',
),
// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
array(
'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]',
'1000::',
),
// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
array(
'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]400',
'1000::',
),
// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
array(
'[1000:0000:0000:0000:0000:0000:0000:0001]:235\"or=',
'1000::',
),
// IPv4, no port
array(
'10.20.30.45',
'10.20.30.0',
),
// IPv4, port
array(
'10.20.30.45:20000',
'10.20.30.0',
),
// IPv6, no port
// Valid IPv6.
array(
'2a03:2880:2110:df07:face:b00c::1',
'2a03:2880:2110:df07::',
),
// IPv6, port
array(
'[2a03:2880:2110:df07:face:b00c::1]:20000',
'2a03:2880:2110:df07::',
),
// IPv6, no port, reducible representation
array(
'0000:0000:0000:0000:0000:0000:0000:0001',
'::',
),
// IPv6, no port, partially reducible representation
array(
'1000:0000:0000:0000:0000:0000:0000:0001',
'1000::',
),
// IPv6, port, reducible representation
array(
'[0000:0000:0000:0000:0000:0000:0000:0001]:1234',
'::',
),
// IPv6, port, partially reducible representation
array(
'[1000:0000:0000:0000:0000:0000:0000:0001]:5678',
'1000::',
),
// IPv6, no port, reduced representation
array(
'::',
'::',
),
// IPv6, no port, reduced representation
array(
'::1',
'::',
),
// IPv6, port, reduced representation
array(
'[::]:20000',
'::',
),
// IPv6, address brackets without port delimiter and number, reduced representation
array(
'[::1]',
'::',
),
// IPv6, no port, compatibility mode
array(
'::ffff:10.15.20.25',
'::ffff:10.15.20.0',
),
// IPv6, port, compatibility mode
array(
'[::FFFF:10.15.20.25]:30000',
'::ffff:10.15.20.0',
),
// IPv6, no port, compatibility mode shorthand
array(
'::127.0.0.1',
'::ffff:127.0.0.0',
),
// IPv6, port, compatibility mode shorthand
array(
'[::127.0.0.1]:30000',
'::ffff:127.0.0.0',
),
// IPv6 with reachability scope
array(
'fe80::b059:65f4:e877:c40%16',
'fe80::',
),
// IPv6 with reachability scope
array(
'FE80::B059:65F4:E877:C40%eth0',
'fe80::',
),
);
}
}

View File

@ -0,0 +1,247 @@
<?php
/**
* Test anonymization functions.
*
* @package WordPress
*
* @since 5.0.0
*/
/**
 * Class Tests_Functions_Anonymization.
 *
 * Covers `wp_privacy_anonymize_data()` for every data type it recognizes; the
 * `ip` type in turn exercises `wp_privacy_anonymize_ip()`.
 *
 * @group functions.php
 * @group privacy
 *
 * @since 5.0.0
 */
class Tests_Functions_Anonymization extends WP_UnitTestCase {
	/**
	 * Test that wp_privacy_anonymize_ip() properly anonymizes all possible IP address formats.
	 *
	 * The function is reached through `wp_privacy_anonymize_data( 'ip', ... )`, so the
	 * default (non-fallback) IPv6 mode is what gets tested here.
	 *
	 * @dataProvider data_wp_privacy_anonymize_ip
	 *
	 * @ticket 41083
	 * @ticket 43545
	 *
	 * @param string|null $raw_ip          Raw IP address.
	 * @param string      $expected_result Expected result.
	 */
	public function test_wp_privacy_anonymize_ip( $raw_ip, $expected_result ) {
		if ( ! function_exists( 'inet_ntop' ) || ! function_exists( 'inet_pton' ) ) {
			$this->markTestSkipped( 'This test requires both the inet_ntop() and inet_pton() functions.' );
		}

		$actual_result = wp_privacy_anonymize_data( 'ip', $raw_ip );

		/* Todo test ipv6_fallback mode if keeping it.*/

		// Strict comparison: the result must be the exact string, not merely something loosely equal to it.
		$this->assertSame( $expected_result, $actual_result );
	}

	/**
	 * Provide test cases for `test_wp_privacy_anonymize_ip()`.
	 *
	 * @since 5.0.0 Moved from `Test_WP_Community_Events::data_get_unsafe_client_ip_anonymization()`.
	 *
	 * @return array {
	 *     @type array {
	 *         @type string|null $raw_ip          Raw IP address.
	 *         @type string      $expected_result Expected result.
	 *     }
	 * }
	 */
	public function data_wp_privacy_anonymize_ip() {
		return array(
			// Invalid IP: no value at all.
			array(
				null,
				'0.0.0.0',
			),
			// Invalid IP: empty string.
			array(
				'',
				'0.0.0.0',
			),
			// Invalid IP: too many octets to be IPv4.
			array(
				'0.0.0.0.0',
				'0.0.0.0',
			),
			// IPv6 whose first 64 bits are all zero, so only the unspecified address remains.
			array(
				'0000:0000:0000:0000:0000:0000:0127:2258',
				'::',
			),
			// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'unknown',
				'0.0.0.0',
			),
			// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'or=\"[1000:0000:0000:0000:0000:0000:0000:0001',
				'::',
			),
			// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'or=\"1000:0000:0000:0000:0000:0000:0000:0001',
				'::',
			),
			// Invalid IP. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'1000:0000:0000:0000:0000:0000:0000:0001or=\"',
				'::',
			),
			// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]:400',
				'1000::',
			),
			// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]',
				'1000::',
			),
			// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]400',
				'1000::',
			),
			// Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings.
			array(
				'[1000:0000:0000:0000:0000:0000:0000:0001]:235\"or=',
				'1000::',
			),
			// IPv4, no port.
			array(
				'10.20.30.45',
				'10.20.30.0',
			),
			// IPv4, port.
			array(
				'10.20.30.45:20000',
				'10.20.30.0',
			),
			// IPv4, netmask.
			array(
				'10.20.30.45/24',
				'10.20.30.0',
			),
			// IPv6, no port.
			array(
				'2a03:2880:2110:df07:face:b00c::1',
				'2a03:2880:2110:df07::',
			),
			// IPv6, port.
			array(
				'[2a03:2880:2110:df07:face:b00c::1]:20000',
				'2a03:2880:2110:df07::',
			),
			// IPv6, no port, reducible representation.
			array(
				'0000:0000:0000:0000:0000:0000:0000:0001',
				'::',
			),
			// IPv6, no port, partially reducible representation.
			array(
				'1000:0000:0000:0000:0000:0000:0000:0001',
				'1000::',
			),
			// IPv6, port, reducible representation.
			array(
				'[0000:0000:0000:0000:0000:0000:0000:0001]:1234',
				'::',
			),
			// IPv6, port, partially reducible representation.
			array(
				'[1000:0000:0000:0000:0000:0000:0000:0001]:5678',
				'1000::',
			),
			// IPv6, no port, reduced representation.
			array(
				'::',
				'::',
			),
			// IPv6, no port, reduced representation.
			array(
				'::1',
				'::',
			),
			// IPv6, port, reduced representation.
			array(
				'[::]:20000',
				'::',
			),
			// IPv6, address brackets without port delimiter and number, reduced representation.
			array(
				'[::1]',
				'::',
			),
			// IPv6, no port, compatibility mode.
			array(
				'::ffff:10.15.20.25',
				'::ffff:10.15.20.0',
			),
			// IPv6, port, compatibility mode.
			array(
				'[::FFFF:10.15.20.25]:30000',
				'::ffff:10.15.20.0',
			),
			// IPv6, no port, compatibility mode shorthand.
			array(
				'::127.0.0.1',
				'::ffff:127.0.0.0',
			),
			// IPv6, port, compatibility mode shorthand.
			array(
				'[::127.0.0.1]:30000',
				'::ffff:127.0.0.0',
			),
			// IPv6 with reachability scope.
			array(
				'fe80::b059:65f4:e877:c40%16',
				'fe80::',
			),
			// IPv6 with reachability scope.
			array(
				'FE80::B059:65F4:E877:C40%eth0',
				'fe80::',
			),
		);
	}

	/**
	 * Test email anonymization of `wp_privacy_anonymize_data()`.
	 */
	public function test_anonymize_email() {
		$this->assertSame( 'deleted@site.invalid', wp_privacy_anonymize_data( 'email', 'bar@example.com' ) );
	}

	/**
	 * Test url anonymization of `wp_privacy_anonymize_data()`.
	 */
	public function test_anonymize_url() {
		$this->assertSame( 'https://site.invalid', wp_privacy_anonymize_data( 'url', 'https://example.com/author/username' ) );
	}

	/**
	 * Test date anonymization of `wp_privacy_anonymize_data()`.
	 */
	public function test_anonymize_date() {
		$this->assertSame( '0000-00-00 00:00:00', wp_privacy_anonymize_data( 'date', '2003-12-25 12:34:56' ) );
	}

	/**
	 * Test text anonymization of `wp_privacy_anonymize_data()`.
	 */
	public function test_anonymize_text() {
		// Plain fixture string; test input does not need to go through the translation functions.
		$text = 'Four score and seven years ago';
		$this->assertSame( '[deleted]', wp_privacy_anonymize_data( 'text', $text ) );
	}

	/**
	 * Test long text anonymization of `wp_privacy_anonymize_data()`.
	 */
	public function test_anonymize_long_text() {
		// Plain fixture string; test input does not need to go through the translation functions.
		$text = 'Four score and seven years ago';
		$this->assertSame( 'This content was deleted by the author.', wp_privacy_anonymize_data( 'longtext', $text ) );
	}
}