From 43b5e9ac204b12b9a2955b3f9f18d21e3f8439a8 Mon Sep 17 00:00:00 2001 From: Andrew Ozz Date: Thu, 12 Apr 2018 21:19:24 +0000 Subject: [PATCH] Privacy: add helper function for anonymizing data in a standardized way. Props jesperher, allendav, iandunn, birgire, azaozz. Fixes #43545. git-svn-id: https://develop.svn.wordpress.org/trunk@42971 602fd350-edb4-49c9-b593-d223f7449a82 --- .../includes/class-wp-community-events.php | 51 +--- src/wp-includes/functions.php | 118 +++++++++ .../tests/admin/includesCommunityEvents.php | 148 ++--------- .../phpunit/tests/functions/anonymization.php | 247 ++++++++++++++++++ 4 files changed, 389 insertions(+), 175 deletions(-) create mode 100644 tests/phpunit/tests/functions/anonymization.php diff --git a/src/wp-admin/includes/class-wp-community-events.php b/src/wp-admin/includes/class-wp-community-events.php index fe16dc46e4..77bf34d813 100644 --- a/src/wp-admin/includes/class-wp-community-events.php +++ b/src/wp-admin/includes/class-wp-community-events.php @@ -234,7 +234,6 @@ class WP_Community_Events { */ public static function get_unsafe_client_ip() { $client_ip = false; - $ip_prefix = ''; // In order of preference, with the best ones for this purpose first. $address_headers = array( @@ -265,57 +264,13 @@ class WP_Community_Events { return false; } - // Detect what kind of IP address this is. - $is_ipv6 = substr_count( $client_ip, ':' ) > 1; - $is_ipv4 = ( 3 === substr_count( $client_ip, '.' ) ); + $anon_ip = wp_privacy_anonymize_ip( $client_ip, true ); - if ( $is_ipv6 && $is_ipv4 ) { - // IPv6 compatibility mode, temporarily strip the IPv6 part, and treat it like IPv4. - $ip_prefix = '::ffff:'; - $client_ip = preg_replace( '/^\[?[0-9a-f:]*:/i', '', $client_ip ); - $client_ip = str_replace( ']', '', $client_ip ); - $is_ipv6 = false; - } - - if ( $is_ipv6 ) { - // IPv6 addresses will always be enclosed in [] if there's a port. 
- $left_bracket = strpos( $client_ip, '[' ); - $right_bracket = strpos( $client_ip, ']' ); - $percent = strpos( $client_ip, '%' ); - $netmask = 'ffff:ffff:ffff:ffff:0000:0000:0000:0000'; - - // Strip the port (and [] from IPv6 addresses), if they exist. - if ( false !== $left_bracket && false !== $right_bracket ) { - $client_ip = substr( $client_ip, $left_bracket + 1, $right_bracket - $left_bracket - 1 ); - } elseif ( false !== $left_bracket || false !== $right_bracket ) { - // The IP has one bracket, but not both, so it's malformed. - return false; - } - - // Strip the reachability scope. - if ( false !== $percent ) { - $client_ip = substr( $client_ip, 0, $percent ); - } - - // No invalid characters should be left. - if ( preg_match( '/[^0-9a-f:]/i', $client_ip ) ) { - return false; - } - - // Partially anonymize the IP by reducing it to the corresponding network ID. - if ( function_exists( 'inet_pton' ) && function_exists( 'inet_ntop' ) ) { - $client_ip = inet_ntop( inet_pton( $client_ip ) & inet_pton( $netmask ) ); - } - } elseif ( $is_ipv4 ) { - // Strip any port and partially anonymize the IP. - $last_octet_position = strrpos( $client_ip, '.' ); - $client_ip = substr( $client_ip, 0, $last_octet_position ) . '.0'; - } else { + if ( '0.0.0.0' === $anon_ip || '::' === $anon_ip ) { return false; } - // Restore the IPv6 prefix to compatibility mode addresses. - return $ip_prefix . $client_ip; + return $anon_ip; } /** diff --git a/src/wp-includes/functions.php b/src/wp-includes/functions.php index 3f11cce7e0..d53939caeb 100644 --- a/src/wp-includes/functions.php +++ b/src/wp-includes/functions.php @@ -6127,3 +6127,121 @@ All at ###SITENAME### ), $email_change_email['message'], $email_change_email['headers'] ); } + +/** + * Return an anonymized IPv4 or IPv6 address. + * + * @since 5.0.0 Abstracted from `WP_Community_Events::get_unsafe_client_ip()`. + * + * @param string $ip_addr The IPv4 or IPv6 address to be anonymized. + * @param bool $ipv6_fallback Optional. 
Whether to return the original IPv6 address if the needed functions + * to anonymize it are not present. Default false, return `::` (unspecified address). + * @return string The anonymized IP address. `0.0.0.0` or `::` is returned when the input cannot be handled. + */ +function wp_privacy_anonymize_ip( $ip_addr, $ipv6_fallback = false ) { + // Detect what kind of IP address this is. + $ip_prefix = ''; + $is_ipv6 = substr_count( $ip_addr, ':' ) > 1; + $is_ipv4 = ( 3 === substr_count( $ip_addr, '.' ) ); + + if ( $is_ipv6 && $is_ipv4 ) { + // IPv6 compatibility mode, temporarily strip the IPv6 part, and treat it like IPv4. + $ip_prefix = '::ffff:'; + $ip_addr = preg_replace( '/^\[?[0-9a-f:]*:/i', '', $ip_addr ); + $ip_addr = str_replace( ']', '', $ip_addr ); + $is_ipv6 = false; + } + + if ( $is_ipv6 ) { + // IPv6 addresses will always be enclosed in [] if there's a port. + $left_bracket = strpos( $ip_addr, '[' ); + $right_bracket = strpos( $ip_addr, ']' ); + $netmask = 'ffff:ffff:ffff:ffff:0000:0000:0000:0000'; + + // Strip the port (and [] from IPv6 addresses), if they exist. + if ( false !== $left_bracket && false !== $right_bracket ) { + $ip_addr = substr( $ip_addr, $left_bracket + 1, $right_bracket - $left_bracket - 1 ); + } elseif ( false !== $left_bracket || false !== $right_bracket ) { + // The IP has one bracket, but not both, so it's malformed. + return '::'; + } + + // Strip the reachability scope. Locate it only after the brackets are gone so the offset matches the trimmed string. + $percent = strpos( $ip_addr, '%' ); + if ( false !== $percent ) { + $ip_addr = substr( $ip_addr, 0, $percent ); + } + + // No invalid characters should be left. + if ( preg_match( '/[^0-9a-f:]/i', $ip_addr ) ) { + return '::'; + } + + // Partially anonymize the IP by reducing it to the corresponding network ID. + if ( function_exists( 'inet_pton' ) && function_exists( 'inet_ntop' ) ) { + $ip_addr = inet_ntop( inet_pton( $ip_addr ) & inet_pton( $netmask ) ); + if ( false === $ip_addr ) { + return '::'; + } + } elseif ( ! 
$ipv6_fallback ) { + return '::'; + } + } elseif ( $is_ipv4 ) { + // Strip any port and partially anonymize the IP. + $last_octet_position = strrpos( $ip_addr, '.' ); + $ip_addr = substr( $ip_addr, 0, $last_octet_position ) . '.0'; + } else { + return '0.0.0.0'; + } + + // Restore the IPv6 prefix to compatibility mode addresses. + return $ip_prefix . $ip_addr; +} + +/** + * Return uniform "anonymous" data by type. + * + * @since 5.0.0 + * + * @param string $type The type of data to be anonymized. + * @param string $data Optional. The data to be anonymized. Default empty string. + * @return string The anonymous data for the requested type. + */ +function wp_privacy_anonymize_data( $type, $data = '' ) { + + switch ( $type ) { + case 'email': + $anonymous = 'deleted@site.invalid'; + break; + case 'url': + $anonymous = 'https://site.invalid'; + break; + case 'ip': + $anonymous = wp_privacy_anonymize_ip( $data ); + break; + case 'date': + $anonymous = '0000-00-00 00:00:00'; + break; + case 'text': + /* translators: deleted text */ + $anonymous = __( '[deleted]' ); + break; + case 'longtext': + /* translators: deleted long text */ + $anonymous = __( 'This content was deleted by the author.' ); + break; + default: + $anonymous = ''; + } + + /** + * Filters the anonymous data for each type. + * + * @since 5.0.0 + * + * @param string $anonymous Anonymized data. + * @param string $type Type of the data. + * @param string $data Original data. 
+ */ + return apply_filters( 'wp_privacy_anonymize_data', $anonymous, $type, $data ); +} diff --git a/tests/phpunit/tests/admin/includesCommunityEvents.php b/tests/phpunit/tests/admin/includesCommunityEvents.php index bd6a6cdbe6..0de7f0ab06 100644 --- a/tests/phpunit/tests/admin/includesCommunityEvents.php +++ b/tests/phpunit/tests/admin/includesCommunityEvents.php @@ -479,154 +479,48 @@ class Test_WP_Community_Events extends WP_UnitTestCase { /** * Test that get_unsafe_client_ip() properly anonymizes all possible address formats * - * @dataProvider data_get_unsafe_client_ip_anonymization + * @dataProvider data_get_unsafe_client_ip * * @ticket 41083 */ - public function test_get_unsafe_client_ip_anonymization( $raw_ip, $expected_result ) { - $_SERVER['REMOTE_ADDR'] = $raw_ip; - $actual_result = WP_Community_Events::get_unsafe_client_ip(); + public function test_get_unsafe_client_ip( $raw_ip, $expected_result ) { + $_SERVER['REMOTE_ADDR'] = 'this should not be used'; + $_SERVER['HTTP_CLIENT_IP'] = $raw_ip; + $actual_result = WP_Community_Events::get_unsafe_client_ip(); $this->assertEquals( $expected_result, $actual_result ); } - public function data_get_unsafe_client_ip_anonymization() { + /** + * Provide test cases for `test_get_unsafe_client_ip()`. + * + * @return array + */ + public function data_get_unsafe_client_ip() { return array( - // Invalid IP. - array( - '', // Raw IP address - false, // Expected result - ), - // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. - array( - 'unknown', - false, - ), - // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + // Handle '::' returned from `wp_privacy_anonymize_ip()`. array( 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001', false, ), - // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + + // Handle '0.0.0.0' returned from `wp_privacy_anonymize_ip()`. 
array( - 'or=\"1000:0000:0000:0000:0000:0000:0000:0001', + 'unknown', false, ), - // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + + // Valid IPv4. array( - '1000:0000:0000:0000:0000:0000:0000:0001or=\"', - false, + '198.143.164.252', + '198.143.164.0', ), - // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. - array( - 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]:400', - '1000::', - ), - // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. - array( - 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]', - '1000::', - ), - // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. - array( - 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]400', - '1000::', - ), - // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. - array( - '[1000:0000:0000:0000:0000:0000:0000:0001]:235\"or=', - '1000::', - ), - // IPv4, no port - array( - '10.20.30.45', - '10.20.30.0', - ), - // IPv4, port - array( - '10.20.30.45:20000', - '10.20.30.0', - ), - // IPv6, no port + + // Valid IPv6. 
array( '2a03:2880:2110:df07:face:b00c::1', '2a03:2880:2110:df07::', ), - // IPv6, port - array( - '[2a03:2880:2110:df07:face:b00c::1]:20000', - '2a03:2880:2110:df07::', - ), - // IPv6, no port, reducible representation - array( - '0000:0000:0000:0000:0000:0000:0000:0001', - '::', - ), - // IPv6, no port, partially reducible representation - array( - '1000:0000:0000:0000:0000:0000:0000:0001', - '1000::', - ), - // IPv6, port, reducible representation - array( - '[0000:0000:0000:0000:0000:0000:0000:0001]:1234', - '::', - ), - // IPv6, port, partially reducible representation - array( - '[1000:0000:0000:0000:0000:0000:0000:0001]:5678', - '1000::', - ), - // IPv6, no port, reduced representation - array( - '::', - '::', - ), - // IPv6, no port, reduced representation - array( - '::1', - '::', - ), - // IPv6, port, reduced representation - array( - '[::]:20000', - '::', - ), - // IPv6, address brackets without port delimiter and number, reduced representation - array( - '[::1]', - '::', - ), - // IPv6, no port, compatibility mode - array( - '::ffff:10.15.20.25', - '::ffff:10.15.20.0', - ), - // IPv6, port, compatibility mode - array( - '[::FFFF:10.15.20.25]:30000', - '::ffff:10.15.20.0', - ), - // IPv6, no port, compatibility mode shorthand - array( - '::127.0.0.1', - '::ffff:127.0.0.0', - ), - // IPv6, port, compatibility mode shorthand - array( - '[::127.0.0.1]:30000', - '::ffff:127.0.0.0', - ), - // IPv6 with reachability scope - array( - 'fe80::b059:65f4:e877:c40%16', - 'fe80::', - ), - // IPv6 with reachability scope - array( - 'FE80::B059:65F4:E877:C40%eth0', - 'fe80::', - ), ); } } diff --git a/tests/phpunit/tests/functions/anonymization.php b/tests/phpunit/tests/functions/anonymization.php new file mode 100644 index 0000000000..a5c9c8af15 --- /dev/null +++ b/tests/phpunit/tests/functions/anonymization.php @@ -0,0 +1,247 @@ +markTestSkipped( 'This test requires both the inet_ntop() and inet_pton() functions.' 
); + } + + $actual_result = wp_privacy_anonymize_data( 'ip', $raw_ip ); + + /* TODO: Test the `$ipv6_fallback` mode if it is kept. */ + + $this->assertEquals( $expected_result, $actual_result ); + } + + /** + * Provide test cases for `test_wp_privacy_anonymize_ip()`. + * + * @since 5.0.0 Moved from `Test_WP_Community_Events::data_get_unsafe_client_ip_anonymization()`. + * + * @return array { + * @type array { + * @type string $raw_ip Raw IP address. + * @type string $expected_result Expected result. + * } + * } + */ + public function data_wp_privacy_anonymize_ip() { + return array( + // Invalid IPs, and a valid IPv6 address whose first 64 bits are zero. + array( + null, + '0.0.0.0', + ), + array( + '', + '0.0.0.0', + ), + array( + '0.0.0.0.0', + '0.0.0.0', + ), + array( + '0000:0000:0000:0000:0000:0000:0127:2258', + '::', + ), + // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + array( + 'unknown', + '0.0.0.0', + ), + // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + array( + 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001', + '::', + ), + // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + array( + 'or=\"1000:0000:0000:0000:0000:0000:0000:0001', + '::', + ), + // Invalid IP. Sometimes proxies add things like this, or other arbitrary strings. + array( + '1000:0000:0000:0000:0000:0000:0000:0001or=\"', + '::', + ), + // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. + array( + 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]:400', + '1000::', + ), + // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. + array( + 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]', + '1000::', + ), + // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. 
+ array( + 'or=\"[1000:0000:0000:0000:0000:0000:0000:0001]400', + '1000::', + ), + // Malformed string with valid IP substring. Sometimes proxies add things like this, or other arbitrary strings. + array( + '[1000:0000:0000:0000:0000:0000:0000:0001]:235\"or=', + '1000::', + ), + // IPv4, no port. + array( + '10.20.30.45', + '10.20.30.0', + ), + // IPv4, port. + array( + '10.20.30.45:20000', + '10.20.30.0', + ), + // IPv4, netmask. + array( + '10.20.30.45/24', + '10.20.30.0', + ), + // IPv6, no port. + array( + '2a03:2880:2110:df07:face:b00c::1', + '2a03:2880:2110:df07::', + ), + // IPv6, port. + array( + '[2a03:2880:2110:df07:face:b00c::1]:20000', + '2a03:2880:2110:df07::', + ), + // IPv6, no port, reducible representation. + array( + '0000:0000:0000:0000:0000:0000:0000:0001', + '::', + ), + // IPv6, no port, partially reducible representation. + array( + '1000:0000:0000:0000:0000:0000:0000:0001', + '1000::', + ), + // IPv6, port, reducible representation. + array( + '[0000:0000:0000:0000:0000:0000:0000:0001]:1234', + '::', + ), + // IPv6, port, partially reducible representation. + array( + '[1000:0000:0000:0000:0000:0000:0000:0001]:5678', + '1000::', + ), + // IPv6, no port, reduced representation. + array( + '::', + '::', + ), + // IPv6, no port, reduced representation. + array( + '::1', + '::', + ), + // IPv6, port, reduced representation. + array( + '[::]:20000', + '::', + ), + // IPv6, address brackets without port delimiter and number, reduced representation. + array( + '[::1]', + '::', + ), + // IPv6, no port, compatibility mode. + array( + '::ffff:10.15.20.25', + '::ffff:10.15.20.0', + ), + // IPv6, port, compatibility mode. + array( + '[::FFFF:10.15.20.25]:30000', + '::ffff:10.15.20.0', + ), + // IPv6, no port, compatibility mode shorthand. + array( + '::127.0.0.1', + '::ffff:127.0.0.0', + ), + // IPv6, port, compatibility mode shorthand. + array( + '[::127.0.0.1]:30000', + '::ffff:127.0.0.0', + ), + // IPv6 with reachability scope. 
+ array( + 'fe80::b059:65f4:e877:c40%16', + 'fe80::', + ), + // IPv6 with reachability scope. + array( + 'FE80::B059:65F4:E877:C40%eth0', + 'fe80::', + ), + ); + } + + /** + * Test email anonymization of `wp_privacy_anonymize_data()`. + */ + public function test_anonymize_email() { + $this->assertEquals( 'deleted@site.invalid', wp_privacy_anonymize_data( 'email', 'bar@example.com' ) ); + } + + /** + * Test url anonymization of `wp_privacy_anonymize_data()`. + */ + public function test_anonymize_url() { + $this->assertEquals( 'https://site.invalid', wp_privacy_anonymize_data( 'url', 'https://example.com/author/username' ) ); + } + + /** + * Test date anonymization of `wp_privacy_anonymize_data()`. + */ + public function test_anonymize_date() { + $this->assertEquals( '0000-00-00 00:00:00', wp_privacy_anonymize_data( 'date', '2003-12-25 12:34:56' ) ); + } + + /** + * Test text anonymization of `wp_privacy_anonymize_data()`. + */ + public function test_anonymize_text() { + $text = __( 'Four score and seven years ago' ); + $this->assertEquals( '[deleted]', wp_privacy_anonymize_data( 'text', $text ) ); + } + + /** + * Test long text anonymization of `wp_privacy_anonymize_data()`. + */ + public function test_anonymize_long_text() { + $text = __( 'Four score and seven years ago' ); + $this->assertEquals( 'This content was deleted by the author.', wp_privacy_anonymize_data( 'longtext', $text ) ); + } +}