From 11bac0085477e4f88e3f742ccc0d57aaa0853d98 Mon Sep 17 00:00:00 2001 From: Andrew Ozz Date: Mon, 21 Jul 2008 03:21:09 +0000 Subject: [PATCH] kses - properly escape non-Unicode entities. Fixes #6583. Props schiller. git-svn-id: https://develop.svn.wordpress.org/trunk@8386 602fd350-edb4-49c9-b593-d223f7449a82 --- wp-includes/kses.php | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/wp-includes/kses.php b/wp-includes/kses.php index c7538b983b..6092424424 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -273,12 +273,12 @@ if (!CUSTOM_TAGS) { 'u' => array(), 'ul' => array ( 'class' => array (), - 'style' => array (), + 'style' => array (), 'type' => array ()), 'ol' => array ( 'class' => array (), 'start' => array (), - 'style' => array (), + 'style' => array (), 'type' => array ()), 'var' => array ()); /** @@ -911,7 +911,7 @@ function wp_kses_normalize_entities($string) { $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string); $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', create_function('$matches', 'return wp_kses_normalize_entities2($matches[1]);'), $string); - $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string); + $string = preg_replace_callback('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', create_function('$matches', 'return wp_kses_normalize_entities3($matches[2]);'), $string); return $string; } @@ -928,7 +928,33 @@ function wp_kses_normalize_entities($string) { * @return string Correctly encoded entity */ function wp_kses_normalize_entities2($i) { - return (($i > 65535) ? "&amp;#$i;" : "&#$i;"); + return ( (!valid_unicode($i)) || ($i > 65535) ? "&amp;#$i;" : "&#$i;"); +} + +/** + * wp_kses_normalize_entities3() - Callback for wp_kses_normalize_entities() for regular expression + * + * This function helps wp_kses_normalize_entities() to only accept valid Unicode numeric entities + * in hex form. 
+ * + * @param string $hexchars Hex string of encoded entity + * @return string Correctly encoded entity + */ +function wp_kses_normalize_entities3($hexchars) { + return ( (!valid_unicode(hexdec($hexchars))) ? "&amp;#x$hexchars;" : "&#x$hexchars;"); +} + +/** + * valid_unicode() - Helper function to determine if a Unicode value is valid. + * + * @param int $i Unicode value + * @return bool true if the value was a valid Unicode number + */ +function valid_unicode($i) { + return ( $i == 0x9 || $i == 0xa || $i == 0xd || + ($i >= 0x20 && $i <= 0xd7ff) || + ($i >= 0xe000 && $i <= 0xfffd) || + ($i >= 0x10000 && $i <= 0x10ffff) ); } /**