Wordpress/tests/phpunit/data/formatting/utf-8/urlencode.py
Jonathan Desrosiers d376fedd89 Ensure svn:eol-style is consistently set for all files.
See #42594.

git-svn-id: https://develop.svn.wordpress.org/trunk@46586 602fd350-edb4-49c9-b593-d223f7449a82
2019-10-25 16:36:41 +00:00

34 lines
986 B
Python

# Generates urlencoded.txt from utf-8.txt
#
# urlencoded.txt is used by Tests_Formatting_Utf8UriEncode
import urllib, codecs, re
import sys
# Matches one percent-escape written with upper-case hex digits, capturing the
# two digits as group 1 so that fix() can decode or lower-case them.
# Only real hexadecimal digits (0-9, A-F) are accepted: a pair such as "%ZZ" is
# not a percent-escape and must be left alone rather than handed to
# int(..., 16), which would raise ValueError.
capfix = re.compile(r"%([0-9A-F]{2})")
def fix(match):
    """Return the replacement text for one matched percent-escape.

    ``match`` is a regex match whose group 1 holds the two characters that
    follow the ``%`` sign.

    * If they encode a value below 128 (an ASCII character), the escape is
      undone: the character itself is returned, lower-cased.
    * Otherwise the escape is kept, with its hex digits lower-cased
      (``%C3`` becomes ``%c3``).
    * If the two characters are not valid hexadecimal, the matched text is
      returned unchanged instead of raising ``ValueError``.
    """
    octet = match.group(1)
    try:
        intval = int(octet, 16)
    except ValueError:
        # Not a real percent-escape; leave the original text untouched.
        return match.group(0)
    if intval < 128:
        # ASCII: undo the escaping. The lower() call is kept from the
        # original implementation.
        return chr(intval).lower()
    # Non-ASCII byte: keep it escaped, but with lower-case hex digits.
    return '%' + octet.lower()
def urlencode(line):
    """Percent-encode each byte of non-ASCII unicode characters.

    The line is stripped of surrounding whitespace, encoded as UTF-8 and
    passed through ``quote``. The result is then post-processed with
    ``capfix.sub(fix, ...)``, which turns escapes of ASCII characters back
    into the literal character and lower-cases the hex digits of the
    remaining (non-ASCII) escapes.

    ``line`` is a text string; the return value is the encoded string.
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3;
    # resolve it locally so the function works on either interpreter.
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2
    line = quote(line.strip().encode("utf-8"))
    line = capfix.sub(fix, line)
    return line
# Script entry point: read UTF-8 lines from stdin and write their encoded
# forms to stdout as ASCII, joined by "\n" (no newline after the last one).
if __name__ == "__main__":
    args = sys.argv[1:]
    if args and args[0] in ("-h", "--help"):
        # A single parenthesised argument is valid both as a Python 2 print
        # statement and as a Python 3 print() call.
        print("Usage: python urlencode.py < utf-8.txt > urlencoded.txt")
        sys.exit(2)
    # The codecs reader/writer must wrap byte streams. Python 3 exposes them
    # as ``.buffer``; on Python 2 the standard streams are used directly.
    sys.stdin = codecs.getreader("utf-8")(
        getattr(sys.stdin, "buffer", sys.stdin))
    sys.stdout = codecs.getwriter("ascii")(
        getattr(sys.stdout, "buffer", sys.stdout))
    lines = sys.stdin.readlines()
    sys.stdout.write("\n".join(map(urlencode, lines)))