# Test a UTF-8 encoded literal
# The literal mixes ASCII with one non-ASCII character.  Index-based
# iteration is deliberate: the point is to exercise s[i] and ord() at every
# position of the string, not merely to walk over its characters.
s = "asdf©qwer"
for i in range(len(s)):
    print("s[%d]: %s %X" % (i, s[i], ord(s[i])))
# Test all three forms of Unicode escape, and
# all blocks of UTF-8 byte patterns
# The string is built from \x, \u and \U escapes.  Every index from -len(s)
# up to len(s) - 1 is visited so that negative as well as non-negative
# indexing and slicing are exercised.
s = "a\xA9\xFF\u0123\u0800\uFFEE\U0001F44C"
for i in range(-len(s), len(s)):
    # Single-character access at index i.
    print("s[%d]: %s %X" % (i, s[i], ord(s[i])))
    # Prefix slice ending at i.
    print("s[:%d]: %d chars, '%s'" % (i, len(s[:i]), s[:i]))
    # Two-sided slices starting at i, for every stop value from i upwards.
    for j in range(i, len(s)):
        print("s[%d:%d]: %d chars, '%s'" % (i, j, len(s[i:j]), s[i:j]))
    # Suffix slice starting at i (depends only on i, so once per i).
    print("s[%d:]: %d chars, '%s'" % (i, len(s[i:]), s[i:]))
# Test UTF-8 encode and decode
# Round-trip check: s (assigned earlier in this script) is encoded with the
# default codec, and the bytes are printed together with whether decoding
# them gives back the original string.
enc = s.encode()
print(enc, enc.decode() == s)
# printing of unicode chars using repr
# NOTE: for some characters (eg \u10ff) we differ to CPython
# The string ends in U+2000, written as a \u escape in the literal; the
# repr() of the string is what gets printed.
print(repr("a\u2000"))
# test invalid escape code
# \U00110000 is one past the highest valid code point (U+10FFFF).  The
# literal is compiled through eval() on a constant string so that the
# failure happens at run time and can be caught here; written directly in
# this file it would stop the whole script from compiling.
try:
    eval('"\\U00110000"')
except SyntaxError:
    print("SyntaxError")
# test unicode string given to int
# U+0200 is not a digit, so int() is expected to reject it with ValueError.
try:
    int("\u0200")
except ValueError:
    print("ValueError")
# test invalid UTF-8 string
# Each case decodes a byte sequence that is not well-formed UTF-8 and expects
# UnicodeError.  The cases are kept separate (rather than looped) so that a
# failure can be traced to one specific input.

# 0xA1 is a continuation byte with no lead byte before it.
try:
    str(b"ab\xa1", "utf8")
except UnicodeError:
    print("UnicodeError")

# 0xF8 is not a valid UTF-8 lead byte.
try:
    str(b"ab\xf8", "utf8")
except UnicodeError:
    print("UnicodeError")

# Same check with a bytearray source: lead byte 0xC0 is followed by the
# ASCII byte "a" instead of a continuation byte.
try:
    str(bytearray(b"ab\xc0a"), "utf8")
except UnicodeError:
    print("UnicodeError")

# Lead byte 0xF0 is followed by further lead bytes, not continuation bytes.
try:
    str(b"\xf0\xe0\xed\xe8", "utf8")
except UnicodeError:
    print("UnicodeError")