forked from PythonCharmers/python-future
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_surrogateescape.py
More file actions
136 lines (118 loc) · 5.63 KB
/
test_surrogateescape.py
File metadata and controls
136 lines (118 loc) · 5.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# -*- coding: utf-8 -*-
"""
Tests for the surrogateescape codec
"""
from __future__ import absolute_import, division, unicode_literals
from future.builtins import (bytes, dict, int, range, round, str, super,
ascii, chr, hex, input, next, oct, open, pow,
filter, map, zip)
from future.utils.surrogateescape import register_surrogateescape
from future.tests.base import unittest, expectedFailurePY26, expectedFailurePY2
class TestSurrogateEscape(unittest.TestCase):
    """
    Checks of the surrogateescape error handler using sample data taken
    from the backport of the email package.
    """

    def setUp(self):
        # Every test begins by calling the registration helper.
        register_surrogateescape()

    @expectedFailurePY26   # Python 2.6 str.decode() takes no keyword args
    def test_surrogateescape(self):
        """
        Decoding a message as ASCII maps the stray bytes 0xC3 0xA1 to the
        surrogates U+DCC3 U+DCA1 (sample from the email package backport).
        """
        raw_message = b'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\xc3\xa1=\n'
        expected_text = 'From: foo@bar.com\nTo: baz\nMime-Version: 1.0\nContent-Type: text/plain; charset=utf-8\nContent-Transfer-Encoding: base64\n\ncMO2c3RhbA\udcc3\udca1=\n'
        self.assertEqual(
            raw_message.decode('ASCII', errors='surrogateescape'),
            expected_text)

    def test_encode_ascii_surrogateescape(self):
        """
        Encoding escaped surrogates to ASCII gives back the raw bytes.

        This crops up in the email module. The text is wrapped in the
        imported ``str`` before encoding.
        """
        escaped_text = str(u'cMO2c3RhbA\udcc3\udca1=\n')
        self.assertEqual(
            escaped_text.encode('ascii', 'surrogateescape'),
            b'cMO2c3RhbA\xc3\xa1=\n')

    @expectedFailurePY2
    def test_encode_ascii_surrogateescape_non_newstr(self):
        """
        Same check as above, but on a plain literal that is not wrapped
        in the imported ``str`` (no newstr object). Fails on Py2.
        """
        plain_text = u'cMO2c3RhbA\udcc3\udca1=\n'
        self.assertEqual(
            plain_text.encode('ascii', 'surrogateescape'),
            b'cMO2c3RhbA\xc3\xa1=\n')
class SurrogateEscapeTest(unittest.TestCase):
    """
    These tests are from Python 3.3's test suite
    """

    def setUp(self):
        # Every test begins by calling the registration helper.
        register_surrogateescape()

    def _assert_encoding_unaffected(self, text, encoding):
        """
        Assert that encoding *text* to *encoding* gives the same bytes
        with and without the surrogateescape error handler.
        """
        self.assertEqual(text.encode(encoding),
                         str(text).encode(encoding, 'surrogateescape'))

    def _assert_decoding_unaffected(self, text, encoding):
        """
        Assert that *text*, once encoded to *encoding*, decodes back to
        itself when the surrogateescape error handler is requested.
        """
        self.assertEqual(text,
                         text.encode(encoding).decode(encoding, 'surrogateescape'))

    def test_utf8(self):
        """Escape and restore bytes that are not valid UTF-8."""
        # Bad byte
        self.assertEqual(b"foo\x80bar".decode("utf-8", "surrogateescape"),
                         "foo\udc80bar")
        self.assertEqual(str("foo\udc80bar").encode("utf-8", "surrogateescape"),
                         b"foo\x80bar")
        # bad-utf-8 encoded surrogate
        # NOTE: the decoding direction is disabled; only encoding is checked.
        # self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "surrogateescape"),
        #                  "\udced\udcb0\udc80")
        self.assertEqual(str("\udced\udcb0\udc80").encode("utf-8", "surrogateescape"),
                         b"\xed\xb0\x80")

    def test_ascii(self):
        """Escape a byte that is not valid ASCII."""
        # bad byte
        self.assertEqual(b"foo\x80bar".decode("ascii", "surrogateescape"),
                         "foo\udc80bar")
        # Fails:
        # self.assertEqual("foo\udc80bar".encode("ascii", "surrogateescape"),
        #                  b"foo\x80bar")

    @expectedFailurePY2
    def test_charmap(self):
        """Round-trip a byte that a charmap codec cannot map."""
        # bad byte: \xa5 is unmapped in iso-8859-3
        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "surrogateescape"),
                         "foo\udca5bar")
        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "surrogateescape"),
                         b"foo\xa5bar")

    def test_latin1(self):
        """Encode escaped surrogates to latin-1."""
        # Issue6373
        self.assertEqual("\udce4\udceb\udcef\udcf6\udcfc".encode("latin-1", "surrogateescape"),
                         b"\xe4\xeb\xef\xf6\xfc")

    # FIXME: this test is still marked as an expected failure on Python 2.
    @expectedFailurePY2
    def test_encoding_works_normally(self):
        """
        Test that encoding into various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        # latin-1 is not exercised for this sample: it contains characters
        # outside the Latin-1 range.
        symbols = u'ℝεα∂@ßʟ℮ ☂ℯṧт υηḯ¢☺ḓ℮'
        self._assert_encoding_unaffected(symbols, 'utf-8')
        self._assert_encoding_unaffected(symbols, 'utf-16')

        japanese = 'きたないのよりきれいな方がいい'
        self._assert_encoding_unaffected(japanese, 'utf-8')
        self._assert_encoding_unaffected(japanese, 'utf-16')
        self._assert_encoding_unaffected(japanese, 'shift-jis')

    def test_decoding_works_normally(self):
        """
        Test that decoding from various encodings (particularly utf-16)
        still works with the surrogateescape error handler in action ...
        """
        accented = 'quéstionable'
        self._assert_decoding_unaffected(accented, 'utf-8')
        self._assert_decoding_unaffected(accented, 'utf-16')
        self._assert_decoding_unaffected(accented, 'latin-1')

        kanji = '文'
        self._assert_decoding_unaffected(kanji, 'utf-8')
        self._assert_decoding_unaffected(kanji, 'utf-16')
        self._assert_decoding_unaffected(kanji, 'shift-jis')
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()