Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 74 additions & 2 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,8 +228,7 @@ def _verify_parse_output(self, operations):
"Character data: '\xb5'",
"End element: 'root'",
]
for operation, expected_operation in zip(operations, expected_operations):
self.assertEqual(operation, expected_operation)
self.assertEqual(operations, expected_operations)

def test_parse_bytes(self):
out = self.Outputter()
Expand Down Expand Up @@ -277,6 +276,79 @@ def test_parse_again(self):
self.assertEqual(expat.ErrorString(cm.exception.code),
expat.errors.XML_ERROR_FINISHED)

@support.subTests('encoding', [
    # Unicode transformation formats
    'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
    # ISO 8859 family
    'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
    'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
    'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
    # DOS / Windows code pages
    'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
    'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
    'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
    'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
    'cp1256', 'cp1257', 'cp1258',
    # Macintosh code pages
    'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
    'mac-roman', 'mac-turkish',
    # Remaining single-name encodings
    'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154',
])
def test_supported_ecodings(self, encoding):
    """Parse a one-character document declared and encoded in *encoding*.

    The document consists of an XML declaration naming the encoding and a
    ``<root>`` element holding a single non-ASCII character.  The test
    asserts that the recorded parse events report the declaration, the
    element and that exact character.
    """
    # NOTE(review): presumably support.subTests runs this once per listed
    # encoding -- confirm against test.support.
    collector = self.Outputter()
    parser = expat.ParserCreate()
    self._hookup_callbacks(parser, collector)

    # Characters the codec cannot represent are dropped by 'ignore'; the
    # first survivor of the round trip is used as the payload.
    candidates = 'éπя\u05d0\u060c€'
    survivors = candidates.encode(encoding, 'ignore').decode(encoding)
    c = survivors[0]

    declaration = f'<?xml version="1.0" encoding="{encoding}"?>\n'
    document = declaration + f'<root>{c}</root>'
    parser.Parse(document.encode(encoding), True)

    expected_events = [
        ('XML declaration', ('1.0', encoding, -1)),
        "Start element: 'root' {}",
        f'Character data: {c!r}',
        "End element: 'root'",
    ]
    self.assertEqual(collector.out, expected_events)

@support.subTests('encoding', [
    'UTF-8', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be',
    'koi8-u', 'cp1125', 'cp1251', 'iso8859-5', 'mac-cyrillic',
])
def test_supported_ecodings2(self, encoding):
    """Parse a document with Cyrillic text in every position.

    The comment, the element name, the attribute name and value, and the
    character data are all non-ASCII.  The document is encoded with
    *encoding* and the test asserts that each recorded parse event carries
    the original text.
    """
    # NOTE(review): presumably support.subTests runs this once per listed
    # encoding -- confirm against test.support.
    collector = self.Outputter()
    parser = expat.ParserCreate()
    self._hookup_callbacks(parser, collector)

    pieces = [
        f'<?xml version="1.0" encoding="{encoding}"?>\n',
        '<!-- коментар -->',
        '<корінь атрибут="значення">зміст</корінь>',
    ]
    payload = ''.join(pieces).encode(encoding)
    parser.Parse(payload, True)

    expected_events = [
        ('XML declaration', ('1.0', encoding, -1)),
        "Comment: ' коментар '",
        "Start element: 'корінь' {'атрибут': 'значення'}",
        "Character data: 'зміст'",
        "End element: 'корінь'",
    ]
    self.assertEqual(collector.out, expected_events)

@support.subTests('encoding', [
    'UTF-7',
    "Big5-HKSCS", "Big5",
    "cp932", "cp949", "cp950",
    "EUC_JIS-2004", "EUC_JISX0213", "EUC-JP", "EUC-KR",
    "GB18030", "GB2312", "GBK",
    "ISO-2022-KR",
    "johab",
    "Shift_JIS", "Shift_JIS-2004", "Shift_JISX0213",
])
def test_unsupportes_ecodings(self, encoding):
    """Check that parsing a document declared in *encoding* raises ValueError.

    The document is an XML declaration naming the encoding followed by an
    empty ``<root>`` element, encoded with that same codec.
    """
    # NOTE(review): presumably support.subTests runs this once per listed
    # encoding -- confirm against test.support.
    parser = expat.ParserCreate()
    declaration = f'<?xml version="1.0" encoding="{encoding}"?>\n'
    payload = (declaration + '<root></root>').encode(encoding)
    self.assertRaises(ValueError, parser.Parse, payload, True)

def test_unknown_ecoding(self):
    """Check that a declared encoding name of "xyz" raises LookupError."""
    payload = b'<?xml version="1.0" encoding="xyz"?>\n<root></root>'
    parser = expat.ParserCreate()
    self.assertRaises(LookupError, parser.Parse, payload, True)


class NamespaceSeparatorTest(unittest.TestCase):
def test_legal(self):
# Tests that make sure we get errors when the namespace_separator value
Expand Down
27 changes: 15 additions & 12 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,12 +975,12 @@ def check(encoding, body=''):
check("cp437", '\u221a')
check("mac-roman", '\u02da')

def xml(encoding):
return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
def bxml(encoding):
return xml(encoding).encode(encoding)
def xml(encoding, body=''):
return "<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % (encoding, body)
def bxml(encoding, body=''):
return xml(encoding, body).encode(encoding)
supported_encodings = [
'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
'utf-8', 'utf-16', 'utf-16be', 'utf-16le',
'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
Expand All @@ -991,13 +991,14 @@ def bxml(encoding):
'cp1256', 'cp1257', 'cp1258',
'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
'mac-roman', 'mac-turkish',
'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
'iso2022-jp-3', 'iso2022-jp-ext',
'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
'hz', 'ptcp154',
'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 'ptcp154',
]
for encoding in supported_encodings:
self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
with self.subTest(encoding=encoding):
self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
c = 'éπя\u05d0\u060c€'.encode(encoding, 'ignore').decode(encoding)[0]
self.assertEqual(ET.tostring(ET.XML(bxml(encoding, c))),
('<xml>&#%d;</xml>' % ord(c)).encode())

unsupported_ascii_compatible_encodings = [
'big5', 'big5hkscs',
Expand All @@ -1009,14 +1010,16 @@ def bxml(encoding):
'utf-7',
]
for encoding in unsupported_ascii_compatible_encodings:
self.assertRaises(ValueError, ET.XML, bxml(encoding))
with self.subTest(encoding=encoding):
self.assertRaises(ValueError, ET.XML, bxml(encoding))

unsupported_ascii_incompatible_encodings = [
'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
'utf_32', 'utf_32_be', 'utf_32_le',
]
for encoding in unsupported_ascii_incompatible_encodings:
self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
with self.subTest(encoding=encoding):
self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))

self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
Expand Down
Loading