| 1 | # coding=utf-8 |
| 2 | |
# File names used both by the generator script below (which creates them in a
# scratch directory) and by StringUtils.test_listdir_unicode (which looks for
# them in the listing it gets back).
TEST_FILENAMES = (
    u'Ärtonwall.mp3',   # contains a non-ASCII character
    u'test_file',       # plain ASCII, no extension
    u'Blah blah.txt',   # ASCII with an embedded space
)
| 8 | |
# When this file is run as a script, the block below prints a test class
# describing the current operating system, ready to be pasted into this file.
| 11 | |
| 12 | if __name__ == "__main__": |
| 13 | import sys, os |
| 14 | import tempfile |
| 15 | import shutil |
| 16 | import platform |
| 17 | |
| 18 | if len(sys.argv) != 2: |
| 19 | print "Usage: %s lumière" % sys.argv[0] |
| 20 | sys.exit(1) |
| 21 | |
| 22 | print |
| 23 | print "class MyWeirdOS(StringUtils, unittest.TestCase):" |
| 24 | print " uname = '%s'" % ' '.join(platform.uname()) |
| 25 | print " argv = %s" % repr(sys.argv[1]) |
| 26 | print " platform = '%s'" % sys.platform |
| 27 | print " filesystemencoding = '%s'" % sys.getfilesystemencoding() |
| 28 | print " stdoutencoding = '%s'" % sys.stdout.encoding |
| 29 | |
| 30 | try: |
| 31 | tmpdir = tempfile.mkdtemp() |
| 32 | for fname in TEST_FILENAMES: |
| 33 | open(os.path.join(tmpdir, fname), 'w').close() |
| 34 | |
| 35 | # Use Unicode API under Windows or MacOS X |
| 36 | if sys.platform in ('win32', 'darwin'): |
| 37 | dirlist = os.listdir(unicode(tmpdir)) |
| 38 | else: |
| 39 | dirlist = os.listdir(tmpdir) |
| 40 | |
| 41 | print " dirlist = %s" % repr(dirlist) |
| 42 | except: |
| 43 | print " # Oops, I cannot write filenames containing non-ascii characters" |
| 44 | print |
| 45 | |
| 46 | shutil.rmtree(tmpdir) |
| 47 | sys.exit(0) |
| 48 | |
| 49 | from twisted.trial import unittest |
| 50 | from mock import patch |
| 51 | import sys |
| 52 | |
| 53 | from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \ |
| 54 | unicode_to_stdout, unicode_platform, listdir_unicode, open_unicode, \ |
| 55 | FilenameEncodingError, get_term_encoding |
| 56 | from twisted.python import usage |
| 57 | |
class StringUtilsErrors(unittest.TestCase):
    # Error and fallback behaviour of the stringutils helpers, exercised with
    # sys.stdout / os.listdir replaced by mocks.

    @patch('sys.stdout')
    def test_get_term_encoding(self, mock):
        # With no encoding declared on stdout, 'ascii' is the expected answer.
        mock.encoding = None

        self.failUnlessEqual(get_term_encoding(), 'ascii')

    @patch('sys.stdout')
    def test_argv_to_unicode(self, mock):
        # A latin1-encoded argument is not valid for a UTF-8 terminal and is
        # expected to be reported as a usage error.
        mock.encoding = 'utf-8'

        self.failUnlessRaises(usage.UsageError,
                              argv_to_unicode,
                              u'lumière'.encode('latin1'))

    def test_unicode_to_url(self):
        # Placeholder: no error case is exercised for unicode_to_url here.
        pass

    @patch('sys.stdout')
    def test_unicode_to_stdout(self, mock):
        # Encoding koi8-r cannot represent 'è'
        mock.encoding = 'koi8-r'
        self.failUnlessEqual(unicode_to_stdout(u'lumière'), 'lumi?re')

    @patch('os.listdir')
    def test_unicode_normalization(self, mock):
        # Pretend to run on a Unicode platform such as Windows
        orig_platform = sys.platform
        sys.platform = 'win32'
        try:
            # The listing contains the decomposed form ('A' followed by
            # U+0308); the precomposed U+00C4 is expected back.
            mock.return_value = [u'A\u0308rtonwall.mp3']
            self.failUnlessEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
        finally:
            # Restore the real platform even when the assertion fails, so the
            # fake value cannot leak into tests that run afterwards.
            sys.platform = orig_platform
| 92 | |
# The following tests apply only to platforms which don't store filenames as
# Unicode entities on the filesystem.
class StringUtilsNonUnicodePlatform(unittest.TestCase):
    # Encoding failures expected from listdir_unicode / open_unicode while
    # sys.platform is forced to 'linux'.

    def setUp(self):
        # unicode_platform() looks at sys.platform, so remember the real value
        # and substitute a fake one for the duration of each test.
        self.original_platform, sys.platform = sys.platform, 'linux'

    def tearDown(self):
        sys.platform = self.original_platform

    @patch('sys.getfilesystemencoding')
    @patch('os.listdir')
    def test_listdir_unicode(self, listdir_mock, fsencoding_mock):
        word = u'lumière'
        listdir_mock.return_value = [word.encode('utf-8'), word.encode('latin1')]

        scenarios = (
            # What happens when a latin1-encoded filename is encountered on a
            # UTF-8 filesystem?
            ('utf-8', u'/dummy'),
            # Listing a directory whose own name cannot be represented in the
            # filesystem encoding should fail as well.
            ('ascii', u'/lumière'),
        )
        for fs_encoding, directory in scenarios:
            fsencoding_mock.return_value = fs_encoding
            self.failUnlessRaises(FilenameEncodingError,
                                  listdir_unicode,
                                  directory)

    @patch('sys.getfilesystemencoding')
    def test_open_unicode(self, fsencoding_mock):
        # A non-ASCII name with an ASCII filesystem encoding is expected to be
        # rejected.
        fsencoding_mock.return_value = 'ascii'
        self.failUnlessRaises(FilenameEncodingError, open_unicode, u'lumière')
| 133 | |
class StringUtils(object):
    # Mixin holding the platform-independent test bodies. Concrete test cases
    # combine it with unittest.TestCase and supply class attributes describing
    # one platform: ``platform``, ``stdoutencoding``, ``filesystemencoding``,
    # ``dirlist`` and, optionally, ``argv``.

    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform = sys.platform
        sys.platform = self.platform

    def tearDown(self):
        sys.platform = self.original_platform

    @patch('sys.stdout')
    def test_argv_to_unicode(self, mock):
        # Platforms that define no ``argv`` attribute are skipped.
        if not hasattr(self, 'argv'):
            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")

        mock.encoding = self.stdoutencoding

        argu = u'lumière'
        argv = self.argv

        self.failUnlessEqual(argv_to_unicode(argv), argu)

    def test_unicode_to_url(self):
        # failUnlessEqual, not failUnless: with failUnless the second argument
        # is only the failure message, so the check passed for any truthy
        # return value.
        self.failUnlessEqual(unicode_to_url(u'lumière'), u'lumière'.encode('utf-8'))

    @patch('sys.stdout')
    def test_unicode_to_stdout(self, mock):
        # Platforms that define no ``argv`` attribute are skipped.
        if not hasattr(self, 'argv'):
            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")

        mock.encoding = self.stdoutencoding
        self.failUnlessEqual(unicode_to_stdout(u'lumière'), self.argv)

    def test_unicode_platform(self):
        # Expected unicode_platform() answer for each supported sys.platform.
        matrix = {
            'linux2': False,
            'win32': True,
            'darwin': True,
        }

        self.failUnlessEqual(unicode_platform(), matrix[self.platform])

    @patch('sys.getfilesystemencoding')
    @patch('os.listdir')
    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
        # Feed the recorded raw listing and filesystem encoding of this
        # platform to listdir_unicode and check what comes back.
        mock_listdir.return_value = self.dirlist
        mock_getfilesystemencoding.return_value = self.filesystemencoding

        filenames = listdir_unicode(u'/dummy')

        # The names that come back are the ones whose type matters; they are
        # expected to be unicode objects.
        for fname in filenames:
            self.failUnless(isinstance(fname, unicode),
                            "%r is not a unicode object" % (fname,))

        for fname in TEST_FILENAMES:
            self.failUnless(isinstance(fname, unicode))

            if fname not in filenames:
                self.fail("Cannot find %r in %r" % (fname, filenames))

    @patch('os.open')
    def test_open_unicode(self, mock):
        # Opening a file inside a directory that does not exist is expected to
        # surface as IOError.
        self.failUnlessRaises(IOError,
                              open_unicode,
                              u'/dummy_directory/lumière.txt')
| 196 | |
| 197 | |
class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
    # Fixture values for a Linux host using UTF-8 for both the terminal and
    # the filesystem; argv and dirlist are byte strings.

    # Host identification
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'

    # Encodings reported by the host
    stdoutencoding = 'UTF-8'
    filesystemencoding = 'UTF-8'

    # Raw data as seen by the program
    argv = 'lumi\xc3\xa8re'
    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
| 205 | |
| 206 | |
class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
    # Fixture values for a Linux host using ISO-8859-1 for both the terminal
    # and the filesystem; argv and dirlist are byte strings.

    # Host identification
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'

    # Encodings reported by the host
    stdoutencoding = 'ISO-8859-1'
    filesystemencoding = 'ISO-8859-1'

    # Raw data as seen by the program
    argv = 'lumi\xe8re'
    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
| 214 | |
class WindowsXP(StringUtils, unittest.TestCase):
    # Fixture values for a Windows XP host with a cp850 console; dirlist
    # holds unicode objects.

    # Set for the whole class: see bug #565.
    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."

    # Host identification
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'

    # Encodings reported by the host
    stdoutencoding = 'cp850'
    filesystemencoding = 'mbcs'

    # Raw data as seen by the program
    argv = 'lumi\xe8re'
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
| 224 | |
class WindowsXP_UTF8(StringUtils, unittest.TestCase):
    # Fixture values for a Windows XP host whose console is set to code page
    # 65001; dirlist holds unicode objects.

    # Set for the whole class: see bug #565.
    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."

    # Host identification
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'

    # Encodings reported by the host
    stdoutencoding = 'cp65001'
    filesystemencoding = 'mbcs'

    # Raw data as seen by the program
    argv = 'lumi\xe8re'
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
| 234 | |
class WindowsVista(StringUtils, unittest.TestCase):
    # Fixture values for a Windows Vista host with a cp850 console; dirlist
    # holds unicode objects.

    # Set for the whole class: see bug #565.
    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."

    # Host identification
    platform = 'win32'
    uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'

    # Encodings reported by the host
    stdoutencoding = 'cp850'
    filesystemencoding = 'mbcs'

    # Raw data as seen by the program
    argv = 'lumi\xe8re'
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
| 244 | |
class MacOSXLeopard(StringUtils, unittest.TestCase):
    # Fixture values for a Mac OS X (Darwin 9.8.0) host with a UTF-8
    # terminal; dirlist holds unicode objects, with the first name in
    # decomposed form ('A' followed by U+0308).

    # Host identification
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'

    # Encodings reported by the host
    stdoutencoding = 'UTF-8'
    filesystemencoding = 'utf-8'

    # Raw data as seen by the program
    argv = 'lumi\xc3\xa8re'
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
| 252 | |
class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
    # Fixture values for a Mac OS X (Darwin 9.8.0) host whose terminal is
    # limited to US-ASCII; dirlist holds unicode objects.

    # Host identification
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'

    # Encodings reported by the host
    stdoutencoding = 'US-ASCII'
    filesystemencoding = 'utf-8'

    # No ``argv`` attribute is defined here (the original left
    # "argv = 'lumiere'" commented out), so the StringUtils tests that need
    # argv raise SkipTest for this class.
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']