Ticket #534: unicode-filenames-handling-v4.diff

File unicode-filenames-handling-v4.diff, 31.7 KB (added by francois, at 2010-05-20T00:55:07Z)
  • docs/frontends/CLI.txt

    Thu May 20 02:43:56 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
      * Fix handling of correctly encoded unicode filenames (#534)
      
      Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
      backup', have been improved to correctly handle filenames containing non-ASCII
      characters.
        
      In the case where Tahoe encounters a filename which cannot be decoded using the
      system encoding, an error will be returned and the operation will fail.  Under
      Linux, this typically happens when the filesystem contains filenames stored in
      an encoding (for instance latin1) that differs from the one used by the system
      locale (for instance UTF-8).  In such a case, you'll need to convert the
      filenames with a tool such as 'convmv' before using the Tahoe CLI.
        
      All CLI commands have been improved to support non-ASCII parameters, such as
      filenames and aliases, on all supported operating systems except, for now,
      Windows.
    diff -rN -u old-tahoe-534/docs/frontends/CLI.txt new-tahoe-534/docs/frontends/CLI.txt
    old new  
    123123perspective on the graph of files and directories.
    124124
    125125Each tahoe node remembers a list of starting points, named "aliases",
    126 in a file named ~/.tahoe/private/aliases . These aliases are short
    127 strings that stand in for a directory read- or write- cap. If you use
    128 the command line "ls" without any "[STARTING_DIR]:" argument, then it
    129 will use the default alias, which is "tahoe", therefore "tahoe ls" has
    130 the same effect as "tahoe ls tahoe:".  The same goes for the other
    131 commands which can reasonably use a default alias: get, put, mkdir,
    132 mv, and rm.
     126in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8
     127encoded strings that stand in for a directory read- or write- cap. If
     128you use the command line "ls" without any "[STARTING_DIR]:" argument,
     129then it will use the default alias, which is "tahoe", therefore "tahoe
     130ls" has the same effect as "tahoe ls tahoe:".  The same goes for the
     131other commands which can reasonably use a default alias: get, put,
     132mkdir, mv, and rm.
    133133
    134134For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not
    135135found in ~/.tahoe/private/aliases, the CLI will use the contents of
  • NEWS

    diff -rN -u old-tahoe-534/NEWS new-tahoe-534/NEWS
    old new  
    11User visible changes in Tahoe-LAFS.  -*- outline -*-
    22
     3* Release 1.7.0
     4
     5** Bugfixes
     6
     7*** Unicode filenames handling
     8
     9Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
     10backup', have been improved to correctly handle filenames containing non-ASCII
     11characters.
     12
     13In the case where Tahoe encounters a filename which cannot be decoded using the
     14system encoding, an error will be returned and the operation will fail.  Under
     15Linux, this typically happens when the filesystem contains filenames stored in
     16an encoding (for instance latin1) that differs from the one used by the system
     17locale (for instance UTF-8).  In such a case, you'll need to convert the
     18filenames with a tool such as 'convmv' before using the Tahoe CLI.
     19
     20All CLI commands have been improved to support non-ASCII parameters, such as
     21filenames and aliases, on all supported operating systems except, for now,
     22Windows.
     23
    324* Release 1.6.1 (2010-02-27)
    425
    526** Bugfixes
  • src/allmydata/scripts/cli.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/cli.py new-tahoe-534/src/allmydata/scripts/cli.py
    old new  
    11import os.path, re, sys, fnmatch
    22from twisted.python import usage
    33from allmydata.scripts.common import BaseOptions, get_aliases
     4from allmydata.util.stringutils import argv_to_unicode
    45
    56NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
    67
     
    4950
    5051class MakeDirectoryOptions(VDriveOptions):
    5152    def parseArgs(self, where=""):
    52         self.where = where
     53        self.where = argv_to_unicode(where)
    5354    longdesc = """Create a new directory, either unlinked or as a subdirectory."""
    5455
    5556class AddAliasOptions(VDriveOptions):
    5657    def parseArgs(self, alias, cap):
    57         self.alias = alias
     58        self.alias = argv_to_unicode(alias)
    5859        self.cap = cap
    5960
    6061    def getSynopsis(self):
     
    6465
    6566class CreateAliasOptions(VDriveOptions):
    6667    def parseArgs(self, alias):
    67         self.alias = alias
     68        self.alias = argv_to_unicode(alias)
    6869
    6970    def getSynopsis(self):
    7071        return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
     
    8384        ("json", None, "Show the raw JSON output"),
    8485        ]
    8586    def parseArgs(self, where=""):
    86         self.where = where
     87        self.where = argv_to_unicode(where)
    8788
    8889    longdesc = """
    8990    List the contents of some portion of the grid.
     
    118119        # tahoe get FOO bar              # write to local file
    119120        # tahoe get tahoe:FOO bar        # same
    120121
    121         self.from_file = arg1
    122         self.to_file = arg2
     122        self.from_file = argv_to_unicode(arg1)
     123
     124        if arg2:
     125            self.to_file = argv_to_unicode(arg2)
     126        else:
     127            self.to_file = None
     128
    123129        if self.to_file == "-":
    124130            self.to_file = None
    125131
     
    151157        # see Examples below
    152158
    153159        if arg1 is not None and arg2 is not None:
    154             self.from_file = arg1
    155             self.to_file = arg2
     160            self.from_file = argv_to_unicode(arg1)
     161            self.to_file =  argv_to_unicode(arg2)
    156162        elif arg1 is not None and arg2 is None:
    157             self.from_file = arg1 # might be "-"
     163            self.from_file = argv_to_unicode(arg1) # might be "-"
    158164            self.to_file = None
    159165        else:
    160166            self.from_file = None
    161167            self.to_file = None
    162         if self.from_file == "-":
     168        if self.from_file == u"-":
    163169            self.from_file = None
    164170
    165171    def getSynopsis(self):
     
    197203    def parseArgs(self, *args):
    198204        if len(args) < 2:
    199205            raise usage.UsageError("cp requires at least two arguments")
    200         self.sources = args[:-1]
    201         self.destination = args[-1]
     206        self.sources = map(argv_to_unicode, args[:-1])
     207        self.destination = argv_to_unicode(args[-1])
    202208    def getSynopsis(self):
    203209        return "Usage: tahoe [options] cp FROM.. TO"
    204210    longdesc = """
     
    228234
    229235class RmOptions(VDriveOptions):
    230236    def parseArgs(self, where):
    231         self.where = where
     237        self.where = argv_to_unicode(where)
    232238
    233239    def getSynopsis(self):
    234240        return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)
    235241
    236242class MvOptions(VDriveOptions):
    237243    def parseArgs(self, frompath, topath):
    238         self.from_file = frompath
    239         self.to_file = topath
     244        self.from_file = argv_to_unicode(frompath)
     245        self.to_file = argv_to_unicode(topath)
    240246
    241247    def getSynopsis(self):
    242248        return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
     
    254260
    255261class LnOptions(VDriveOptions):
    256262    def parseArgs(self, frompath, topath):
    257         self.from_file = frompath
    258         self.to_file = topath
     263        self.from_file = argv_to_unicode(frompath)
     264        self.to_file = argv_to_unicode(topath)
    259265
    260266    def getSynopsis(self):
    261267        return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
     
    279285        self['exclude'] = set()
    280286
    281287    def parseArgs(self, localdir, topath):
    282         self.from_dir = localdir
    283         self.to_dir = topath
     288        self.from_dir = argv_to_unicode(localdir)
     289        self.to_dir = argv_to_unicode(topath)
    284290
    285291    def getSynopsis(Self):
    286292        return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
     
    337343        ("info", "i", "Open the t=info page for the file"),
    338344        ]
    339345    def parseArgs(self, where=''):
    340         self.where = where
     346        self.where = argv_to_unicode(where)
    341347
    342348    def getSynopsis(self):
    343349        return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    354360        ("raw", "r", "Display raw JSON data instead of parsed"),
    355361        ]
    356362    def parseArgs(self, where=''):
    357         self.where = where
     363        self.where = argv_to_unicode(where)
    358364
    359365    def getSynopsis(self):
    360366        return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    367373        ("raw", "r", "Display raw JSON data instead of parsed"),
    368374        ]
    369375    def parseArgs(self, where=''):
    370         self.where = where
     376        self.where = argv_to_unicode(where)
    371377
    372378    def getSynopsis(self):
    373379        return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    383389        ("add-lease", None, "Add/renew lease on all shares"),
    384390        ]
    385391    def parseArgs(self, where=''):
    386         self.where = where
     392        self.where = argv_to_unicode(where)
    387393
    388394    def getSynopsis(self):
    389395        return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
     
    402408        ("verbose", "v", "Be noisy about what is happening."),
    403409        ]
    404410    def parseArgs(self, where=''):
    405         self.where = where
     411        self.where = argv_to_unicode(where)
    406412
    407413    def getSynopsis(self):
    408414        return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
  • src/allmydata/scripts/common.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/common.py new-tahoe-534/src/allmydata/scripts/common.py
    old new  
    11
    22import os, sys, urllib
     3import codecs
    34from twisted.python import usage
    4 
     5from allmydata.util.stringutils import unicode_to_url
     6from allmydata.util.assertutil import precondition
    57
    68class BaseOptions:
    79    # unit tests can override these to point at StringIO instances
     
    100102    except EnvironmentError:
    101103        pass
    102104    try:
    103         f = open(aliasfile, "r")
     105        f = codecs.open(aliasfile, "r", "utf-8")
    104106        for line in f.readlines():
    105107            line = line.strip()
    106108            if line.startswith("#") or not line:
    107109                continue
    108110            name, cap = line.split(":", 1)
    109111            # normalize it: remove http: prefix, urldecode
    110             cap = cap.strip()
     112            cap = cap.strip().encode('utf-8')
    111113            aliases[name] = uri.from_string_dirnode(cap).to_string()
    112114    except EnvironmentError:
    113115        pass
     
    138140    # and default is not found in aliases, an UnknownAliasError is
    139141    # raised.
    140142    path = path.strip()
    141     if uri.has_uri_prefix(path):
     143    if uri.has_uri_prefix(path.encode('utf-8')):
    142144        # We used to require "URI:blah:./foo" in order to get a subpath,
    143145        # stripping out the ":./" sequence. We still allow that for compatibility,
    144146        # but now also allow just "URI:blah/foo".
     
    180182
    181183def escape_path(path):
    182184    segments = path.split("/")
    183     return "/".join([urllib.quote(s) for s in segments])
     185    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
  • src/allmydata/scripts/tahoe_add_alias.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py
    old new  
    11
    22import os.path
     3import codecs
     4import sys
    35from allmydata import uri
    46from allmydata.scripts.common_http import do_http, check_http_error
    57from allmydata.scripts.common import get_aliases
    68from allmydata.util.fileutil import move_into_place
     9from allmydata.util.stringutils import unicode_to_stdout
     10
    711
    812def add_line_to_aliasfile(aliasfile, alias, cap):
    913    # we use os.path.exists, rather than catching EnvironmentError, to avoid
    1014    # clobbering the valuable alias file in case of spurious or transient
    1115    # filesystem errors.
    1216    if os.path.exists(aliasfile):
    13         f = open(aliasfile, "r")
     17        f = codecs.open(aliasfile, "r", "utf-8")
    1418        aliases = f.read()
    1519        f.close()
    1620        if not aliases.endswith("\n"):
     
    1822    else:
    1923        aliases = ""
    2024    aliases += "%s: %s\n" % (alias, cap)
    21     f = open(aliasfile+".tmp", "w")
     25    f = codecs.open(aliasfile+".tmp", "w", "utf-8")
    2226    f.write(aliases)
    2327    f.close()
    2428    move_into_place(aliasfile+".tmp", aliasfile)
     
    4145
    4246    add_line_to_aliasfile(aliasfile, alias, cap)
    4347
    44     print >>stdout, "Alias '%s' added" % (alias,)
     48    print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
    4549    return 0
    4650
    4751def create_alias(options):
     
    7478
    7579    add_line_to_aliasfile(aliasfile, alias, new_uri)
    7680
    77     print >>stdout, "Alias '%s' created" % (alias,)
     81    print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
    7882    return 0
    7983
    8084def list_aliases(options):
  • src/allmydata/scripts/tahoe_backup.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_backup.py new-tahoe-534/src/allmydata/scripts/tahoe_backup.py
    old new  
    99from allmydata.scripts.common_http import do_http
    1010from allmydata.util import time_format
    1111from allmydata.scripts import backupdb
     12import sys
     13from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
     14from allmydata.util.assertutil import precondition
     15from twisted.python import usage
     16
    1217
    1318class HTTPError(Exception):
    1419    pass
     
    154159
    155160    def verboseprint(self, msg):
    156161        if self.verbosity >= 2:
     162            if isinstance(msg, unicode):
     163                msg = unicode_to_stdout(msg)
     164
    157165            print >>self.options.stdout, msg
    158166
    159167    def warn(self, msg):
    160168        print >>self.options.stderr, msg
    161169
    162170    def process(self, localpath):
     171        precondition(isinstance(localpath, unicode), localpath)
    163172        # returns newdircap
    164173
    165174        self.verboseprint("processing %s" % localpath)
     
    167176        compare_contents = {} # childname -> rocap
    168177
    169178        try:
    170             children = os.listdir(localpath)
     179            children = listdir_unicode(localpath)
    171180        except EnvironmentError:
    172181            self.directories_skipped += 1
    173182            self.warn("WARNING: permission denied on directory %s" % localpath)
     
    283292
    284293    # This function will raise an IOError exception when called on an unreadable file
    285294    def upload(self, childpath):
     295        precondition(isinstance(childpath, unicode), childpath)
     296
    286297        #self.verboseprint("uploading %s.." % childpath)
    287298        metadata = get_local_metadata(childpath)
    288299
     
    291302
    292303        if must_upload:
    293304            self.verboseprint("uploading %s.." % childpath)
    294             infileobj = open(os.path.expanduser(childpath), "rb")
     305            infileobj = open_unicode(os.path.expanduser(childpath), "rb")
    295306            url = self.options['node-url'] + "uri"
    296307            resp = do_http("PUT", url, infileobj)
    297308            if resp.status not in (200, 201):
  • src/allmydata/scripts/tahoe_cp.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_cp.py new-tahoe-534/src/allmydata/scripts/tahoe_cp.py
    old new  
    22import os.path
    33import urllib
    44import simplejson
     5import sys
    56from cStringIO import StringIO
    67from twisted.python.failure import Failure
    78from allmydata.scripts.common import get_alias, escape_path, \
    89                                     DefaultAliasMarker, UnknownAliasError
    910from allmydata.scripts.common_http import do_http
    1011from allmydata import uri
     12from twisted.python import usage
     13from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
     14from allmydata.util.assertutil import precondition
     15
    1116
    1217def ascii_or_none(s):
    1318    if s is None:
     
    7075
    7176class LocalFileSource:
    7277    def __init__(self, pathname):
     78        precondition(isinstance(pathname, unicode), pathname)
    7379        self.pathname = pathname
    7480
    7581    def need_to_copy_bytes(self):
     
    8086
    8187class LocalFileTarget:
    8288    def __init__(self, pathname):
     89        precondition(isinstance(pathname, unicode), pathname)
    8390        self.pathname = pathname
    8491    def put_file(self, inf):
    8592        outf = open(self.pathname, "wb")
     
    9299
    93100class LocalMissingTarget:
    94101    def __init__(self, pathname):
     102        precondition(isinstance(pathname, unicode), pathname)
    95103        self.pathname = pathname
    96104
    97105    def put_file(self, inf):
     
    105113
    106114class LocalDirectorySource:
    107115    def __init__(self, progressfunc, pathname):
     116        precondition(isinstance(pathname, unicode), pathname)
     117
    108118        self.progressfunc = progressfunc
    109119        self.pathname = pathname
    110120        self.children = None
     
    113123        if self.children is not None:
    114124            return
    115125        self.children = {}
    116         children = os.listdir(self.pathname)
     126        children = listdir_unicode(self.pathname)
    117127        for i,n in enumerate(children):
    118128            self.progressfunc("examining %d of %d" % (i, len(children)))
    119129            pn = os.path.join(self.pathname, n)
     
    130140
    131141class LocalDirectoryTarget:
    132142    def __init__(self, progressfunc, pathname):
     143        precondition(isinstance(pathname, unicode), pathname)
     144
    133145        self.progressfunc = progressfunc
    134146        self.pathname = pathname
    135147        self.children = None
     
    138150        if self.children is not None:
    139151            return
    140152        self.children = {}
    141         children = os.listdir(self.pathname)
     153        children = listdir_unicode(self.pathname)
    142154        for i,n in enumerate(children):
    143155            self.progressfunc("examining %d of %d" % (i, len(children)))
    144156            pn = os.path.join(self.pathname, n)
     
    161173        return LocalDirectoryTarget(self.progressfunc, pathname)
    162174
    163175    def put_file(self, name, inf):
     176        precondition(isinstance(name, unicode), name)
    164177        pathname = os.path.join(self.pathname, name)
    165         outf = open(pathname, "wb")
     178        outf = open_unicode(pathname, "wb")
    166179        while True:
    167180            data = inf.read(32768)
    168181            if not data:
     
    355368                if self.writecap:
    356369                    url = self.nodeurl + "/".join(["uri",
    357370                                                   urllib.quote(self.writecap),
    358                                                    urllib.quote(name.encode('utf-8'))])
     371                                                   urllib.quote(unicode_to_url(name))])
    359372                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
    360373                                                      writecap, readcap, url)
    361374            elif data[0] == "dirnode":
  • src/allmydata/scripts/tahoe_ls.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_ls.py new-tahoe-534/src/allmydata/scripts/tahoe_ls.py
    old new  
    44from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
    55                                     UnknownAliasError
    66from allmydata.scripts.common_http import do_http
     7from allmydata.util.stringutils import unicode_to_stdout
    78
    89def list(options):
    910    nodeurl = options['node-url']
     
    130131            line.append(ctime_s)
    131132        if not options["classify"]:
    132133            classify = ""
    133         line.append(name + classify)
     134        line.append(unicode_to_stdout(name) + classify)
    134135        if options["uri"]:
    135136            line.append(uri)
    136137        if options["readonly-uri"]:
  • src/allmydata/scripts/tahoe_manifest.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py
    old new  
    8585                try:
    8686                    print >>stdout, d["cap"], "/".join(d["path"])
    8787                except UnicodeEncodeError:
    88                     print >>stdout, d["cap"], "/".join([p.encode("utf-8")
     88                    print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
    8989                                                        for p in d["path"]])
    9090
    9191def manifest(options):
  • src/allmydata/scripts/tahoe_mkdir.py

    diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py
    old new  
    22import urllib
    33from allmydata.scripts.common_http import do_http, check_http_error
    44from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
     5from allmydata.util.stringutils import unicode_to_url
    56
    67def mkdir(options):
    78    nodeurl = options['node-url']
     
    3536        path = path[:-1]
    3637    # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
    3738    url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
    38                                            urllib.quote(path))
     39                                           urllib.quote(unicode_to_url(path)))
    3940    resp = do_http("POST", url)
    4041    check_http_error(resp, stderr)
    4142    new_uri = resp.read().strip()
  • src/allmydata/test/test_cli.py

    diff -rN -u old-tahoe-534/src/allmydata/test/test_cli.py new-tahoe-534/src/allmydata/test/test_cli.py
    old new  
    66import urllib
    77import re
    88import simplejson
     9import sys
    910
    1011from allmydata.util import fileutil, hashutil, base32
    1112from allmydata import uri
     
    2627from twisted.internet import threads # CLI tests use deferToThread
    2728from twisted.python import usage
    2829
     30from allmydata.util.stringutils import listdir_unicode, open_unicode, \
     31     unicode_platform, FilenameEncodingError
     32
    2933timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
    3034
    3135
     
    284288                   "work": "WA",
    285289                   "c": "CA"}
    286290        def ga1(path):
    287             return get_alias(aliases, path, "tahoe")
     291            return get_alias(aliases, path, u"tahoe")
    288292        uses_lettercolon = common.platform_uses_lettercolon_drivename()
    289293        self.failUnlessEqual(ga1("bare"), ("TA", "bare"))
    290294        self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file"))
     
    379383        # default set to something that isn't in the aliases argument should
    380384        # raise an UnknownAliasError.
    381385        def ga4(path):
    382             return get_alias(aliases, path, "badddefault:")
     386            return get_alias(aliases, path, u"badddefault:")
    383387        self.failUnlessRaises(common.UnknownAliasError, ga4, "afile")
    384388        self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/")
    385389
     
    387391            old = common.pretend_platform_uses_lettercolon
    388392            try:
    389393                common.pretend_platform_uses_lettercolon = True
    390                 retval = get_alias(aliases, path, "baddefault:")
     394                retval = get_alias(aliases, path, u"baddefault:")
    391395            finally:
    392396                common.pretend_platform_uses_lettercolon = old
    393397            return retval
    394398        self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows")
    395399
     400    def test_listdir_unicode_good(self):
     401        basedir = u"cli/common/listdir_unicode_good"
     402        fileutil.make_dirs(basedir)
     403
     404        files = (u'Lôzane', u'Bern', u'Genève')
     405
     406        for file in files:
     407            open(os.path.join(basedir, file), "w").close()
     408
     409        for file in listdir_unicode(basedir):
     410            self.failUnlessEqual(file in files, True)
     411
     412    def test_listdir_unicode_bad(self):
     413        if unicode_platform():
     414            raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.")
     415
     416        basedir = u"cli/common/listdir_unicode_bad"
     417        fileutil.make_dirs(basedir)
     418
     419        files = (u'Lôzane', u'Bern', u'Genève')
     420
     421        # We use a wrong encoding on purpose
     422        if sys.getfilesystemencoding() == 'UTF-8':
     423            encoding = 'latin1'
     424        else:
     425            encoding = 'UTF-8'
     426
     427        for file in files:
     428            path = os.path.join(basedir, file).encode(encoding)
     429            open(path, "w").close()
     430
     431        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir)
    396432
    397433class Help(unittest.TestCase):
    398434
     
    592628            self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:"))
    593629        d.addCallback(_check_not_corrupted)
    594630
    595         return d
    596631
     632    def test_create_unicode(self):
     633        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
     634            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
     635
     636        if sys.stdout.encoding not in ('UTF-8'):
     637            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
     638
     639        self.basedir = "cli/CreateAlias/create_unicode"
     640        self.set_up_grid()
     641        aliasfile = os.path.join(self.get_clientdir(), "private", "aliases")
     642
     643        d = self.do_cli("create-alias", "études")
     644        def _check_create_unicode((rc,stdout,stderr)):
     645            self.failUnlessEqual(rc, 0)
     646            self.failIf(stderr)
     647
     648            # If stdout only supports ascii, accentuated characters are
     649            # being replaced by '?'
     650            if sys.stdout.encoding == "ANSI_X3.4-1968":
     651                self.failUnless("Alias '?tudes' created" in stdout)
     652            else:
     653                self.failUnless("Alias 'études' created" in stdout)
     654
     655            aliases = get_aliases(self.get_clientdir())
     656            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
     657        d.addCallback(_check_create_unicode)
     658
     659        d.addCallback(lambda res: self.do_cli("ls", "études:"))
     660        def _check_ls1((rc, stdout, stderr)):
     661            self.failUnlessEqual(rc, 0)
     662            self.failIf(stderr)
     663
     664            self.failUnlessEqual(stdout, "")
     665        d.addCallback(_check_ls1)
     666
     667        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
     668          stdin="Blah blah blah"))
     669
     670        d.addCallback(lambda res: self.do_cli("ls", "études:"))
     671        def _check_ls2((rc, stdout, stderr)):
     672            self.failUnlessEqual(rc, 0)
     673            self.failIf(stderr)
     674
     675            self.failUnlessEqual(stdout, "uploaded.txt\n")
     676        d.addCallback(_check_ls2)
     677
     678        d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt"))
     679        def _check_get((rc, stdout, stderr)):
     680            self.failUnlessEqual(rc, 0)
     681            self.failIf(stderr)
     682            self.failUnlessEqual(stdout, "Blah blah blah")
     683        d.addCallback(_check_get)
     684
     685        # Ensure that an Unicode filename in an Unicode alias works as expected
     686        d.addCallback(lambda res: self.do_cli("put", "-", "études:lumière.txt",
     687          stdin="Let the sunshine In!"))
     688
     689        d.addCallback(lambda res: self.do_cli("get",
     690                      get_aliases(self.get_clientdir())[u"études"] + "/lumière.txt"))
     691        def _check_get((rc, stdout, stderr)):
     692            self.failUnlessEqual(rc, 0)
     693            self.failIf(stderr)
     694            self.failUnlessEqual(stdout, "Let the sunshine In!")
     695        d.addCallback(_check_get)
     696
     697        return d
    597698
    598699class Ln(GridTestMixin, CLITestMixin, unittest.TestCase):
    599700    def _create_test_file(self):
     
    865966        return d
    866967
    867968
     969    def test_immutable_from_file_unicode(self):
     970        if sys.stdout.encoding not in ('UTF-8'):
     971            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
     972     
     973        # tahoe put file.txt "à trier.txt"
     974        self.basedir = os.path.dirname(self.mktemp())
     975        self.set_up_grid()
     976
     977        rel_fn = os.path.join(self.basedir, "DATAFILE")
     978        abs_fn = os.path.abspath(rel_fn)
     979        # we make the file small enough to fit in a LIT file, for speed
     980        DATA = "short file"
     981        f = open(rel_fn, "w")
     982        f.write(DATA)
     983        f.close()
     984
     985        d = self.do_cli("create-alias", "tahoe")
     986
     987        d.addCallback(lambda res:
     988                      self.do_cli("put", rel_fn, "à trier.txt"))
     989        def _uploaded((rc,stdout,stderr)):
     990            readcap = stdout.strip()
     991            self.failUnless(readcap.startswith("URI:LIT:"))
     992            self.failUnless("201 Created" in stderr, stderr)
     993            self.readcap = readcap
     994        d.addCallback(_uploaded)
     995
     996        d.addCallback(lambda res:
     997                      self.do_cli("get", "tahoe:à trier.txt"))
     998        d.addCallback(lambda (rc,stdout,stderr):
     999                      self.failUnlessEqual(stdout, DATA))
     1000
     1001        return d
     1002
    8681003class List(GridTestMixin, CLITestMixin, unittest.TestCase):
    8691004    def test_list(self):
    8701005        self.basedir = "cli/List/list"
     
    11461281                              o.parseOptions, ["onearg"])
    11471282
    11481283    def test_unicode_filename(self):
     1284        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
     1285            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
     1286
     1287        if sys.stdout.encoding not in ('UTF-8'):
     1288            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
     1289
    11491290        self.basedir = "cli/Cp/unicode_filename"
    11501291        self.set_up_grid()
     1292        d = self.do_cli("create-alias", "tahoe")
    11511293
    1152         fn1 = os.path.join(self.basedir, "Ärtonwall")
     1294        # Use unicode strings when calling os functions
     1295        fn1 = os.path.join(self.basedir, u"Ärtonwall")
    11531296        DATA1 = "unicode file content"
    11541297        fileutil.write(fn1, DATA1)
    11551298
    1156         fn2 = os.path.join(self.basedir, "Metallica")
    1157         DATA2 = "non-unicode file content"
    1158         fileutil.write(fn2, DATA2)
    1159 
    1160         # Bug #534
    1161         # Assure that uploading a file whose name contains unicode character
    1162         # doesn't prevent further uploads in the same directory
    1163         d = self.do_cli("create-alias", "tahoe")
    1164         d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
    1165         d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
     1299        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:"))
    11661300
    11671301        d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
    11681302        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
    11691303
     1304        fn2 = os.path.join(self.basedir, u"Metallica")
     1305        DATA2 = "non-unicode file content"
     1306        fileutil.write(fn2, DATA2)
     1307
     1308        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
     1309
    11701310        d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
    11711311        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
    11721312
     1313        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
     1314        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, "Metallica\nÄrtonwall\n"))
     1315
    11731316        return d
    1174     test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
    11751317
    11761318    def test_dangling_symlink_vs_recursion(self):
    11771319        if not hasattr(os, 'symlink'):
     
    12781420        return d
    12791421
    12801422
     1423class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
     1424    def test_unicode_mkdir(self):  # runs "mkdir" with a non-ASCII directory name under the "tahoe" alias
     1425        self.basedir = os.path.dirname(self.mktemp())
     1426        self.set_up_grid()
     1427
     1428        d = self.do_cli("create-alias", "tahoe")
     1429        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))  # NOTE(review): the result of mkdir is not asserted here -- confirm whether an rc check is wanted
     1430
     1431        return d
     1432 
     1433
    12811434class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
    12821435
    12831436    def writeto(self, path, data):