Thu May 20 01:33:25 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
  * Add dependency on Michael Foord's mock library

Thu May 20 02:41:05 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
  * stringutils.py: Unicode helper functions + associated tests
  
  This file contains a bunch of helper functions which converts
  unicode string from and to argv, filenames and stdout.

Thu May 20 02:43:56 CEST 2010  Francois Deppierraz <francois@ctrlaltdel.ch>
  * Fix handling of correctly encoded unicode filenames (#534)
  
  Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
  backup', have been improved to correctly handle filenames containing non-ASCII
  characters.
    
  In the case where Tahoe encounters a filename which cannot be decoded using the
  system encoding, an error will be returned and the operation will fail.  Under
  Linux, this typically happens when the filesystem contains filenames encoded
  with another encoding, for instance latin1, than the system locale, for
  instance UTF-8.  In such case, you'll need to fix your system with tools such
  as 'convmv' before using Tahoe CLI.
    
  All CLI commands have been improved to support non-ASCII parameters such as
  filenames and aliases on all supported Operating Systems except Windows as of
  now.

New patches:

[Add dependency on Michael Foord's mock library
Francois Deppierraz <francois@ctrlaltdel.ch>**20100519233325
 Ignore-this: 9bb01bf1e4780f6b98ed394c3b772a80
] hunk ./_auto_deps.py 34
                   # Needed for SFTP. Commented-out pending tests, see #953.
                   # "pycrypto >= 2.0.1",
 
+                  # Mock - Mocking and Testing Library
+                  # http://www.voidspace.org.uk/python/mock/
+                  "mock",
+
                   # Will be needed to test web apps, but not yet. See #1001.
                   #"windmill >= 1.3",
                   ]
[stringutils.py: Unicode helper functions + associated tests
Francois Deppierraz <francois@ctrlaltdel.ch>**20100520004105
 Ignore-this: 7a73fc31de2fd39d437d6abd278bfa9a
 
 This file contains a bunch of helper functions which converts
 unicode string from and to argv, filenames and stdout.
] {
addfile ./src/allmydata/test/test_stringutils.py
hunk ./src/allmydata/test/test_stringutils.py 1
+# coding=utf-8
+
+TEST_FILENAMES = (
+  u'Ärtonwall.mp3',
+  u'test_file',
+  u'Blah blah.txt',
+)
+
+# The following main helps to generate a test class for other operating
+# systems.
+
+if __name__ == "__main__":
+    # Helper script (Python 2): run it on a new platform with the single
+    # argument "lumière" and it prints a ready-to-paste StringUtils subclass
+    # recording how that platform reports argv, stdout and filenames.
+    import sys, os
+    import tempfile
+    import shutil
+    import platform
+
+    if len(sys.argv) != 2:
+        print "Usage: %s lumière" % sys.argv[0]
+        sys.exit(1)
+
+    print
+    print "class MyWeirdOS(StringUtils, unittest.TestCase):"
+    print "    uname = '%s'" % ' '.join(platform.uname())
+    print "    argv = %s" % repr(sys.argv[1])
+    print "    platform = '%s'" % sys.platform
+    print "    filesystemencoding = '%s'" % sys.getfilesystemencoding()
+    print "    stdoutencoding = '%s'" % sys.stdout.encoding
+
+    # Create the scratch directory outside the try block so that the cleanup
+    # below never refers to an unbound name.
+    tmpdir = tempfile.mkdtemp()
+    try:
+        try:
+            for fname in TEST_FILENAMES:
+                open(os.path.join(tmpdir, fname), 'w').close()
+
+            # Use Unicode API under Windows or MacOS X
+            if sys.platform in ('win32', 'darwin'):
+                dirlist = os.listdir(unicode(tmpdir))
+            else:
+                dirlist = os.listdir(tmpdir)
+
+            print "    dirlist = %s" % repr(dirlist)
+        except (EnvironmentError, UnicodeError):
+            # Only filesystem and encoding failures mean "this platform cannot
+            # store these names"; anything else (including Ctrl-C) propagates.
+            print "    # Oops, I cannot write filenames containing non-ascii characters"
+        print
+    finally:
+        # Always remove the scratch directory, even on unexpected errors.
+        shutil.rmtree(tmpdir)
+
+    sys.exit(0)
+
+from twisted.trial import unittest
+from mock import patch
+import sys
+
+from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
+    unicode_to_stdout, unicode_platform, listdir_unicode, open_unicode, \
+    FilenameEncodingError, get_term_encoding
+from twisted.python import usage
+
+class StringUtilsErrors(unittest.TestCase):
+    # Error paths and fallbacks of the stringutils helpers.
+
+    @patch('sys.stdout')
+    def test_get_term_encoding(self, mock):
+        # A stdout without a known encoding must fall back to ASCII.
+        mock.encoding = None
+
+        self.failUnlessEqual(get_term_encoding(), 'ascii')
+
+    @patch('sys.stdout')
+    def test_argv_to_unicode(self, mock):
+        # The latin1 bytes of 'lumière' are not valid UTF-8, so decoding must
+        # be reported as a usage error.
+        mock.encoding = 'utf-8'
+
+        self.failUnlessRaises(usage.UsageError,
+                              argv_to_unicode,
+                              u'lumière'.encode('latin1'))
+
+    def test_unicode_to_url(self):
+        # No error case is exercised for unicode_to_url() yet.
+        pass
+
+    @patch('sys.stdout')
+    def test_unicode_to_stdout(self, mock):
+        # Encoding koi8-r cannot represent 'è'
+        mock.encoding = 'koi8-r'
+        self.failUnlessEqual(unicode_to_stdout(u'lumière'), 'lumi?re')
+
+    @patch('os.listdir')
+    def test_unicode_normalization(self, mock):
+        # Pretend to run on an Unicode platform such as Windows
+        orig_platform = sys.platform
+        sys.platform = 'win32'
+        try:
+            # A decomposed name (A + combining diaeresis) must come back in
+            # composed (NFC) form.
+            mock.return_value = [u'A\u0308rtonwall.mp3']
+            self.failUnlessEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
+        finally:
+            # Restore even when the assertion fails, otherwise the fake
+            # platform would leak into every test that runs afterwards.
+            sys.platform = orig_platform
+
+# The following tests apply only to platforms which don't store filenames as
+# Unicode entities on the filesystem.
+class StringUtilsNonUnicodePlatform(unittest.TestCase):
+    # Byte-oriented filesystem cases: sys.platform is forced to a Unix value
+    # for the duration of each test.
+
+    def setUp(self):
+        # unicode_platform() consults sys.platform, so swap it out here
+        self.original_platform, sys.platform = sys.platform, 'linux'
+
+    def tearDown(self):
+        sys.platform = self.original_platform
+
+    @patch('sys.getfilesystemencoding')
+    @patch('os.listdir')
+    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
+        # A latin1-encoded name next to a UTF-8 one, on a filesystem declared
+        # as UTF-8, must be rejected.
+        name = u'lumière'
+        mock_getfilesystemencoding.return_value = 'utf-8'
+        mock_listdir.return_value = [name.encode('utf-8'),
+                                     name.encode('latin1')]
+        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy')
+
+        # We're trying to list a directory whose name cannot be represented in
+        # the filesystem encoding.  This should fail.
+        mock_getfilesystemencoding.return_value = 'ascii'
+        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/lumière')
+
+    @patch('sys.getfilesystemencoding')
+    def test_open_unicode(self, mock):
+        # A non-ASCII path cannot be encoded for an ASCII filesystem.
+        mock.return_value = 'ascii'
+        self.failUnlessRaises(FilenameEncodingError, open_unicode, u'lumière')
+
+class StringUtils():
+    # Mixin holding the platform-independent test logic.  Concrete subclasses
+    # provide the recorded platform data as class attributes: platform,
+    # filesystemencoding, stdoutencoding, dirlist and (optionally) argv.
+
+    def setUp(self):
+        # Mock sys.platform because unicode_platform() uses it
+        self.original_platform = sys.platform
+        sys.platform = self.platform
+
+    def tearDown(self):
+        sys.platform = self.original_platform
+
+    @patch('sys.stdout')
+    def test_argv_to_unicode(self, mock):
+        if 'argv' not in dir(self):
+            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
+
+        mock.encoding = self.stdoutencoding
+
+        argu = u'lumière'
+        argv = self.argv
+
+        self.failUnlessEqual(argv_to_unicode(argv), argu)
+
+    def test_unicode_to_url(self):
+        # failUnlessEqual, not failUnless: with failUnless the second argument
+        # is only the failure message and no comparison would take place.
+        self.failUnlessEqual(unicode_to_url(u'lumière'), u'lumière'.encode('utf-8'))
+
+    @patch('sys.stdout')
+    def test_unicode_to_stdout(self, mock):
+        if 'argv' not in dir(self):
+            raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
+
+        mock.encoding = self.stdoutencoding
+        self.failUnlessEqual(unicode_to_stdout(u'lumière'), self.argv)
+
+    def test_unicode_platform(self):
+        # Expected unicode_platform() answer for each mocked sys.platform.
+        matrix = {
+          'linux2': False,
+          'win32':  True,
+          'darwin': True,
+        }
+
+        self.failUnlessEqual(unicode_platform(), matrix[self.platform])
+
+    @patch('sys.getfilesystemencoding')
+    @patch('os.listdir')
+    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
+
+        mock_listdir.return_value = self.dirlist
+        mock_getfilesystemencoding.return_value = self.filesystemencoding
+
+        filenames = listdir_unicode(u'/dummy')
+
+        # Check the type of what listdir_unicode() returned; checking the
+        # TEST_FILENAMES constants would always succeed.
+        for fname in filenames:
+            self.failUnless(isinstance(fname, unicode))
+
+        for fname in TEST_FILENAMES:
+            if fname not in filenames:
+                self.fail("Cannot find %r in %r" % (fname, filenames))
+
+    @patch('os.open')
+    def test_open_unicode(self, mock):
+        # NOTE(review): os.open is patched here, but open_unicode() calls the
+        # builtin open(); the IOError presumably comes from the directory not
+        # existing -- confirm.
+
+        self.failUnlessRaises(IOError,
+                              open_unicode,
+                              u'/dummy_directory/lumière.txt')
+
+
+class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
+    # Recorded platform data for a Linux host whose filesystem and stdout
+    # encodings are both UTF-8; argv and dirlist are UTF-8 byte strings.
+    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
+    argv = 'lumi\xc3\xa8re'
+    platform = 'linux2'
+    filesystemencoding = 'UTF-8'
+    stdoutencoding = 'UTF-8'
+    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
+
+
+class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
+    # Recorded platform data for a Linux host whose filesystem and stdout
+    # encodings are both ISO-8859-1; argv and dirlist are latin1 byte strings.
+    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
+    argv = 'lumi\xe8re'
+    platform = 'linux2'
+    filesystemencoding = 'ISO-8859-1'
+    stdoutencoding = 'ISO-8859-1'
+    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
+
+class WindowsXP(StringUtils, unittest.TestCase):
+    # Recorded platform data for Windows XP with a cp850 console; dirlist
+    # holds unicode objects because the Unicode listdir API is used on win32.
+    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
+    argv = 'lumi\xe8re'
+    platform = 'win32'
+    filesystemencoding = 'mbcs'
+    stdoutencoding = 'cp850'
+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
+
+    # NOTE(review): presumably trial's class-level "todo" marker, flagging
+    # these tests as expected failures until bug #565 is fixed -- confirm.
+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
+
+class WindowsXP_UTF8(StringUtils, unittest.TestCase):
+    # Same Windows XP host as the WindowsXP fixture, but with the console
+    # reporting code page cp65001 as its stdout encoding.
+    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
+    argv = 'lumi\xe8re'
+    platform = 'win32'
+    filesystemencoding = 'mbcs'
+    stdoutencoding = 'cp65001'
+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
+
+    # NOTE(review): presumably trial's class-level "todo" marker, flagging
+    # these tests as expected failures until bug #565 is fixed -- confirm.
+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
+
+class WindowsVista(StringUtils, unittest.TestCase):
+    # Recorded platform data for Windows Vista with a cp850 console; dirlist
+    # holds unicode objects because the Unicode listdir API is used on win32.
+    uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
+    argv = 'lumi\xe8re'
+    platform = 'win32'
+    filesystemencoding = 'mbcs'
+    stdoutencoding = 'cp850'
+    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
+
+    # NOTE(review): presumably trial's class-level "todo" marker, flagging
+    # these tests as expected failures until bug #565 is fixed -- confirm.
+    todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
+
+class MacOSXLeopard(StringUtils, unittest.TestCase):
+    # Recorded platform data for Mac OS X with a UTF-8 terminal.  The first
+    # dirlist entry is in decomposed form (A followed by U+0308), which
+    # listdir_unicode() is expected to normalize to NFC.
+    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
+    argv = 'lumi\xc3\xa8re'
+    platform = 'darwin'
+    filesystemencoding = 'utf-8'
+    stdoutencoding = 'UTF-8'
+    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
+
+class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
+    # Recorded platform data for Mac OS X with a US-ASCII terminal.  argv is
+    # deliberately left undefined, so the argv/stdout tests in StringUtils
+    # raise SkipTest for this class.
+    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
+    #argv = 'lumiere'
+    platform = 'darwin'
+    filesystemencoding = 'utf-8'
+    stdoutencoding = 'US-ASCII'
+    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
addfile ./src/allmydata/util/stringutils.py
hunk ./src/allmydata/util/stringutils.py 1
+"""
+Functions used to convert inputs from whatever encoding used in the system to
+unicode and back.
+"""
+
+import sys
+import os
+import unicodedata
+from allmydata.util.assertutil import precondition
+from twisted.python import usage
+
+def get_term_encoding():
+    """
+    Returns expected encoding for writing to the terminal and reading
+    arguments from the command-line.
+
+    Falls back to 'ascii' when sys.stdout reports no encoding (its
+    encoding attribute is None).
+    """
+
+    encoding = sys.stdout.encoding
+    # Identity test: None is a singleton, and "==" could be fooled by an
+    # object that defines its own __eq__.
+    if encoding is None:
+        return 'ascii'
+    return encoding
+
+def argv_to_unicode(s):
+    """
+    Decode given argv element to unicode.
+
+    The byte string is decoded with the encoding returned by
+    get_term_encoding(); a usage.UsageError is raised when it cannot be
+    decoded, so that the user gets an error message.
+    """
+
+    precondition(isinstance(s, str), s)
+
+    try:
+        decoded = unicode(s, get_term_encoding())
+    except UnicodeDecodeError:
+        message = "Argument '%s' cannot be decoded as %s." % (s, get_term_encoding())
+        raise usage.UsageError(message)
+    return decoded
+
+def unicode_to_url(s):
+    """
+    Encode an unicode object used in an URL.
+
+    Returns the UTF-8 encoded byte string.
+    """
+
+    precondition(isinstance(s, unicode), s)
+
+    # According to RFC 2718, non-ascii characters in url's must be UTF-8 encoded.
+    utf8_bytes = s.encode('utf-8')
+    return utf8_bytes
+
+def unicode_to_stdout(s):
+    """
+    Encode an unicode object for representation on stdout.
+
+    Characters which the terminal encoding cannot represent are
+    substituted (the 'replace' error handler) instead of raising.
+    """
+
+    precondition(isinstance(s, unicode), s)
+
+    term_encoding = get_term_encoding()
+    return s.encode(term_encoding, 'replace')
+
+def unicode_platform():
+    """
+    Does the current platform handle Unicode filenames natively ?
+
+    True when sys.platform is 'win32' or 'darwin', False otherwise.
+    """
+
+    current = sys.platform
+    return current == 'win32' or current == 'darwin'
+
+class FilenameEncodingError(Exception):
+    """
+    Filename cannot be encoded using the current encoding of your filesystem
+    (%s). Please configure your locale correctly or rename this file.
+    """
+
+    # Raised by the helpers in this module with the offending path or
+    # filename as its only argument.
+    # NOTE(review): the "%s" in the docstring looks like a placeholder for
+    # the encoding name, but nothing in this module interpolates it --
+    # confirm whether a caller is expected to format __doc__.
+    pass
+
+def listdir_unicode_unix(path):
+    """
+    This function emulates an Unicode API under Unix similar to one available
+    under Windows or MacOS X.
+
+    The unicode path is encoded with sys.getfilesystemencoding(), listed
+    with the byte-level os.listdir(), and every entry is decoded back to
+    unicode with the same encoding.
+
+    If badly encoded filenames are encountered, an exception is raised:
+    FilenameEncodingError carrying either the path (when it cannot be
+    encoded) or the first entry which cannot be decoded.
+    """
+    precondition(isinstance(path, unicode), path)
+
+    encoding = sys.getfilesystemencoding()
+    try:
+        byte_path = path.encode(encoding)
+    except UnicodeEncodeError:
+        raise FilenameEncodingError(path)
+
+    # Decode entry by entry in an explicit loop: the failing name is then
+    # known without relying on a list-comprehension variable leaking into the
+    # enclosing scope, and only the decoding itself sits inside the try.
+    filenames = []
+    for fn in os.listdir(byte_path):
+        try:
+            filenames.append(unicode(fn, encoding))
+        except UnicodeDecodeError:
+            raise FilenameEncodingError(fn)
+    return filenames
+
+def listdir_unicode(path, encoding = None):
+    """
+    Wrapper around listdir() which provides safe access to the convenient
+    Unicode API even under Unix.
+
+    Returns the directory entries as NFC-normalized unicode strings.  The
+    encoding argument is accepted but not used by this implementation.
+    """
+
+    precondition(isinstance(path, unicode), path)
+
+    if not unicode_platform():
+        # On other platforms (ie. Unix systems), the byte-level API is used
+        names = listdir_unicode_unix(path)
+    else:
+        # On Windows and MacOS X, the Unicode API is used
+        names = os.listdir(path)
+
+    # Normalize the resulting unicode filenames
+    #
+    # This prevents different OS from generating non-equal unicode strings for
+    # the same filename representation
+    normalized = []
+    for name in names:
+        normalized.append(unicodedata.normalize('NFC', name))
+    return normalized
+
+def open_unicode(path, mode='r'):
+    """
+    Wrapper around open() which provides safe access to the convenient Unicode
+    API even under Unix.
+
+    On Windows and MacOS X the unicode path is handed to open() directly.
+    Elsewhere it is first encoded with sys.getfilesystemencoding(), and
+    FilenameEncodingError is raised when the path cannot be represented in
+    that encoding.  Errors raised by open() itself are not translated.
+    """
+
+    precondition(isinstance(path, unicode), path)
+
+    if unicode_platform():
+        return open(path, mode)
+
+    encoding = sys.getfilesystemencoding()
+
+    # Only the encoding step is guarded, so that an unrelated error raised by
+    # open() is never misreported as a filename encoding problem.
+    try:
+        byte_path = path.encode(encoding)
+    except UnicodeEncodeError:
+        raise FilenameEncodingError(path)
+
+    return open(byte_path, mode)
}
[Fix handling of correctly encoded unicode filenames (#534)
Francois Deppierraz <francois@ctrlaltdel.ch>**20100520004356
 Ignore-this: 8a3a7df214a855f5a12dc0eeab6f2e39
 
 Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
 backup', have been improved to correctly handle filenames containing non-ASCII
 characters.
   
 In the case where Tahoe encounters a filename which cannot be decoded using the
 system encoding, an error will be returned and the operation will fail.  Under
 Linux, this typically happens when the filesystem contains filenames encoded
 with another encoding, for instance latin1, than the system locale, for
 instance UTF-8.  In such case, you'll need to fix your system with tools such
 as 'convmv' before using Tahoe CLI.
   
 All CLI commands have been improved to support non-ASCII parameters such as
 filenames and aliases on all supported Operating Systems except Windows as of
 now.
] {
hunk ./NEWS 3
 User visible changes in Tahoe-LAFS.  -*- outline -*-
 
+* Release 1.7.0
+
+** Bugfixes
+
+*** Unicode filenames handling
+
+Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
+backup', have been improved to correctly handle filenames containing non-ASCII
+characters.
+
+In the case where Tahoe encounters a filename which cannot be decoded using the
+system encoding, an error will be returned and the operation will fail.  Under
+Linux, this typically happens when the filesystem contains filenames encoded
+with another encoding, for instance latin1, than the system locale, for
+instance UTF-8.  In such a case, you'll need to fix your system with tools such
+as 'convmv' before using Tahoe CLI.
+
+All CLI commands have been improved to support non-ASCII parameters such as
+filenames and aliases on all supported Operating Systems except Windows as of
+now.
+
 * Release 1.6.1 (2010-02-27)
 
 ** Bugfixes
hunk ./docs/frontends/CLI.txt 126
 perspective on the graph of files and directories.
 
 Each tahoe node remembers a list of starting points, named "aliases",
-in a file named ~/.tahoe/private/aliases . These aliases are short
-strings that stand in for a directory read- or write- cap. If you use
-the command line "ls" without any "[STARTING_DIR]:" argument, then it
-will use the default alias, which is "tahoe", therefore "tahoe ls" has
-the same effect as "tahoe ls tahoe:".  The same goes for the other
-commands which can reasonably use a default alias: get, put, mkdir,
-mv, and rm.
+in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8
+encoded strings that stand in for a directory read- or write- cap. If
+you use the command line "ls" without any "[STARTING_DIR]:" argument,
+then it will use the default alias, which is "tahoe", therefore "tahoe
+ls" has the same effect as "tahoe ls tahoe:".  The same goes for the
+other commands which can reasonably use a default alias: get, put,
+mkdir, mv, and rm.
 
 For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not
 found in ~/.tahoe/private/aliases, the CLI will use the contents of
hunk ./src/allmydata/scripts/cli.py 4
 import os.path, re, sys, fnmatch
 from twisted.python import usage
 from allmydata.scripts.common import BaseOptions, get_aliases
+from allmydata.util.stringutils import argv_to_unicode
 
 NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
 
hunk ./src/allmydata/scripts/cli.py 53
 
 class MakeDirectoryOptions(VDriveOptions):
     def parseArgs(self, where=""):
-        self.where = where
+        self.where = argv_to_unicode(where)
     longdesc = """Create a new directory, either unlinked or as a subdirectory."""
 
 class AddAliasOptions(VDriveOptions):
hunk ./src/allmydata/scripts/cli.py 58
     def parseArgs(self, alias, cap):
-        self.alias = alias
+        self.alias = argv_to_unicode(alias)
         self.cap = cap
 
     def getSynopsis(self):
hunk ./src/allmydata/scripts/cli.py 68
 
 class CreateAliasOptions(VDriveOptions):
     def parseArgs(self, alias):
-        self.alias = alias
+        self.alias = argv_to_unicode(alias)
 
     def getSynopsis(self):
         return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 87
         ("json", None, "Show the raw JSON output"),
         ]
     def parseArgs(self, where=""):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     longdesc = """
     List the contents of some portion of the grid.
hunk ./src/allmydata/scripts/cli.py 122
         # tahoe get FOO bar              # write to local file
         # tahoe get tahoe:FOO bar        # same
 
-        self.from_file = arg1
-        self.to_file = arg2
+        self.from_file = argv_to_unicode(arg1)
+
+        if arg2:
+            self.to_file = argv_to_unicode(arg2)
+        else:
+            self.to_file = None
+
         if self.to_file == "-":
             self.to_file = None
 
hunk ./src/allmydata/scripts/cli.py 160
         # see Examples below
 
         if arg1 is not None and arg2 is not None:
-            self.from_file = arg1
-            self.to_file = arg2
+            self.from_file = argv_to_unicode(arg1)
+            self.to_file =  argv_to_unicode(arg2)
         elif arg1 is not None and arg2 is None:
hunk ./src/allmydata/scripts/cli.py 163
-            self.from_file = arg1 # might be "-"
+            self.from_file = argv_to_unicode(arg1) # might be "-"
             self.to_file = None
         else:
             self.from_file = None
hunk ./src/allmydata/scripts/cli.py 168
             self.to_file = None
-        if self.from_file == "-":
+        if self.from_file == u"-":
             self.from_file = None
 
     def getSynopsis(self):
hunk ./src/allmydata/scripts/cli.py 206
     def parseArgs(self, *args):
         if len(args) < 2:
             raise usage.UsageError("cp requires at least two arguments")
-        self.sources = args[:-1]
-        self.destination = args[-1]
+        self.sources = map(argv_to_unicode, args[:-1])
+        self.destination = argv_to_unicode(args[-1])
     def getSynopsis(self):
         return "Usage: tahoe [options] cp FROM.. TO"
     longdesc = """
hunk ./src/allmydata/scripts/cli.py 237
 
 class RmOptions(VDriveOptions):
     def parseArgs(self, where):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 244
 
 class MvOptions(VDriveOptions):
     def parseArgs(self, frompath, topath):
-        self.from_file = frompath
-        self.to_file = topath
+        self.from_file = argv_to_unicode(frompath)
+        self.to_file = argv_to_unicode(topath)
 
     def getSynopsis(self):
         return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 263
 
 class LnOptions(VDriveOptions):
     def parseArgs(self, frompath, topath):
-        self.from_file = frompath
-        self.to_file = topath
+        self.from_file = argv_to_unicode(frompath)
+        self.to_file = argv_to_unicode(topath)
 
     def getSynopsis(self):
         return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 288
         self['exclude'] = set()
 
     def parseArgs(self, localdir, topath):
-        self.from_dir = localdir
-        self.to_dir = topath
+        self.from_dir = argv_to_unicode(localdir)
+        self.to_dir = argv_to_unicode(topath)
 
     def getSynopsis(Self):
         return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
hunk ./src/allmydata/scripts/cli.py 346
         ("info", "i", "Open the t=info page for the file"),
         ]
     def parseArgs(self, where=''):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 363
         ("raw", "r", "Display raw JSON data instead of parsed"),
         ]
     def parseArgs(self, where=''):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 376
         ("raw", "r", "Display raw JSON data instead of parsed"),
         ]
     def parseArgs(self, where=''):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 392
         ("add-lease", None, "Add/renew lease on all shares"),
         ]
     def parseArgs(self, where=''):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/cli.py 411
         ("verbose", "v", "Be noisy about what is happening."),
         ]
     def parseArgs(self, where=''):
-        self.where = where
+        self.where = argv_to_unicode(where)
 
     def getSynopsis(self):
         return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
hunk ./src/allmydata/scripts/common.py 3
 
 import os, sys, urllib
+import codecs
 from twisted.python import usage
hunk ./src/allmydata/scripts/common.py 5
-
+from allmydata.util.stringutils import unicode_to_url
+from allmydata.util.assertutil import precondition
 
 class BaseOptions:
     # unit tests can override these to point at StringIO instances
hunk ./src/allmydata/scripts/common.py 105
     except EnvironmentError:
         pass
     try:
-        f = open(aliasfile, "r")
+        f = codecs.open(aliasfile, "r", "utf-8")
         for line in f.readlines():
             line = line.strip()
             if line.startswith("#") or not line:
hunk ./src/allmydata/scripts/common.py 112
                 continue
             name, cap = line.split(":", 1)
             # normalize it: remove http: prefix, urldecode
-            cap = cap.strip()
+            cap = cap.strip().encode('utf-8')
             aliases[name] = uri.from_string_dirnode(cap).to_string()
     except EnvironmentError:
         pass
hunk ./src/allmydata/scripts/common.py 143
     # and default is not found in aliases, an UnknownAliasError is
     # raised.
     path = path.strip()
-    if uri.has_uri_prefix(path):
+    if uri.has_uri_prefix(path.encode('utf-8')):
         # We used to require "URI:blah:./foo" in order to get a subpath,
         # stripping out the ":./" sequence. We still allow that for compatibility,
         # but now also allow just "URI:blah/foo".
hunk ./src/allmydata/scripts/common.py 185
 
 def escape_path(path):
     segments = path.split("/")
-    return "/".join([urllib.quote(s) for s in segments])
+    return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])
hunk ./src/allmydata/scripts/tahoe_add_alias.py 3
 
 import os.path
+import codecs
+import sys
 from allmydata import uri
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_aliases
hunk ./src/allmydata/scripts/tahoe_add_alias.py 9
 from allmydata.util.fileutil import move_into_place
+from allmydata.util.stringutils import unicode_to_stdout
+
 
 def add_line_to_aliasfile(aliasfile, alias, cap):
     # we use os.path.exists, rather than catching EnvironmentError, to avoid
hunk ./src/allmydata/scripts/tahoe_add_alias.py 17
     # clobbering the valuable alias file in case of spurious or transient
     # filesystem errors.
     if os.path.exists(aliasfile):
-        f = open(aliasfile, "r")
+        f = codecs.open(aliasfile, "r", "utf-8")
         aliases = f.read()
         f.close()
         if not aliases.endswith("\n"):
hunk ./src/allmydata/scripts/tahoe_add_alias.py 25
     else:
         aliases = ""
     aliases += "%s: %s\n" % (alias, cap)
-    f = open(aliasfile+".tmp", "w")
+    f = codecs.open(aliasfile+".tmp", "w", "utf-8")
     f.write(aliases)
     f.close()
     move_into_place(aliasfile+".tmp", aliasfile)
hunk ./src/allmydata/scripts/tahoe_add_alias.py 48
 
     add_line_to_aliasfile(aliasfile, alias, cap)
 
-    print >>stdout, "Alias '%s' added" % (alias,)
+    print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
     return 0
 
 def create_alias(options):
hunk ./src/allmydata/scripts/tahoe_add_alias.py 81
 
     add_line_to_aliasfile(aliasfile, alias, new_uri)
 
-    print >>stdout, "Alias '%s' created" % (alias,)
+    print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
     return 0
 
 def list_aliases(options):
hunk ./src/allmydata/scripts/tahoe_backup.py 12
 from allmydata.scripts.common_http import do_http
 from allmydata.util import time_format
 from allmydata.scripts import backupdb
+import sys
+from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
+from allmydata.util.assertutil import precondition
+from twisted.python import usage
+
 
 class HTTPError(Exception):
     pass
hunk ./src/allmydata/scripts/tahoe_backup.py 162
 
     def verboseprint(self, msg):
         if self.verbosity >= 2:
+            if isinstance(msg, unicode):
+                msg = unicode_to_stdout(msg)
+
             print >>self.options.stdout, msg
 
     def warn(self, msg):
hunk ./src/allmydata/scripts/tahoe_backup.py 171
         print >>self.options.stderr, msg
 
     def process(self, localpath):
+        precondition(isinstance(localpath, unicode), localpath)
         # returns newdircap
 
         self.verboseprint("processing %s" % localpath)
hunk ./src/allmydata/scripts/tahoe_backup.py 179
         compare_contents = {} # childname -> rocap
 
         try:
-            children = os.listdir(localpath)
+            children = listdir_unicode(localpath)
         except EnvironmentError:
             self.directories_skipped += 1
             self.warn("WARNING: permission denied on directory %s" % localpath)
hunk ./src/allmydata/scripts/tahoe_backup.py 295
 
     # This function will raise an IOError exception when called on an unreadable file
     def upload(self, childpath):
+        precondition(isinstance(childpath, unicode), childpath)
+
         #self.verboseprint("uploading %s.." % childpath)
         metadata = get_local_metadata(childpath)
 
hunk ./src/allmydata/scripts/tahoe_backup.py 305
 
         if must_upload:
             self.verboseprint("uploading %s.." % childpath)
-            infileobj = open(os.path.expanduser(childpath), "rb")
+            infileobj = open_unicode(os.path.expanduser(childpath), "rb")
             url = self.options['node-url'] + "uri"
             resp = do_http("PUT", url, infileobj)
             if resp.status not in (200, 201):
hunk ./src/allmydata/scripts/tahoe_cp.py 5
 import os.path
 import urllib
 import simplejson
+import sys
 from cStringIO import StringIO
 from twisted.python.failure import Failure
 from allmydata.scripts.common import get_alias, escape_path, \
hunk ./src/allmydata/scripts/tahoe_cp.py 12
                                      DefaultAliasMarker, UnknownAliasError
 from allmydata.scripts.common_http import do_http
 from allmydata import uri
+from twisted.python import usage
+from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
+from allmydata.util.assertutil import precondition
+
 
 def ascii_or_none(s):
     if s is None:
hunk ./src/allmydata/scripts/tahoe_cp.py 78
 
 class LocalFileSource:
     def __init__(self, pathname):
+        precondition(isinstance(pathname, unicode), pathname)
         self.pathname = pathname
 
     def need_to_copy_bytes(self):
hunk ./src/allmydata/scripts/tahoe_cp.py 89
 
 class LocalFileTarget:
     def __init__(self, pathname):
+        precondition(isinstance(pathname, unicode), pathname)
         self.pathname = pathname
     def put_file(self, inf):
         outf = open(self.pathname, "wb")
hunk ./src/allmydata/scripts/tahoe_cp.py 102
 
 class LocalMissingTarget:
     def __init__(self, pathname):
+        precondition(isinstance(pathname, unicode), pathname)
         self.pathname = pathname
 
     def put_file(self, inf):
hunk ./src/allmydata/scripts/tahoe_cp.py 116
 
 class LocalDirectorySource:
     def __init__(self, progressfunc, pathname):
+        precondition(isinstance(pathname, unicode), pathname)
+
         self.progressfunc = progressfunc
         self.pathname = pathname
         self.children = None
hunk ./src/allmydata/scripts/tahoe_cp.py 126
         if self.children is not None:
             return
         self.children = {}
-        children = os.listdir(self.pathname)
+        children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             pn = os.path.join(self.pathname, n)
hunk ./src/allmydata/scripts/tahoe_cp.py 143
 
 class LocalDirectoryTarget:
     def __init__(self, progressfunc, pathname):
+        precondition(isinstance(pathname, unicode), pathname)
+
         self.progressfunc = progressfunc
         self.pathname = pathname
         self.children = None
hunk ./src/allmydata/scripts/tahoe_cp.py 153
         if self.children is not None:
             return
         self.children = {}
-        children = os.listdir(self.pathname)
+        children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             pn = os.path.join(self.pathname, n)
hunk ./src/allmydata/scripts/tahoe_cp.py 176
         return LocalDirectoryTarget(self.progressfunc, pathname)
 
     def put_file(self, name, inf):
+        precondition(isinstance(name, unicode), name)
         pathname = os.path.join(self.pathname, name)
hunk ./src/allmydata/scripts/tahoe_cp.py 178
-        outf = open(pathname, "wb")
+        outf = open_unicode(pathname, "wb")
         while True:
             data = inf.read(32768)
             if not data:
hunk ./src/allmydata/scripts/tahoe_cp.py 371
                 if self.writecap:
                     url = self.nodeurl + "/".join(["uri",
                                                    urllib.quote(self.writecap),
-                                                   urllib.quote(name.encode('utf-8'))])
+                                                   urllib.quote(unicode_to_url(name))])
                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                       writecap, readcap, url)
             elif data[0] == "dirnode":
hunk ./src/allmydata/scripts/tahoe_ls.py 7
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http
+from allmydata.util.stringutils import unicode_to_stdout
 
 def list(options):
     nodeurl = options['node-url']
hunk ./src/allmydata/scripts/tahoe_ls.py 134
             line.append(ctime_s)
         if not options["classify"]:
             classify = ""
-        line.append(name + classify)
+        line.append(unicode_to_stdout(name) + classify)
         if options["uri"]:
             line.append(uri)
         if options["readonly-uri"]:
hunk ./src/allmydata/scripts/tahoe_manifest.py 88
                 try:
                     print >>stdout, d["cap"], "/".join(d["path"])
                 except UnicodeEncodeError:
-                    print >>stdout, d["cap"], "/".join([p.encode("utf-8")
+                    print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
                                                         for p in d["path"]])
 
 def manifest(options):
hunk ./src/allmydata/scripts/tahoe_mkdir.py 5
 import urllib
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
+from allmydata.util.stringutils import unicode_to_url
 
 def mkdir(options):
     nodeurl = options['node-url']
hunk ./src/allmydata/scripts/tahoe_mkdir.py 39
         path = path[:-1]
     # path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
     url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
-                                           urllib.quote(path))
+                                           urllib.quote(unicode_to_url(path)))
     resp = do_http("POST", url)
     check_http_error(resp, stderr)
     new_uri = resp.read().strip()
hunk ./src/allmydata/test/test_cli.py 9
 import urllib
 import re
 import simplejson
+import sys
 
 from allmydata.util import fileutil, hashutil, base32
 from allmydata import uri
hunk ./src/allmydata/test/test_cli.py 30
 from twisted.internet import threads # CLI tests use deferToThread
 from twisted.python import usage
 
+from allmydata.util.stringutils import listdir_unicode, open_unicode, \
+     unicode_platform, FilenameEncodingError
+
 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
 
 
hunk ./src/allmydata/test/test_cli.py 291
                    "work": "WA",
                    "c": "CA"}
         def ga1(path):
-            return get_alias(aliases, path, "tahoe")
+            return get_alias(aliases, path, u"tahoe")
         uses_lettercolon = common.platform_uses_lettercolon_drivename()
         self.failUnlessEqual(ga1("bare"), ("TA", "bare"))
         self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file"))
hunk ./src/allmydata/test/test_cli.py 386
         # default set to something that isn't in the aliases argument should
         # raise an UnknownAliasError.
         def ga4(path):
-            return get_alias(aliases, path, "badddefault:")
+            return get_alias(aliases, path, u"badddefault:")
         self.failUnlessRaises(common.UnknownAliasError, ga4, "afile")
         self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/")
 
hunk ./src/allmydata/test/test_cli.py 394
             old = common.pretend_platform_uses_lettercolon
             try:
                 common.pretend_platform_uses_lettercolon = True
-                retval = get_alias(aliases, path, "baddefault:")
+                retval = get_alias(aliases, path, u"baddefault:")
             finally:
                 common.pretend_platform_uses_lettercolon = old
             return retval
hunk ./src/allmydata/test/test_cli.py 400
         self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows")
 
+    def test_listdir_unicode_good(self):
+        basedir = u"cli/common/listdir_unicode_good"
+        fileutil.make_dirs(basedir)
+
+        files = (u'Lôzane', u'Bern', u'Genève')
+
+        for file in files:
+            open(os.path.join(basedir, file), "w").close()
+
+        for file in listdir_unicode(basedir):
+            self.failUnlessEqual(file in files, True)
+
+    def test_listdir_unicode_bad(self):
+        if unicode_platform():
+            raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.")
+
+        basedir = u"cli/common/listdir_unicode_bad"
+        fileutil.make_dirs(basedir)
+
+        files = (u'Lôzane', u'Bern', u'Genève')
+
+        # We use a wrong encoding on purpose
+        if sys.getfilesystemencoding() == 'UTF-8':
+            encoding = 'latin1'
+        else:
+            encoding = 'UTF-8'
+
+        for file in files:
+            path = os.path.join(basedir, file).encode(encoding)
+            open(path, "w").close()
+
+        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir)
 
 class Help(unittest.TestCase):
 
hunk ./src/allmydata/test/test_cli.py 631
             self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:"))
         d.addCallback(_check_not_corrupted)
 
-        return d
 
hunk ./src/allmydata/test/test_cli.py 632
+    def test_create_unicode(self):
+        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
+            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
+
+        if sys.stdout.encoding not in ('UTF-8'):
+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
+
+        self.basedir = "cli/CreateAlias/create_unicode"
+        self.set_up_grid()
+        aliasfile = os.path.join(self.get_clientdir(), "private", "aliases")
+
+        d = self.do_cli("create-alias", "études")
+        def _check_create_unicode((rc,stdout,stderr)):
+            self.failUnlessEqual(rc, 0)
+            self.failIf(stderr)
+
+            # If stdout only supports ASCII, accented characters are
+            # replaced by '?'
+            if sys.stdout.encoding == "ANSI_X3.4-1968":
+                self.failUnless("Alias '?tudes' created" in stdout)
+            else:
+                self.failUnless("Alias 'études' created" in stdout)
+
+            aliases = get_aliases(self.get_clientdir())
+            self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
+        d.addCallback(_check_create_unicode)
+
+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
+        def _check_ls1((rc, stdout, stderr)):
+            self.failUnlessEqual(rc, 0)
+            self.failIf(stderr)
+
+            self.failUnlessEqual(stdout, "")
+        d.addCallback(_check_ls1)
+
+        d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
+          stdin="Blah blah blah"))
+
+        d.addCallback(lambda res: self.do_cli("ls", "études:"))
+        def _check_ls2((rc, stdout, stderr)):
+            self.failUnlessEqual(rc, 0)
+            self.failIf(stderr)
+
+            self.failUnlessEqual(stdout, "uploaded.txt\n")
+        d.addCallback(_check_ls2)
+
+        d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt"))
+        def _check_get((rc, stdout, stderr)):
+            self.failUnlessEqual(rc, 0)
+            self.failIf(stderr)
+            self.failUnlessEqual(stdout, "Blah blah blah")
+        d.addCallback(_check_get)
+
+        # Ensure that a Unicode filename in a Unicode alias works as expected
+        d.addCallback(lambda res: self.do_cli("put", "-", "études:lumière.txt",
+          stdin="Let the sunshine In!"))
+
+        d.addCallback(lambda res: self.do_cli("get",
+                      get_aliases(self.get_clientdir())[u"études"] + "/lumière.txt"))
+        def _check_get((rc, stdout, stderr)):
+            self.failUnlessEqual(rc, 0)
+            self.failIf(stderr)
+            self.failUnlessEqual(stdout, "Let the sunshine In!")
+        d.addCallback(_check_get)
+
+        return d
 
 class Ln(GridTestMixin, CLITestMixin, unittest.TestCase):
     def _create_test_file(self):
hunk ./src/allmydata/test/test_cli.py 969
         return d
 
 
+    def test_immutable_from_file_unicode(self):
+        if sys.stdout.encoding not in ('UTF-8'):
+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
+      
+        # tahoe put file.txt "à trier.txt"
+        self.basedir = os.path.dirname(self.mktemp())
+        self.set_up_grid()
+
+        rel_fn = os.path.join(self.basedir, "DATAFILE")
+        abs_fn = os.path.abspath(rel_fn)
+        # we make the file small enough to fit in a LIT file, for speed
+        DATA = "short file"
+        f = open(rel_fn, "w")
+        f.write(DATA)
+        f.close()
+
+        d = self.do_cli("create-alias", "tahoe")
+
+        d.addCallback(lambda res:
+                      self.do_cli("put", rel_fn, "à trier.txt"))
+        def _uploaded((rc,stdout,stderr)):
+            readcap = stdout.strip()
+            self.failUnless(readcap.startswith("URI:LIT:"))
+            self.failUnless("201 Created" in stderr, stderr)
+            self.readcap = readcap
+        d.addCallback(_uploaded)
+
+        d.addCallback(lambda res:
+                      self.do_cli("get", "tahoe:à trier.txt"))
+        d.addCallback(lambda (rc,stdout,stderr):
+                      self.failUnlessEqual(stdout, DATA))
+
+        return d
+
 class List(GridTestMixin, CLITestMixin, unittest.TestCase):
     def test_list(self):
         self.basedir = "cli/List/list"
hunk ./src/allmydata/test/test_cli.py 1284
                               o.parseOptions, ["onearg"])
 
     def test_unicode_filename(self):
+        if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
+            raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
+
+        if sys.stdout.encoding not in ('UTF-8'):
+            raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
+
         self.basedir = "cli/Cp/unicode_filename"
         self.set_up_grid()
hunk ./src/allmydata/test/test_cli.py 1292
+        d = self.do_cli("create-alias", "tahoe")
 
hunk ./src/allmydata/test/test_cli.py 1294
-        fn1 = os.path.join(self.basedir, "Ärtonwall")
+        # Use unicode strings when calling os functions
+        fn1 = os.path.join(self.basedir, u"Ärtonwall")
         DATA1 = "unicode file content"
         fileutil.write(fn1, DATA1)
 
hunk ./src/allmydata/test/test_cli.py 1299
-        fn2 = os.path.join(self.basedir, "Metallica")
-        DATA2 = "non-unicode file content"
-        fileutil.write(fn2, DATA2)
-
-        # Bug #534
-        # Assure that uploading a file whose name contains unicode character
-        # doesn't prevent further uploads in the same directory
-        d = self.do_cli("create-alias", "tahoe")
-        d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
-        d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
+        d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:"))
 
         d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
hunk ./src/allmydata/test/test_cli.py 1304
 
+        fn2 = os.path.join(self.basedir, u"Metallica")
+        DATA2 = "non-unicode file content"
+        fileutil.write(fn2, DATA2)
+
+        d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
+
         d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
         d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
 
hunk ./src/allmydata/test/test_cli.py 1313
+        d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
+        d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, "Metallica\nÄrtonwall\n"))
+
         return d
hunk ./src/allmydata/test/test_cli.py 1317
-    test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms.  See issue ticket #534."
 
     def test_dangling_symlink_vs_recursion(self):
         if not hasattr(os, 'symlink'):
hunk ./src/allmydata/test/test_cli.py 1423
         return d
 
 
+class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
+    def test_unicode_mkdir(self):
+        self.basedir = os.path.dirname(self.mktemp())
+        self.set_up_grid()
+
+        d = self.do_cli("create-alias", "tahoe")
+        d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead"))
+
+        return d
+ 
+
 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
 
     def writeto(self, path, data):
}

Context:

[docs: line-wrap README.txt
zooko@zooko.com**20100518174240
 Ignore-this: 670a02d360df7de51ebdcf4fae752577
] 
[Hush pyflakes warnings
Kevan Carstensen <kevan@isnotajoke.com>**20100515184344
 Ignore-this: fd602c3bba115057770715c36a87b400
] 
[setup: new improved misc/show-tool-versions.py
zooko@zooko.com**20100516050122
 Ignore-this: ce9b1de1b35b07d733e6cf823b66335a
] 
[Improve code coverage of the Tahoe2PeerSelector tests.
Kevan Carstensen <kevan@isnotajoke.com>**20100515032913
 Ignore-this: 793151b63ffa65fdae6915db22d9924a
] 
[Remove a comment that no longer makes sense.
Kevan Carstensen <kevan@isnotajoke.com>**20100514203516
 Ignore-this: 956983c7e7c7e4477215494dfce8f058
] 
[docs: update docs/architecture.txt to more fully and correctly explain the upload procedure
zooko@zooko.com**20100514043458
 Ignore-this: 538b6ea256a49fed837500342092efa3
] 
[Fix up the behavior of #778, per reviewers' comments
Kevan Carstensen <kevan@isnotajoke.com>**20100514004917
 Ignore-this: 9c20b60716125278b5456e8feb396bff
 
   - Make some important utility functions clearer and more thoroughly 
     documented.
   - Assert in upload.servers_of_happiness that the buckets attributes
     of PeerTrackers passed to it are mutually disjoint.
   - Get rid of some silly non-Pythonisms that I didn't see when I first
     wrote these patches.
   - Make sure that should_add_server returns true when queried about a 
     shnum that it doesn't know about yet.
   - Change Tahoe2PeerSelector.preexisting_shares to map a shareid to a set
     of peerids, alter dependencies to deal with that.
   - Remove upload.should_add_servers, because it is no longer necessary
   - Move upload.shares_of_happiness and upload.shares_by_server to a utility
     file.
   - Change some points in Tahoe2PeerSelector.
   - Compute servers_of_happiness using a bipartite matching algorithm that 
     we know is optimal instead of an ad-hoc greedy algorithm that isn't.
   - Change servers_of_happiness to just take a sharemap as an argument,
     change its callers to merge existing_shares and used_peers before 
     calling it.
   - Change an error message in the encoder to be more appropriate for 
     servers of happiness.
   - Clarify the wording of an error message in immutable/upload.py
   - Refactor a happiness failure message to happinessutil.py, and make
     immutable/upload.py and immutable/encode.py use it.
   - Move the word "only" as far to the right as possible in failure 
     messages.
   - Use a better definition of progress during peer selection.
   - Do read-only peer share detection queries in parallel, not sequentially.
   - Clean up logging semantics; print the query statistics whenever an
     upload is unsuccessful, not just in one case.
 
] 
[Alter the error message when an upload fails, per some comments in #778.
Kevan Carstensen <kevan@isnotajoke.com>**20091230210344
 Ignore-this: ba97422b2f9737c46abeb828727beb1
 
 When I first implemented #778, I just altered the error messages to refer to
 servers where they referred to shares. The resulting error messages weren't
 very good. These are a bit better.
] 
[Change "UploadHappinessError" to "UploadUnhappinessError"
Kevan Carstensen <kevan@isnotajoke.com>**20091205043037
 Ignore-this: 236b64ab19836854af4993bb5c1b221a
] 
[Alter the error message returned when peer selection fails
Kevan Carstensen <kevan@isnotajoke.com>**20091123002405
 Ignore-this: b2a7dc163edcab8d9613bfd6907e5166
 
 The Tahoe2PeerSelector returned either NoSharesError or NotEnoughSharesError
 for a variety of error conditions that weren't informatively described by them.
 This patch creates a new error, UploadHappinessError, replaces uses of 
 NoSharesError and NotEnoughSharesError with it, and alters the error message
 raised with the errors to be more in line with the new servers_of_happiness
 behavior. See ticket #834 for more information.
] 
[Eliminate overcounting iof servers_of_happiness in Tahoe2PeerSelector; also reorganize some things.
Kevan Carstensen <kevan@isnotajoke.com>**20091118014542
 Ignore-this: a6cb032cbff74f4f9d4238faebd99868
] 
[Change stray "shares_of_happiness" to "servers_of_happiness"
Kevan Carstensen <kevan@isnotajoke.com>**20091116212459
 Ignore-this: 1c971ba8c3c4d2e7ba9f020577b28b73
] 
[Alter Tahoe2PeerSelector to make sure that it recognizes existing shares on readonly servers, fixing an issue in #778
Kevan Carstensen <kevan@isnotajoke.com>**20091116192805
 Ignore-this: 15289f4d709e03851ed0587b286fd955
] 
[Alter 'immutable/encode.py' and 'immutable/upload.py' to use servers_of_happiness instead of shares_of_happiness.
Kevan Carstensen <kevan@isnotajoke.com>**20091104111222
 Ignore-this: abb3283314820a8bbf9b5d0cbfbb57c8
] 
[Alter the signature of set_shareholders in IEncoder to add a 'servermap' parameter, which gives IEncoders enough information to perform a sane check for servers_of_happiness.
Kevan Carstensen <kevan@isnotajoke.com>**20091104033241
 Ignore-this: b3a6649a8ac66431beca1026a31fed94
] 
[Alter CiphertextDownloader to work with servers_of_happiness
Kevan Carstensen <kevan@isnotajoke.com>**20090924041932
 Ignore-this: e81edccf0308c2d3bedbc4cf217da197
] 
[Revisions of the #778 tests, per reviewers' comments
Kevan Carstensen <kevan@isnotajoke.com>**20100514012542
 Ignore-this: 735bbc7f663dce633caeb3b66a53cf6e
 
 - Fix comments and confusing naming.
 - Add tests for the new error messages suggested by David-Sarah
   and Zooko.
 - Alter existing tests for new error messages.
 - Make sure that the tests continue to work with the trunk.
 - Add a test for a mutual disjointedness assertion that I added to
   upload.servers_of_happiness.
 - Fix the comments to correctly reflect read-onlyness
 - Add a test for an edge case in should_add_server
 - Add an assertion to make sure that share redistribution works as it 
   should
 - Alter tests to work with revised servers_of_happiness semantics
 - Remove tests for should_add_server, since that function no longer exists.
 - Alter tests to know about merge_peers, and to use it before calling 
   servers_of_happiness.
 - Add tests for merge_peers.
 - Add Zooko's puzzles to the tests.
 - Edit encoding tests to expect the new kind of failure message.
 - Edit tests to expect error messages with the word "only" moved as far
   to the right as possible.
 - Extended and cleaned up some helper functions.
 - Changed some tests to call more appropriate helper functions.
 - Added a test for the failing redistribution algorithm
 - Added a test for the progress message
 - Added a test for the upper bound on readonly peer share discovery.
 
] 
[Alter various unit tests to work with the new happy behavior
Kevan Carstensen <kevan@isnotajoke.com>**20100107181325
 Ignore-this: 132032bbf865e63a079f869b663be34a
] 
[Replace "UploadHappinessError" with "UploadUnhappinessError" in tests.
Kevan Carstensen <kevan@isnotajoke.com>**20091205043453
 Ignore-this: 83f4bc50c697d21b5f4e2a4cd91862ca
] 
[Add tests for the behavior described in #834.
Kevan Carstensen <kevan@isnotajoke.com>**20091123012008
 Ignore-this: d8e0aa0f3f7965ce9b5cea843c6d6f9f
] 
[Re-work 'test_upload.py' to be more readable; add more tests for #778
Kevan Carstensen <kevan@isnotajoke.com>**20091116192334
 Ignore-this: 7e8565f92fe51dece5ae28daf442d659
] 
[Test Tahoe2PeerSelector to make sure that it recognizeses existing shares on readonly servers
Kevan Carstensen <kevan@isnotajoke.com>**20091109003735
 Ignore-this: 12f9b4cff5752fca7ed32a6ebcff6446
] 
[Add more tests for comment:53 in ticket #778
Kevan Carstensen <kevan@isnotajoke.com>**20091104112849
 Ignore-this: 3bb2edd299a944cc9586e14d5d83ec8c
] 
[Add a test for upload.shares_by_server
Kevan Carstensen <kevan@isnotajoke.com>**20091104111324
 Ignore-this: f9802e82d6982a93e00f92e0b276f018
] 
[Minor tweak to an existing test -- make the first server read-write, instead of read-only
Kevan Carstensen <kevan@isnotajoke.com>**20091104034232
 Ignore-this: a951a46c93f7f58dd44d93d8623b2aee
] 
[Alter tests to use the new form of set_shareholders
Kevan Carstensen <kevan@isnotajoke.com>**20091104033602
 Ignore-this: 3deac11fc831618d11441317463ef830
] 
[Refactor some behavior into a mixin, and add tests for the behavior described in #778
"Kevan Carstensen" <kevan@isnotajoke.com>**20091030091908
 Ignore-this: a6f9797057ca135579b249af3b2b66ac
] 
[Alter NoNetworkGrid to allow the creation of readonly servers for testing purposes.
Kevan Carstensen <kevan@isnotajoke.com>**20091018013013
 Ignore-this: e12cd7c4ddeb65305c5a7e08df57c754
] 
[Update 'docs/architecture.txt' to reflect readonly share discovery
kevan@isnotajoke.com**20100514003852
 Ignore-this: 7ead71b34df3b1ecfdcfd3cb2882e4f9
] 
[Alter the wording in docs/architecture.txt to more accurately describe the servers_of_happiness behavior.
Kevan Carstensen <kevan@isnotajoke.com>**20100428002455
 Ignore-this: 6eff7fa756858a1c6f73728d989544cc
] 
[Alter wording in 'interfaces.py' to be correct wrt #778
"Kevan Carstensen" <kevan@isnotajoke.com>**20091205034005
 Ignore-this: c9913c700ac14e7a63569458b06980e0
] 
[Update 'docs/configuration.txt' to reflect the servers_of_happiness behavior.
Kevan Carstensen <kevan@isnotajoke.com>**20091205033813
 Ignore-this: 5e1cb171f8239bfb5b565d73c75ac2b8
] 
[Clarify quickstart instructions for installing pywin32
david-sarah@jacaranda.org**20100511180300
 Ignore-this: d4668359673600d2acbc7cd8dd44b93c
] 
[web: add a simple test that you can load directory.xhtml
zooko@zooko.com**20100510063729
 Ignore-this: e49b25fa3c67b3c7a56c8b1ae01bb463
] 
[setup: fix typos in misc/show-tool-versions.py
zooko@zooko.com**20100510063615
 Ignore-this: 2181b1303a0e288e7a9ebd4c4855628
] 
[setup: show code-coverage tool versions in show-tools-versions.py
zooko@zooko.com**20100510062955
 Ignore-this: 4b4c68eb3780b762c8dbbd22b39df7cf
] 
[docs: update README, mv it to README.txt, update setup.py
zooko@zooko.com**20100504094340
 Ignore-this: 40e28ca36c299ea1fd12d3b91e5b421c
] 
[Dependency on Windmill test framework is not needed yet.
david-sarah@jacaranda.org**20100504161043
 Ignore-this: be088712bec650d4ef24766c0026ebc8
] 
[tests: pass z to tar so that BSD tar will know to ungzip
zooko@zooko.com**20100504090628
 Ignore-this: 1339e493f255e8fc0b01b70478f23a09
] 
[setup: update comments and URLs in setup.cfg
zooko@zooko.com**20100504061653
 Ignore-this: f97692807c74bcab56d33100c899f829
] 
[setup: reorder and extend the show-tool-versions script, the better to glean information about our new buildslaves
zooko@zooko.com**20100504045643
 Ignore-this: 836084b56b8d4ee8f1de1f4efb706d36
] 
[CLI: Support for https url in option --node-url
Francois Deppierraz <francois@ctrlaltdel.ch>**20100430185609
 Ignore-this: 1717176b4d27c877e6bc67a944d9bf34
 
 This patch modifies the regular expression used for verifying of '--node-url'
 parameter.  Support for accessing a Tahoe gateway over HTTPS was already
 present, thanks to Python's urllib.
 
] 
[backupdb.did_create_directory: use REPLACE INTO, not INSERT INTO + ignore error
Brian Warner <warner@lothar.com>**20100428050803
 Ignore-this: 1fca7b8f364a21ae413be8767161e32f
 
 This handles the case where we upload a new tahoe directory for a
 previously-processed local directory, possibly creating a new dircap (if the
 metadata had changed). Now we replace the old dirhash->dircap record. The
 previous behavior left the old record in place (with the old dircap and
 timestamps), so we'd never stop creating new directories and never converge
 on a null backup.
] 
["tahoe webopen": add --info flag, to get ?t=info
Brian Warner <warner@lothar.com>**20100424233003
 Ignore-this: 126b0bb6db340fabacb623d295eb45fa
 
 Also fix some trailing whitespace.
] 
[docs: install.html http-equiv refresh to quickstart.html
zooko@zooko.com**20100421165708
 Ignore-this: 52b4b619f9dde5886ae2cd7f1f3b734b
] 
[docs: install.html -> quickstart.html
zooko@zooko.com**20100421155757
 Ignore-this: 6084e203909306bed93efb09d0e6181d
 It is not called "installing" because that implies that it is going to change the configuration of your operating system. It is not called "building" because that implies that you need developer tools like a compiler. Also I added a stern warning against looking at the "InstallDetails" wiki page, which I have renamed to "AdvancedInstall".
] 
[Fix another typo in tahoe_storagespace munin plugin
david-sarah@jacaranda.org**20100416220935
 Ignore-this: ad1f7aa66b554174f91dfb2b7a3ea5f3
] 
[Add dependency on windmill >= 1.3
david-sarah@jacaranda.org**20100416190404
 Ignore-this: 4437a7a464e92d6c9012926b18676211
] 
[licensing: phrase the OpenSSL-exemption in the vocabulary of copyright instead of computer technology, and replicate the exemption from the GPL to the TGPPL
zooko@zooko.com**20100414232521
 Ignore-this: a5494b2f582a295544c6cad3f245e91
] 
[munin-tahoe_storagespace
freestorm77@gmail.com**20100221203626
 Ignore-this: 14d6d6a587afe1f8883152bf2e46b4aa
 
 Plugin configuration rename
 
] 
[setup: add licensing declaration for setuptools (noticed by the FSF compliance folks)
zooko@zooko.com**20100309184415
 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb
] 
[setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division
zooko@zooko.com**20100309163736
 Ignore-this: c0623d27e469799d86cabf67921a13f8
] 
[CREDITS to Jacob Appelbaum
zooko@zooko.com**20100304015616
 Ignore-this: 70db493abbc23968fcc8db93f386ea54
] 
[desert-island-build-with-proper-versions
jacob@appelbaum.net**20100304013858] 
[docs: a few small edits to try to guide newcomers through the docs
zooko@zooko.com**20100303231902
 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d
 These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months.
] 
[TAG allmydata-tahoe-1.6.1
david-sarah@jacaranda.org**20100228062314
 Ignore-this: eb5f03ada8ea953ee7780e7fe068539
] 
Patch bundle hash:
1cac164f9367a123cd5f7968971916e3bcdffebb
