source: trunk/misc/coding_tools/make-canary-files.py

Last change on this file was 4ac60c5, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-12-21T13:57:09Z

vendor cmp()

  • Property mode set to 100644
File size: 5.3 KB
Line 
#!/usr/bin/env python


"""
Given a list of nodeids and a 'convergence' file, create a bunch of files
that will (when encoded at k=1,N=1) be uploaded to specific nodeids.

Run this as follows:

 make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1

It will create a directory named 'canaries', with one file per nodeid named
'$NODEID-$NICKNAME.txt', that contains some random text.

The 'nodeids' file should contain one base32 nodeid per line, followed by the
optional nickname, like:

---
5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo  server12
vb7vm2mneyid5jbyvcbk2wb5icdhwtun  server13
...
---

The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file
will, when uploaded with the given (convergence,k,N) pair, have its first
share placed on the 5yyq/server12 storage server. If N>1, the other shares
will be placed elsewhere, of course.

This tool can be useful to construct a set of 'canary' files, which can then
be uploaded to storage servers, and later downloaded to test a grid's health.
If you are able to download the canary for server12 via some tahoe node X,
then the following properties are known to be true:

 node X is running, and has established a connection to server12
 server12 is running, and returning data for at least the given file

Using k=1/N=1 creates a separate test for each server. The test process is
then to download the whole directory of files (perhaps with a t=deep-check
operation).

Alternatively, you could upload with the usual k=3/N=10 and then move/delete
shares to put all N shares on a single server.

Note that any changes to the nodeid list will affect the placement of shares.
Shares should be uploaded with the same nodeid list as this tool used when
constructing the files.

Also note that this tool uses the Tahoe codebase, so it should be run on a
system where Tahoe is installed, or in a source tree with setup.py like this:

 setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..'
"""
53
54import os, hashlib
55from twisted.python import usage
56from allmydata.immutable import upload
57from allmydata.util import base32
58
def cmp(a, b):
    """Three-way comparison, vendored from the Python 2 builtin.

    Returns -1 if a < b, 1 if a > b, and 0 when they are equal.
    """
    if a < b:
        return -1
    if a > b:
        return 1
    return 0
61
class Options(usage.Options):
    """Command-line interface for the canary-file generator.

    Declarative twisted.python.usage option tables: parameters take a
    value (k and N are coerced to int), flags are booleans.
    """

    optParameters = [
        ("convergence", "c", None, "path to NODEDIR/private/convergence"),
        ("nodeids", "n", None, "path to file with one base32 nodeid per line"),
        ("k", "k", 1, "number of necessary shares, defaults to 1", int),
        ("N", "N", 1, "number of total shares, defaults to 1", int),
    ]

    optFlags = [
        ("verbose", "v", "Be noisy"),
    ]
72
# Parse command-line arguments; parseOptions() prints usage and exits
# on bad input.
opts = Options()
opts.parseOptions()

verbose = bool(opts["verbose"])

# Map binary nodeid -> nickname (None when the line carried no nickname).
# Blank lines and '#' comments in the nodeids file are skipped.  Use a
# context manager so the file handle is closed promptly instead of leaked.
nodes = {}
with open(opts["nodeids"], "r") as nodeids_file:
    for line in nodeids_file:
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        pieces = line.split(None, 1)
        if len(pieces) == 2:
            nodeid_s, nickname = pieces
        else:
            nodeid_s = pieces[0]
            nickname = None
        nodes[base32.a2b(nodeid_s)] = nickname
91
# The placement predicted by this tool only matches a real upload if the
# uploading client uses the same encoding parameters; warn when k/N differ
# from the values this check expects.
if opts["k"] != 3 or opts["N"] != 10:
    print("note: using non-default k/N requires patching the Tahoe code")
    print("src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS")

# Read the convergence secret (stored base32-encoded); close the file
# handle promptly instead of leaking it.
convergence_file = os.path.expanduser(opts["convergence"])
with open(convergence_file, "rb") as conv_f:
    convergence_s = conv_f.read().strip()
convergence = base32.a2b(convergence_s)
99
def get_permuted_peers(key):
    """Return the nodeids from ``nodes`` in permuted-peer order for *key*.

    Each nodeid is ranked by sha1(key + nodeid); the list of nodeids is
    returned sorted by that digest, mirroring Tahoe's server-selection
    permutation.
    """
    # BUG FIX: Python 3's list.sort() no longer accepts a positional cmp
    # function, so the old `results.sort(lambda a, b: cmp(a[0], b[0]))`
    # raised TypeError.  Rank with key= instead; ties (identical digests)
    # keep their original order because sort() is stable, same as before.
    ranked = [(hashlib.sha1(key + nodeid).digest(), nodeid)
              for nodeid in nodes]
    ranked.sort(key=lambda pair: pair[0])
    return [nodeid for _digest, nodeid in ranked]
107
def find_share_for_target(target):
    """Brute-force file contents whose first share lands on *target*.

    Repeatedly generates candidate contents with a random suffix, computes
    the storage index each would get under the global ``convergence``
    secret, and checks whether *target* is first in the permuted peer
    list.  The winning contents are written to
    canaries/$NODEID[-$NICKNAME].txt.  Returns the number of candidate
    suffixes tried.
    """
    target_s = base32.b2a(target)
    # NOTE(review): this concatenation assumes base32.b2a returns str; if
    # it returns bytes on py3 a .decode() is needed here -- confirm.
    prefix = "The first share of this file will be placed on " + target_s + "\n"
    prefix += "This data is random: "
    attempts = 0
    while True:
        attempts += 1
        suffix = base32.b2a(os.urandom(10))
        if verbose: print(" trying", suffix, end=' ')
        data = prefix + suffix + "\n"
        assert len(data) > 55  # no LIT files
        # now, what storage index will this get?
        u = upload.Data(data, convergence)
        eu = upload.EncryptAnUploadable(u)
        d = eu.get_storage_index() # this happens to run synchronously
        def _got_si(si, data=data):
            if verbose: print("SI", base32.b2a(si), end=' ')
            peerlist = get_permuted_peers(si)
            if peerlist[0] != target:
                # a different server would get share 0; try another suffix
                if verbose: print("  boo")
                return False
            if verbose: print("  yay!")
            fn = base32.b2a(target)
            if nodes[target]:
                # nicknames may contain '/', which is not filename-safe
                nickname = nodes[target].replace("/", "_")
                fn += "-" + nickname
            fn += ".txt"
            fn = os.path.join("canaries", fn)
            # close the handle promptly instead of leaking it
            with open(fn, "w") as out:
                out.write(data)
            return True
        d.addCallback(_got_si)
        # get sneaky and look inside the Deferred for the synchronous result
        if d.result:
            return attempts
144
# Generate one canary file per configured server and report how much
# brute-forcing was needed.  mkdir (not makedirs) deliberately fails if
# 'canaries' already exists, so stale canaries are never silently mixed in.
os.mkdir("canaries")
attempts = []
for target in nodes:
    print("working on", base32.b2a(target))
    attempts.append(find_share_for_target(target))
print("done")
# Guard the summary: with an empty nodeids file, max() raises ValueError
# and the average divides by zero.
if attempts:
    print("%d attempts total, avg %d per target, max %d" %
          (sum(attempts), 1.0 * sum(attempts) / len(nodes), max(attempts)))
else:
    print("no nodeids found; nothing to do")
154
155
Note: See TracBrowser for help on using the repository browser.