| 1 | """ |
|---|
| 2 | Tests for allmydata.hashtree. |
|---|
| 3 | """ |
|---|
| 4 | |
|---|
| 5 | from __future__ import annotations |
|---|
| 6 | |
|---|
| 7 | from .common import SyncTestCase |
|---|
| 8 | |
|---|
| 9 | from base64 import b32encode |
|---|
| 10 | from allmydata.util.hashutil import tagged_hash |
|---|
| 11 | from allmydata import hashtree |
|---|
| 12 | |
|---|
| 13 | def make_tree(numleaves): |
|---|
| 14 | leaves = [b"%d" % i for i in range(numleaves)] |
|---|
| 15 | leaf_hashes = [tagged_hash(b"tag", leaf) for leaf in leaves] |
|---|
| 16 | ht = hashtree.HashTree(leaf_hashes) |
|---|
| 17 | return ht |
|---|
| 18 | |
|---|
| 19 | class Complete(SyncTestCase): |
|---|
| 20 | def test_create(self): |
|---|
| 21 | # try out various sizes, since we pad to a power of two |
|---|
| 22 | ht = make_tree(6) |
|---|
| 23 | ht = make_tree(9) |
|---|
| 24 | ht = make_tree(8) |
|---|
| 25 | root = ht[0] |
|---|
| 26 | self.failUnlessEqual(len(root), 32) |
|---|
| 27 | self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0")) |
|---|
| 28 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
|---|
| 29 | self.failUnlessEqual(ht.get_leaf_index(0), 7) |
|---|
| 30 | self.failUnlessRaises(IndexError, ht.parent, 0) |
|---|
| 31 | self.failUnlessRaises(IndexError, ht.needed_for, -1) |
|---|
| 32 | |
|---|
| 33 | def test_well_known_tree(self): |
|---|
| 34 | self.assertEqual( |
|---|
| 35 | [b32encode(s).strip(b"=").lower() for s in make_tree(3)], |
|---|
| 36 | [b'vxuqudnucceja4pqkdqy5txapagxubm5moupzqywkbg2jrjkaola', |
|---|
| 37 | b'weycjri4jlcaunca2jyx2kr7sbtb7qdriog3f26g5jpc5awfeazq', |
|---|
| 38 | b'5ovy3g2wwjnxoqtja4licckxkbqjef4xsjtclk6gxnsl66kvow6a', |
|---|
| 39 | b'esd34nbzri75l3j2vwetpk3dvlvsxstkbaktomonrulpks3df3sq', |
|---|
| 40 | b'jkxbwa2tppyfax35o72tbjecxvaa4xphma6zbyfbkkku3ed2657a', |
|---|
| 41 | b'wfisavaqgab2raihe7dld2qjps4rtxyiubgfs5enziokey2msjwa', |
|---|
| 42 | b't3kza5vwx3tlowdemmgdyigp62ju57qduyfh7uulnfkc7mj2ncrq'], |
|---|
| 43 | ) |
|---|
| 44 | |
|---|
| 45 | def test_needed_hashes(self): |
|---|
| 46 | ht = make_tree(8) |
|---|
| 47 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 48 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
|---|
| 49 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 50 | self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) |
|---|
| 51 | self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) |
|---|
| 52 | self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) |
|---|
| 53 | |
|---|
| 54 | def test_dump(self): |
|---|
| 55 | ht = make_tree(6) |
|---|
| 56 | expected = [(0,0), |
|---|
| 57 | (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3), |
|---|
| 58 | (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3), |
|---|
| 59 | ] |
|---|
| 60 | self.failUnlessEqual(list(ht.depth_first()), expected) |
|---|
| 61 | d = "\n" + ht.dump() |
|---|
| 62 | #print(d) |
|---|
| 63 | self.failUnless("\n 0:" in d) |
|---|
| 64 | self.failUnless("\n 1:" in d) |
|---|
| 65 | self.failUnless("\n 3:" in d) |
|---|
| 66 | self.failUnless("\n 7:" in d) |
|---|
| 67 | self.failUnless("\n 8:" in d) |
|---|
| 68 | self.failUnless("\n 4:" in d) |
|---|
| 69 | |
|---|
| 70 | class Incomplete(SyncTestCase): |
|---|
| 71 | |
|---|
| 72 | def test_create(self): |
|---|
| 73 | ht = hashtree.IncompleteHashTree(6) |
|---|
| 74 | ht = hashtree.IncompleteHashTree(9) |
|---|
| 75 | ht = hashtree.IncompleteHashTree(8) |
|---|
| 76 | self.failUnlessEqual(ht[0], None) |
|---|
| 77 | self.failUnlessEqual(ht.get_leaf(0), None) |
|---|
| 78 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
|---|
| 79 | self.failUnlessEqual(ht.get_leaf_index(0), 7) |
|---|
| 80 | |
|---|
| 81 | def test_needed_hashes(self): |
|---|
| 82 | ht = hashtree.IncompleteHashTree(8) |
|---|
| 83 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 84 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
|---|
| 85 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 86 | self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1])) |
|---|
| 87 | self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1])) |
|---|
| 88 | self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1])) |
|---|
| 89 | ht = hashtree.IncompleteHashTree(1) |
|---|
| 90 | self.failUnlessEqual(ht.needed_hashes(0), set([])) |
|---|
| 91 | ht = hashtree.IncompleteHashTree(6) |
|---|
| 92 | self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 93 | self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2])) |
|---|
| 94 | self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 95 | self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1])) |
|---|
| 96 | self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1])) |
|---|
| 97 | self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1])) |
|---|
| 98 | |
|---|
| 99 | def test_depth_of(self): |
|---|
| 100 | hashtree.IncompleteHashTree(8) |
|---|
| 101 | self.failUnlessEqual(hashtree.depth_of(0), 0) |
|---|
| 102 | for i in [1,2]: |
|---|
| 103 | self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i) |
|---|
| 104 | for i in [3,4,5,6]: |
|---|
| 105 | self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i) |
|---|
| 106 | for i in [7,8,9,10,11,12,13,14]: |
|---|
| 107 | self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i) |
|---|
| 108 | |
|---|
| 109 | def test_large(self): |
|---|
| 110 | # IncompleteHashTree.set_hashes() used to take O(N**2). This test is |
|---|
| 111 | # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish |
|---|
| 112 | # there were a good way to assert this (like counting VM operations |
|---|
| 113 | # or something): the problem was inside list.sort(), so there's no |
|---|
| 114 | # good way to instrument set_hashes() to count what we care about. On |
|---|
| 115 | # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s |
|---|
| 116 | # in the old broken version. An 80k-leaf test (corresponding to a |
|---|
| 117 | # 10GB file with a 128KiB segsize) 10s in the fixed version, and |
|---|
| 118 | # several hours in the broken version, but 10s on my laptop (plus the |
|---|
| 119 | # 20s of setup code) probably means 200s on our dapper buildslave, |
|---|
| 120 | # which is painfully long for a unit test. |
|---|
| 121 | self.do_test_speed(10000) |
|---|
| 122 | |
|---|
| 123 | def do_test_speed(self, SIZE): |
|---|
| 124 | # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB |
|---|
| 125 | # segsize) takes: |
|---|
| 126 | # 7s to build the (complete) HashTree |
|---|
| 127 | # 13s to set up the dictionary |
|---|
| 128 | # 10s to run set_hashes() |
|---|
| 129 | ht = make_tree(SIZE) |
|---|
| 130 | iht = hashtree.IncompleteHashTree(SIZE) |
|---|
| 131 | |
|---|
| 132 | needed = set() |
|---|
| 133 | for i in range(SIZE): |
|---|
| 134 | needed.update(ht.needed_hashes(i, True)) |
|---|
| 135 | all = dict([ (i, ht[i]) for i in needed]) |
|---|
| 136 | iht.set_hashes(hashes=all) |
|---|
| 137 | |
|---|
| 138 | def test_check(self): |
|---|
| 139 | # first create a complete hash tree |
|---|
| 140 | ht = make_tree(6) |
|---|
| 141 | # then create a corresponding incomplete tree |
|---|
| 142 | iht = hashtree.IncompleteHashTree(6) |
|---|
| 143 | |
|---|
| 144 | # suppose we wanted to validate leaf[0] |
|---|
| 145 | # leaf[0] is the same as node[7] |
|---|
| 146 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 147 | self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2])) |
|---|
| 148 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 149 | iht[0] = ht[0] # set the root |
|---|
| 150 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 151 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 152 | iht[5] = ht[5] |
|---|
| 153 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 154 | self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2])) |
|---|
| 155 | |
|---|
| 156 | # reset |
|---|
| 157 | iht = hashtree.IncompleteHashTree(6) |
|---|
| 158 | |
|---|
| 159 | current_hashes = list(iht) |
|---|
| 160 | # this should fail because there aren't enough hashes known |
|---|
| 161 | try: |
|---|
| 162 | iht.set_hashes(leaves={0: tagged_hash(b"tag", b"0")}) |
|---|
| 163 | except hashtree.NotEnoughHashesError: |
|---|
| 164 | pass |
|---|
| 165 | else: |
|---|
| 166 | self.fail("didn't catch not enough hashes") |
|---|
| 167 | |
|---|
| 168 | # and the set of hashes stored in the tree should still be the same |
|---|
| 169 | self.failUnlessEqual(list(iht), current_hashes) |
|---|
| 170 | # and we should still need the same |
|---|
| 171 | self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2])) |
|---|
| 172 | |
|---|
| 173 | chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]} |
|---|
| 174 | # this should fail because the leaf hash is just plain wrong |
|---|
| 175 | try: |
|---|
| 176 | iht.set_hashes(chain, leaves={0: tagged_hash(b"bad tag", b"0")}) |
|---|
| 177 | except hashtree.BadHashError: |
|---|
| 178 | pass |
|---|
| 179 | else: |
|---|
| 180 | self.fail("didn't catch bad hash") |
|---|
| 181 | |
|---|
| 182 | # this should fail because we give it conflicting hashes: one as an |
|---|
| 183 | # internal node, another as a leaf |
|---|
| 184 | try: |
|---|
| 185 | iht.set_hashes(chain, leaves={1: tagged_hash(b"bad tag", b"1")}) |
|---|
| 186 | except hashtree.BadHashError: |
|---|
| 187 | pass |
|---|
| 188 | else: |
|---|
| 189 | self.fail("didn't catch bad hash") |
|---|
| 190 | |
|---|
| 191 | bad_chain = chain.copy() |
|---|
| 192 | bad_chain[2] = ht[2] + b"BOGUS" |
|---|
| 193 | |
|---|
| 194 | # this should fail because the internal hash is wrong |
|---|
| 195 | try: |
|---|
| 196 | iht.set_hashes(bad_chain, leaves={0: tagged_hash(b"tag", b"0")}) |
|---|
| 197 | except hashtree.BadHashError: |
|---|
| 198 | pass |
|---|
| 199 | else: |
|---|
| 200 | self.fail("didn't catch bad hash") |
|---|
| 201 | |
|---|
| 202 | # this should succeed |
|---|
| 203 | try: |
|---|
| 204 | iht.set_hashes(chain, leaves={0: tagged_hash(b"tag", b"0")}) |
|---|
| 205 | except hashtree.BadHashError as e: |
|---|
| 206 | self.fail("bad hash: %s" % e) |
|---|
| 207 | |
|---|
| 208 | self.failUnlessEqual(ht.get_leaf(0), tagged_hash(b"tag", b"0")) |
|---|
| 209 | self.failUnlessRaises(IndexError, ht.get_leaf, 8) |
|---|
| 210 | |
|---|
| 211 | # this should succeed too |
|---|
| 212 | try: |
|---|
| 213 | iht.set_hashes(leaves={1: tagged_hash(b"tag", b"1")}) |
|---|
| 214 | except hashtree.BadHashError: |
|---|
| 215 | self.fail("bad hash") |
|---|
| 216 | |
|---|
| 217 | # this should fail because we give it hashes that conflict with some |
|---|
| 218 | # that we added successfully before |
|---|
| 219 | try: |
|---|
| 220 | iht.set_hashes(leaves={1: tagged_hash(b"bad tag", b"1")}) |
|---|
| 221 | except hashtree.BadHashError: |
|---|
| 222 | pass |
|---|
| 223 | else: |
|---|
| 224 | self.fail("didn't catch bad hash") |
|---|
| 225 | |
|---|
| 226 | # now that leaves 0 and 1 are known, some of the internal nodes are |
|---|
| 227 | # known |
|---|
| 228 | self.failUnlessEqual(iht.needed_hashes(4), set([12, 6])) |
|---|
| 229 | chain = {6: ht[6], 12: ht[12]} |
|---|
| 230 | |
|---|
| 231 | # this should succeed |
|---|
| 232 | try: |
|---|
| 233 | iht.set_hashes(chain, leaves={4: tagged_hash(b"tag", b"4")}) |
|---|
| 234 | except hashtree.BadHashError as e: |
|---|
| 235 | self.fail("bad hash: %s" % e) |
|---|