| 1 | """ |
|---|
| 2 | Hashing utilities. |
|---|
| 3 | |
|---|
| 4 | Ported to Python 3. |
|---|
| 5 | """ |
|---|
| 6 | |
|---|
| 7 | def byteschr(x): |
|---|
| 8 | return bytes([x]) |
|---|
| 9 | |
|---|
| 10 | import os |
|---|
| 11 | import hashlib |
|---|
| 12 | from allmydata.util.netstring import netstring |
|---|
| 13 | |
|---|
| 14 | # Be very very cautious when modifying this file. Almost any change will |
|---|
| 15 | # cause a compatibility break, invalidating all outstanding URIs and making |
|---|
| 16 | # any previously uploaded files become inaccessible. BE CONSERVATIVE AND TEST |
|---|
| 17 | # AGAINST OLD DATA! |
|---|
| 18 | |
|---|
| 19 | # Various crypto values are this size: hash outputs (from SHA-256d), |
|---|
| 20 | # randomly-generated secrets such as the lease secret, and symmetric encryption |
|---|
| 21 | # keys. In the near future we will add DSA private keys, and salts of various |
|---|
| 22 | # kinds. |
|---|
| 23 | CRYPTO_VAL_SIZE = 32 |
|---|
| 24 | |
|---|
| 25 | |
|---|
| 26 | class _SHA256d_Hasher: |
|---|
| 27 | # use SHA-256d, as defined by Ferguson and Schneier: hash the output |
|---|
| 28 | # again to prevent length-extension attacks |
|---|
| 29 | def __init__(self, truncate_to=None): |
|---|
| 30 | self.h = hashlib.sha256() |
|---|
| 31 | self.truncate_to = truncate_to |
|---|
| 32 | self._digest = None |
|---|
| 33 | |
|---|
| 34 | def update(self, data): |
|---|
| 35 | assert isinstance(data, bytes) # no unicode |
|---|
| 36 | self.h.update(data) |
|---|
| 37 | |
|---|
| 38 | def digest(self): |
|---|
| 39 | if self._digest is None: |
|---|
| 40 | h1 = self.h.digest() |
|---|
| 41 | del self.h |
|---|
| 42 | h2 = hashlib.sha256(h1).digest() |
|---|
| 43 | if self.truncate_to: |
|---|
| 44 | h2 = h2[:self.truncate_to] |
|---|
| 45 | self._digest = h2 |
|---|
| 46 | return self._digest |
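
# Illustrative use of the hasher above (a sketch only, not called elsewhere in
# this module). Note that the underlying hash object is discarded on the first
# digest() call, so the result is cached and later update() calls will fail.
#
#   h = _SHA256d_Hasher(truncate_to=16)
#   h.update(b"some bytes")
#   d = h.digest()       # 16-byte truncated SHA-256d digest
#   d == h.digest()      # True: the digest is cached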


def tagged_hasher(tag, truncate_to=None):
    hasher = _SHA256d_Hasher(truncate_to)
    hasher.update(netstring(tag))
    return hasher


def tagged_hash(tag, val, truncate_to=None):
    hasher = tagged_hasher(tag, truncate_to)
    hasher.update(val)
    return hasher.digest()


def tagged_pair_hash(tag, val1, val2, truncate_to=None):
    s = _SHA256d_Hasher(truncate_to)
    s.update(netstring(tag))
    s.update(netstring(val1))
    s.update(netstring(val2))
    return s.digest()
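
# For reference, tagged_hash(tag, val, truncate_to) above is SHA-256d over the
# netstring-framed tag followed by the raw value, i.e. (a sketch):
#
#   h = hashlib.sha256(hashlib.sha256(netstring(tag) + val).digest()).digest()
#   h = h[:truncate_to] if truncate_to else h
#
# tagged_pair_hash() netstring-frames both values as well, so the boundary
# between val1 and val2 cannot be shifted without changing the digest.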

# specific hash tags that we use


# immutable
STORAGE_INDEX_TAG = b"allmydata_immutable_key_to_storage_index_v1"
BLOCK_TAG = b"allmydata_encoded_subshare_v1"
UEB_TAG = b"allmydata_uri_extension_v1"
PLAINTEXT_TAG = b"allmydata_plaintext_v1"
CIPHERTEXT_TAG = b"allmydata_crypttext_v1"
CIPHERTEXT_SEGMENT_TAG = b"allmydata_crypttext_segment_v1"
PLAINTEXT_SEGMENT_TAG = b"allmydata_plaintext_segment_v1"
CONVERGENT_ENCRYPTION_TAG = b"allmydata_immutable_content_to_key_with_added_secret_v1+"

CLIENT_RENEWAL_TAG = b"allmydata_client_renewal_secret_v1"
CLIENT_CANCEL_TAG = b"allmydata_client_cancel_secret_v1"
FILE_RENEWAL_TAG = b"allmydata_file_renewal_secret_v1"
FILE_CANCEL_TAG = b"allmydata_file_cancel_secret_v1"
BUCKET_RENEWAL_TAG = b"allmydata_bucket_renewal_secret_v1"
BUCKET_CANCEL_TAG = b"allmydata_bucket_cancel_secret_v1"

# mutable
MUTABLE_WRITEKEY_TAG = b"allmydata_mutable_privkey_to_writekey_v1"
MUTABLE_WRITE_ENABLER_MASTER_TAG = b"allmydata_mutable_writekey_to_write_enabler_master_v1"
MUTABLE_WRITE_ENABLER_TAG = b"allmydata_mutable_write_enabler_master_and_nodeid_to_write_enabler_v1"
MUTABLE_PUBKEY_TAG = b"allmydata_mutable_pubkey_to_fingerprint_v1"
MUTABLE_READKEY_TAG = b"allmydata_mutable_writekey_to_readkey_v1"
MUTABLE_DATAKEY_TAG = b"allmydata_mutable_readkey_to_datakey_v1"
MUTABLE_STORAGEINDEX_TAG = b"allmydata_mutable_readkey_to_storage_index_v1"

# dirnodes
DIRNODE_CHILD_WRITECAP_TAG = b"allmydata_mutable_writekey_and_salt_to_dirnode_child_capkey_v1"
DIRNODE_CHILD_SALT_TAG = b"allmydata_dirnode_child_rwcap_to_salt_v1"

def storage_index_hash(key):
    # storage index is truncated to 128 bits (16 bytes). We're only hashing a
    # 16-byte value to get it, so there's no point in using a larger value. We
    # use this same tagged hash to go from encryption key to storage index for
    # random-keyed immutable files and convergent-encryption immutable
    # files. Mutable files use ssk_storage_index_hash().
    return tagged_hash(STORAGE_INDEX_TAG, key, 16)


def block_hash(data):
    return tagged_hash(BLOCK_TAG, data)


def block_hasher():
    return tagged_hasher(BLOCK_TAG)


def uri_extension_hash(data):
    return tagged_hash(UEB_TAG, data)


def uri_extension_hasher():
    return tagged_hasher(UEB_TAG)


def plaintext_hash(data):
    return tagged_hash(PLAINTEXT_TAG, data)


def plaintext_hasher():
    return tagged_hasher(PLAINTEXT_TAG)


def crypttext_hash(data):
    return tagged_hash(CIPHERTEXT_TAG, data)


def crypttext_hasher():
    return tagged_hasher(CIPHERTEXT_TAG)


def crypttext_segment_hash(data):
    return tagged_hash(CIPHERTEXT_SEGMENT_TAG, data)


def crypttext_segment_hasher():
    return tagged_hasher(CIPHERTEXT_SEGMENT_TAG)


def plaintext_segment_hash(data):
    return tagged_hash(PLAINTEXT_SEGMENT_TAG, data)


def plaintext_segment_hasher():
    return tagged_hasher(PLAINTEXT_SEGMENT_TAG)


KEYLEN = 16
IVLEN = 16


def convergence_hash(k, n, segsize, data, convergence):
    h = convergence_hasher(k, n, segsize, convergence)
    h.update(data)
    return h.digest()


def _convergence_hasher_tag(k, n, segsize, convergence):
    """
    Create the convergence hashing tag.

    :param int k: Required shares (in [1..256]).
    :param int n: Total shares (in [1..256]).
    :param int segsize: Maximum segment size.
    :param bytes convergence: The convergence secret.

    :return bytes: The bytestring to use as a tag in the convergence hash.
    """
    assert isinstance(convergence, bytes)
    if k > n:
        raise ValueError(
            "k > n not allowed; k = {}, n = {}".format(k, n),
        )
    if k < 1 or n < 1:
        # It doesn't make sense to have zero shares. Zero shares carry no
        # information and cannot encode any part of the application data.
        raise ValueError(
            "k, n < 1 not allowed; k = {}, n = {}".format(k, n),
        )
    if k > 256 or n > 256:
        # ZFEC supports encoding application data into a maximum of 256
        # shares. If we ignore the limitations of ZFEC, it may be fine to use
        # a configuration with more shares than that, and it may be fine to
        # construct a convergence tag from such a configuration. Since ZFEC
        # is the only supported encoder, though, this is moot for now.
        raise ValueError(
            "k, n > 256 not allowed; k = {}, n = {}".format(k, n),
        )
    param_tag = netstring(b"%d,%d,%d" % (k, n, segsize))
    tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag
    return tag
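
# For example (hypothetical parameter values): with k=3, n=10, segsize=131072
# and a 16-byte convergence secret, the tag built above is
#
#   CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + netstring(b"3,10,131072")
#
# so the key derived by convergence_hasher() binds the file contents to both
# the convergence secret and the encoding parameters.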


def convergence_hasher(k, n, segsize, convergence):
    tag = _convergence_hasher_tag(k, n, segsize, convergence)
    return tagged_hasher(tag, KEYLEN)


def random_key():
    return os.urandom(KEYLEN)


def my_renewal_secret_hash(my_secret):
    return tagged_hash(my_secret, CLIENT_RENEWAL_TAG)


def my_cancel_secret_hash(my_secret):
    return tagged_hash(my_secret, CLIENT_CANCEL_TAG)


def file_renewal_secret_hash(client_renewal_secret, storage_index):
    return tagged_pair_hash(FILE_RENEWAL_TAG,
                            client_renewal_secret, storage_index)


def file_cancel_secret_hash(client_cancel_secret, storage_index):
    return tagged_pair_hash(FILE_CANCEL_TAG,
                            client_cancel_secret, storage_index)


def bucket_renewal_secret_hash(file_renewal_secret, peerid):
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid)  # binary!
    return tagged_pair_hash(BUCKET_RENEWAL_TAG, file_renewal_secret, peerid)


def bucket_cancel_secret_hash(file_cancel_secret, peerid):
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid)  # binary!
    return tagged_pair_hash(BUCKET_CANCEL_TAG, file_cancel_secret, peerid)


def _xor(a, b):
    return b"".join([byteschr(c ^ b) for c in bytes(a)])


def hmac(tag, data):
    tag = bytes(tag)  # Make sure it matches Python 3 behavior
    ikey = _xor(tag, 0x36)
    okey = _xor(tag, 0x5c)
    h1 = hashlib.sha256(ikey + data).digest()
    h2 = hashlib.sha256(okey + h1).digest()
    return h2
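
# Note: hmac() above follows the HMAC structure (inner hash keyed with the tag
# XOR 0x36, outer hash keyed with the tag XOR 0x5c) but, unlike RFC 2104, it
# does not pad the tag out to the SHA-256 block size, so its outputs will
# generally differ from the standard library's hmac module. Per the
# compatibility warning at the top of this file, it must stay as-is so that
# previously written data remains readable.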


def mutable_rwcap_key_hash(iv, writekey):
    return tagged_pair_hash(DIRNODE_CHILD_WRITECAP_TAG, iv, writekey, KEYLEN)


def mutable_rwcap_salt_hash(writekey):
    return tagged_hash(DIRNODE_CHILD_SALT_TAG, writekey, IVLEN)


def ssk_writekey_hash(privkey):
    return tagged_hash(MUTABLE_WRITEKEY_TAG, privkey, KEYLEN)


def ssk_write_enabler_master_hash(writekey):
    return tagged_hash(MUTABLE_WRITE_ENABLER_MASTER_TAG, writekey)


def ssk_write_enabler_hash(writekey, peerid):
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid)  # binary!
    wem = ssk_write_enabler_master_hash(writekey)
    return tagged_pair_hash(MUTABLE_WRITE_ENABLER_TAG, wem, peerid)


def ssk_pubkey_fingerprint_hash(pubkey):
    return tagged_hash(MUTABLE_PUBKEY_TAG, pubkey)


def ssk_readkey_hash(writekey):
    return tagged_hash(MUTABLE_READKEY_TAG, writekey, KEYLEN)


def ssk_readkey_data_hash(IV, readkey):
    return tagged_pair_hash(MUTABLE_DATAKEY_TAG, IV, readkey, KEYLEN)


def ssk_storage_index_hash(readkey):
    return tagged_hash(MUTABLE_STORAGEINDEX_TAG, readkey, KEYLEN)


def timing_safe_compare(a, b):
    n = os.urandom(32)
    return bool(tagged_hash(n, a) == tagged_hash(n, b))
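
# The comparison above is made timing-safe by hashing both values with a fresh
# random tag first: any timing variation in the byte-by-byte == is over the
# unpredictable digests, not over a or b themselves. On modern Pythons a
# plausible alternative (a sketch only, not what this module does) would be:
#
#   import hmac as stdlib_hmac
#   stdlib_hmac.compare_digest(a, b)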


BACKUPDB_DIRHASH_TAG = b"allmydata_backupdb_dirhash_v1"


def backupdb_dirhash(contents):
    return tagged_hash(BACKUPDB_DIRHASH_TAG, contents)


def permute_server_hash(peer_selection_index, server_permutation_seed):
    return hashlib.sha1(peer_selection_index + server_permutation_seed).digest()