| 1 | """ |
|---|
| 2 | Base32 encoding. |
|---|
| 3 | |
|---|
| 4 | Ported to Python 3. |
|---|
| 5 | """ |
|---|
def backwardscompat_bytes(b):
    """
    Identity function: hand the argument back untouched.

    @param b the value to pass through

    @return b itself, unmodified
    """
    return b
|---|
# Module-level shorthand for bytes.maketrans; used below to build the
# byte translation tables.
maketrans = bytes.maketrans
|---|
| 9 | |
|---|
| 10 | from typing import Optional |
|---|
| 11 | import base64 |
|---|
| 12 | |
|---|
| 13 | from allmydata.util.assertutil import precondition |
|---|
| 14 | |
|---|
# The 32-symbol alphabet: the byte at position i is the symbol for quintet value i.
rfc3548_alphabet = b"abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
chars = rfc3548_alphabet

# The quintet values 0..31, one per byte, in the same order as chars.
vals = bytes(range(32))
# 256-entry translation tables: alphabet character -> quintet value, and back.
c2vtranstable = maketrans(chars, vals)
v2ctranstable = maketrans(vals, chars)
# Table mapping every byte to itself; could_be_base32_encoded() passes it to
# bytes.translate() so that only the "delete" argument has an effect.
identitytranstable = maketrans(b'', b'')
|---|
| 22 | |
|---|
def _get_trailing_chars_without_lsbs(N, d):
    """
    Recursive helper for get_trailing_chars_without_lsbs().

    Walks the alphabet with strides 2**4, 2**3, ... down to 2**N (coarsest
    stride first) and collects every character whose index has not been
    collected yet.

    @param N the number of least significant bits that are ignored
    @param d a dict used as the set of alphabet indices already collected; it
        is updated in place and shared across the recursive calls

    @return: a list of chars that can legitimately appear in the last place when the least significant N bits are ignored.
    """
    collected = []
    if N < 4:
        # Gather the characters for the coarser strides first.
        collected += _get_trailing_chars_without_lsbs(N + 1, d=d)
    for index in range(0, len(chars), 2 ** N):
        if index in d:
            continue
        d[index] = None
        # Slice rather than index so the element is a length-1 bytes object.
        collected.append(chars[index:index + 1])
    return collected
|---|
| 37 | |
|---|
def get_trailing_chars_without_lsbs(N):
    """
    Return the alphabet characters that may end an encoding when the N least
    significant bits of the final quintet are ignored.

    For N == 0 this is the whole alphabet. Otherwise it is the characters
    whose alphabet index is a multiple of 2**N, ordered coarsest stride first
    (the order produced by _get_trailing_chars_without_lsbs()).

    @param N the number of ignored least significant bits; must satisfy
        0 <= N < 5, which is enforced with precondition()

    @return the selected characters, as bytes
    """
    # The message now states the range that is actually checked (0 <= N < 5).
    precondition((N >= 0) and (N < 5), "N is required to be >= 0 and < 5.", N=N)
    if N == 0:
        return chars
    # A fresh dict per call: the helper uses it to remember collected indices.
    return b''.join(_get_trailing_chars_without_lsbs(N, d={}))
|---|
| 44 | |
|---|
# Regular-expression fragments, as bytes. BASE32CHAR is a character class for
# the whole alphabet; BASE32CHAR_<k>bits is the class of characters allowed in
# the final position when that quintet carries only k significant bits.
BASE32CHAR = b'['+get_trailing_chars_without_lsbs(0)+b']'
BASE32CHAR_4bits = b'['+get_trailing_chars_without_lsbs(1)+b']'
BASE32CHAR_3bits = b'['+get_trailing_chars_without_lsbs(2)+b']'
BASE32CHAR_2bits = b'['+get_trailing_chars_without_lsbs(3)+b']'
BASE32CHAR_1bits = b'['+get_trailing_chars_without_lsbs(4)+b']'
# Fragments for 1 to 4 leftover bytes: 8, 16, 24 and 32 bits need 1, 3, 4 and
# 6 full quintets followed by a final quintet of 3, 1, 4 and 2 bits.
BASE32STR_1byte = BASE32CHAR+BASE32CHAR_3bits
BASE32STR_2bytes = BASE32CHAR+b'{3}'+BASE32CHAR_1bits
BASE32STR_3bytes = BASE32CHAR+b'{4}'+BASE32CHAR_4bits
BASE32STR_4bytes = BASE32CHAR+b'{6}'+BASE32CHAR_2bits
# Zero or more full 8-character groups, then either nothing or one of the
# leftover-byte fragments; the outer parentheses capture the whole string.
BASE32STR_anybytes = bytes(b'((?:%s{8})*') % (BASE32CHAR,) + bytes(b"(?:|%s|%s|%s|%s))") % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes)
|---|
| 55 | |
|---|
def b2a(os): # type: (bytes) -> bytes
    """
    Encode bytes as lower-case base-32 without "=" padding.

    @param os the data to be encoded (as bytes)

    @return the contents of os in base-32 encoded form, as bytes
    """
    padded_upper = base64.b32encode(os)
    unpadded = padded_upper.rstrip(b"=")
    return unpadded.lower()
|---|
| 63 | |
|---|
def b2a_or_none(os: Optional[bytes]) -> Optional[bytes]:
    """
    Like b2a(), but pass None through instead of encoding it.

    @param os the data to be encoded (as bytes), or None

    @return b2a(os) when os is not None, otherwise None
    """
    return None if os is None else b2a(os)
|---|
| 68 | |
|---|
# b2a() uses the minimal number of quintets sufficient to encode the binary
# input. It just so happens that the relation is like this (everything is
# modulo 40 bits, i.e. the index is the octet count modulo 5 and the value is
# the quintet count modulo 8).
# num_qs = NUM_OS_TO_NUM_QS[num_os]
NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,)

# num_os = NUM_QS_TO_NUM_OS[num_qs], but if not NUM_QS_LEGIT[num_qs] then
# there is *no* number of octets which would have resulted in this number of
# quintets, so either the encoded string has been mangled (truncated) or else
# you were supposed to decode it with a2b_l() (which means you were supposed
# to know the actual length of the encoded data).
# NOTE(review): no a2b_l() is visible in this module -- confirm that the
# reference above is still valid.

# Indexed by the quintet count modulo 8: the number of whole octets encoded.
NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4)
# Indexed by the quintet count modulo 8: 1 if b2a() can produce that length.
NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,)
# Same index as NUM_QS_TO_NUM_OS, expressed in bits (octets * 8).
NUM_QS_TO_NUM_BITS=tuple([_x*8 for _x in NUM_QS_TO_NUM_OS])
|---|
| 84 | |
|---|
| 85 | # A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the |
|---|
| 86 | # original data had 8K bits for a positive integer K. |
|---|
| 87 | # The boolean value of s8[len(s)%8][ord(s[-1])], where s is the possibly base-32 encoded string |
|---|
| 88 | # tells whether the final character is reasonable. |
|---|
def add_check_array(cs, sfmap):
    """
    Append to sfmap a 256-entry membership table for the bytes in cs.

    @param cs the byte values to mark; converted with bytes() before use
    @param sfmap the list that receives the new table (modified in place)

    @return None; the tuple appended to sfmap holds 1 at index v when byte
        value v occurs in cs and 0 otherwise
    """
    marked = frozenset(bytes(cs))
    sfmap.append(tuple(1 if value in marked else 0 for value in range(256)))
|---|
| 94 | |
|---|
def init_s8():
    """
    Build the eight "acceptable final character" tables, one per value of
    len(s) % 8.

    Entry 0 accepts every alphabet character. For each other length that
    NUM_QS_LEGIT marks as possible, the table accepts the characters returned
    by get_trailing_chars_without_lsbs(); for impossible lengths the table
    accepts nothing.

    NOTE(review): for lengths 2, 4, 5 and 7 (mod 8) the expression below
    yields N = 1, 3, 0 and 2, whereas the BASE32STR_*byte fragments in this
    module use N = 2, 4, 1 and 3 for the same lengths, so these tables are
    more permissive than the regular expressions -- confirm this is intended.

    @return a tuple of eight 256-entry tuples, as built by add_check_array()
    """
    tables = []
    add_check_array(chars, tables)
    for lenmod8 in range(1, 8):
        if not NUM_QS_LEGIT[lenmod8]:
            # No input length encodes to this many characters.
            add_check_array(b'', tables)
            continue
        ignored_lsbs = 4 - (NUM_QS_TO_NUM_BITS[lenmod8] % 5)
        add_check_array(get_trailing_chars_without_lsbs(ignored_lsbs), tables)
    return tuple(tables)
|---|
# Built once at import time; s8[len(s) % 8][s[-1]] is truthy when the final
# byte of s is acceptable for a string of that length.
s8 = init_s8()
|---|
| 105 | |
|---|
def could_be_base32_encoded(s, s8=s8, tr=bytes.translate, identitytranstable=identitytranstable, chars=chars):
    """
    Cheaply test whether s could be the output of b2a().

    The check has two parts: the final character must be acceptable for a
    string of this length (looked up in s8), and every character must belong
    to the alphabet.

    @param s the candidate string (as bytes); enforced with precondition()
    @param s8, tr, identitytranstable, chars module globals bound as default
        arguments; callers are not expected to pass them

    @return True for the empty string; otherwise a falsy value (0 or False)
        when s cannot be base-32 encoded data, and True when it could be
    """
    precondition(isinstance(s, bytes), s)
    if s == b'':
        return True
    # Leftover from the Python 2 port; a no-op for a plain bytes object.
    s = bytes(s)
    final_char_ok = s8[len(s) % 8][s[-1]]
    if not final_char_ok:
        # Hand back the table entry itself (0), as the original expression did.
        return final_char_ok
    # Delete every alphabet character; anything left over is an invalid byte.
    leftovers = tr(s, identitytranstable, chars)
    return not leftovers
|---|
| 112 | |
|---|
def a2b(cs): # type: (bytes) -> bytes
    """
    Decode unpadded, lower-case base-32 data, the inverse of b2a().

    @param cs the base-32 encoded data (as bytes)

    @return the decoded data, as bytes
    """
    # Check the type first: could_be_base32_encoded() enforces the same
    # requirement internally, so in the original order this check could never
    # be the one that fired.
    precondition(isinstance(cs, bytes), cs)
    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)

    # Python's base64 module wants the upper-case alphabet and "=" padding up
    # to a multiple of 8 characters, so restore both before decoding.
    cs = cs.upper() + b"=" * (-len(cs) % 8)

    return base64.b32decode(cs)
|---|
| 126 | |
|---|
| 127 | |
|---|
# Names exported by a star-import of this module.
__all__ = ["b2a", "a2b", "b2a_or_none", "BASE32CHAR_3bits", "BASE32CHAR_1bits", "BASE32CHAR", "BASE32STR_anybytes", "could_be_base32_encoded"]
|---|