Context Navigation

source: trunk/src/allmydata/util/base62.py

Visit:

Last change on this file was 7ca3a4c6, checked in by Alexandre Detiste <alexandre.detiste@…>, at 2024-12-21T18:08:16Z
define function before use
Property mode set to `100644`
File size: 4.5 KB

Line
1	"""
2	Base62 encoding.
3
4	Ported to Python 3.
5	"""
6
7	maketrans = bytes.maketrans
8	translate = bytes.translate
9
10	from allmydata.util.mathutil import log_ceil, log_floor
11
12	chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
13
14	BASE62CHAR = b'[' + chars + b']'
15
16	def byteschr(x):
17	return bytes([x])
18
19	vals = b''.join([byteschr(i) for i in range(62)])
20	c2vtranstable = maketrans(chars, vals)
21	v2ctranstable = maketrans(vals, chars)
22	identitytranstable = maketrans(chars, chars)
23
24	def b2a(os):
25	"""
26	@param os the data to be encoded (as bytes)
27
28	@return the contents of os in base-62 encoded form, as bytes
29	"""
30	cs = b2a_l(os, len(os)*8)
31	assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs))
32	return cs
33
34	def b2a_l(os, lengthinbits):
35	"""
36	@param os the data to be encoded (as bytes)
37	@param lengthinbits the number of bits of data in os to be encoded
38
39	b2a_l() will generate a base-62 encoded string big enough to encode
40	lengthinbits bits. So for example if os is 3 bytes long and lengthinbits is
41	17, then b2a_l() will generate a 3-character- long base-62 encoded string
42	(since 3 chars is sufficient to encode more than 2^17 values). If os is 3
43	bytes long and lengthinbits is 18 (or None), then b2a_l() will generate a
44	4-character string (since 4 chars are required to hold 2^18 values). Note
45	that if os is 3 bytes long and lengthinbits is 17, the least significant 7
46	bits of os are ignored.
47
48	Warning: if you generate a base-62 encoded string with b2a_l(), and then someone else tries to
49	decode it by calling a2b() instead of a2b_l(), then they will (potentially) get a different
50	string than the one you encoded! So use b2a_l() only when you are sure that the encoding and
51	decoding sides know exactly which lengthinbits to use. If you do not have a way for the
52	encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and
53	a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of
54	bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-62 encoded
55	string that is one or two characters longer than necessary.
56
57	@return the contents of os in base-62 encoded form, as bytes
58	"""
59	# We call bytes() again for Python 2, to ensure literals are using future's
60	# Python 3-compatible variant.
61	os = [o for o in reversed(bytes(os))] # treat os as big-endian -- and we want to process the least-significant o first
62
63	value = 0
64	numvalues = 1 # the number of possible values that value could be
65	for o in os:
66	o *= numvalues
67	value += o
68	numvalues *= 256
69
70	chars = []
71	while numvalues > 0:
72	chars.append(value % 62)
73	value //= 62
74	numvalues //= 62
75
76	return translate(bytes([c for c in reversed(chars)]), v2ctranstable) # make it big-endian
77
78	def num_octets_that_encode_to_this_many_chars(numcs):
79	return log_floor(62**numcs, 256)
80
81	def num_chars_that_this_many_octets_encode_to(numos):
82	return log_ceil(256**numos, 62)
83
84	def a2b(cs):
85	"""
86	@param cs the base-62 encoded data (a string)
87	"""
88	return a2b_l(cs, num_octets_that_encode_to_this_many_chars(len(cs))*8)
89
90	def a2b_l(cs, lengthinbits):
91	"""
92	@param lengthinbits the number of bits of data in encoded into cs
93
94	a2b_l() will return a result just big enough to hold lengthinbits bits. So
95	for example if cs is 2 characters long (encoding between 5 and 12 bits worth
96	of data) and lengthinbits is 8, then a2b_l() will return a string of length
97	1 (since 1 byte is sufficient to store 8 bits), but if lengthinbits is 9,
98	then a2b_l() will return a string of length 2.
99
100	Please see the warning in the docstring of b2a_l() regarding the use of
101	b2a() versus b2a_l().
102
103	@return the data encoded in cs, as bytes
104	"""
105	# We call bytes() again for Python 2, to ensure literals are using future's
106	# Python 3-compatible variant.
107	cs = [c for c in reversed(bytes(translate(cs, c2vtranstable)))] # treat cs as big-endian -- and we want to process the least-significant c first
108
109	value = 0
110	numvalues = 1 # the number of possible values that value could be
111	for c in cs:
112	c *= numvalues
113	value += c
114	numvalues *= 62
115
116	numvalues = 2**lengthinbits
117	result_bytes = []
118	while numvalues > 1:
119	result_bytes.append(value % 256)
120	value //= 256
121	numvalues //= 256
122
123	return bytes([b for b in reversed(result_bytes)]) # make it big-endian

Note: See TracBrowser for help on using the repository browser.

Download in other formats: