-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathalpha.py
executable file
·154 lines (115 loc) · 4.11 KB
/
alpha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#!/usr/bin/env python3
#Encodes a base 10 integer to a string representation with the given alphabet.
# To pad strings to a fixed size you can put the max b10 value in pad_b10_max.
# For example, to pad values that have 256 possible options (i.e. one byte), you would put 256 here.
# https://stackoverflow.com/questions/1119722/base-62-conversion
def b10_to_token(b10, alphabet, pad_b10_max=0):
    """Encode a non-negative integer as a string of symbols from ``alphabet``.

    The base is ``len(alphabet)``; ``alphabet[0]`` is the zero digit and the
    most significant digit comes first.

    Args:
        b10: Non-negative integer to encode.
        alphabet: Sequence of single-character symbols, at least two long.
        pad_b10_max: When greater than zero, the number of distinct values
            the token must be able to represent (e.g. 256 for one byte).
            The token is left-padded with ``alphabet[0]`` to the width of
            ``pad_b10_max - 1`` encoded in the same alphabet. A token that is
            already wider than that is returned unchanged.

    Returns:
        The encoded token string.

    Raises:
        ValueError: If ``alphabet`` has fewer than two symbols or ``b10`` is
            negative.
    """
    base = len(alphabet)
    # A base below 2 can never terminate the division loop (base 1) or
    # divides by zero (base 0), so reject it up front.
    if base < 2:
        raise ValueError('alphabet must contain at least 2 symbols, got %d' % base)
    # Negative values have no digit representation in this scheme.
    if b10 < 0:
        raise ValueError('b10 must be non-negative, got %r' % (b10,))

    # Collect digits least-significant first, then reverse once at the end.
    digits = []
    while b10 > 0:
        b10, rem = divmod(b10, base)
        digits.append(alphabet[rem])
    # Zero produces no digits, so fall back to the zero symbol.
    token = ''.join(reversed(digits)) or alphabet[0]

    if pad_b10_max > 0:
        # Count the digits needed for the largest representable value.
        largest = pad_b10_max - 1
        pad_width = 1
        while largest >= base:
            largest //= base
            pad_width += 1
        # A non-positive repeat count yields '', leaving long tokens as-is.
        token = alphabet[0] * (pad_width - len(token)) + token
    return token
#Takes a token and returns a base 10 integer based on the given alphabet (works with or without padding).
# https://stackoverflow.com/questions/1119722/base-62-conversion
def token_to_b10(token, alphabet):
    """Decode ``token`` back into an integer using ``alphabet`` as the digits.

    The base is ``len(alphabet)`` and the first character of ``token`` is the
    most significant digit. Leading zero symbols do not change the result,
    so padded and unpadded tokens decode to the same value. An empty token
    decodes to 0.

    Raises:
        ValueError: If a character of ``token`` is not in ``alphabet``
            (raised by ``alphabet.index``).
    """
    radix = len(alphabet)
    value = 0
    # Horner's scheme: shift the running value one digit left, add the next.
    for symbol in token:
        value = value * radix + alphabet.index(symbol)
    return value
#Takes a section of bytes, turns them into an integer, returns the base 10 value of them.
# Warning: Be careful with the width of said bytes! No clue what a width>8 will yield!
def bytes_to_b10(bs):
    """Interpret ``bs`` as one big-endian unsigned integer and return it.

    An empty input yields 0.
    """
    return int.from_bytes(bs, byteorder='big')
#Takes a block of bytes, turns them into a base 10 integer, and returns a padded token for a given alphabet.
# Warning: Be careful with the width of said bytes! No clue what a width>8 will yield!
def bytes_to_token(bs, alphabet):
    """Encode one block of bytes as a fixed-width token in ``alphabet``.

    The block is read as a big-endian integer and encoded with
    ``b10_to_token``, padded so that every block of ``len(bs)`` bytes yields
    a token of the same length.

    Args:
        bs: The block of bytes to encode.
        alphabet: Symbols used as digits; passed through to ``b10_to_token``.

    Returns:
        The padded token string for this block.
    """
    # Get the base 10 representation of the bytes.
    b10 = bytes_to_b10(bs)
    # b10_to_token expects the NUMBER of representable values (256 for one
    # byte), not the largest value. Passing 0xFF..FF sized the padding for
    # max-1, which is one symbol too short whenever 256**n - 1 is an exact
    # power of the base; the all-0xFF block then came out longer than others.
    value_count = 1 << (8 * len(bs))
    # Convert to a padded token and return it.
    return b10_to_token(b10, alphabet, value_count)
#Converts bytes to a token string with the given alphabet, one fixed-width token per block of block_width bytes.
# Warning: Be careful with the width of said bytes! No clue what a width>8 will yield!
def bytes_to_token_str(bs, alphabet, block_width=8):
    """Encode ``bs`` as a concatenation of per-block tokens.

    The input is cut into consecutive blocks of ``block_width`` bytes (the
    final block may be shorter) and each block is encoded with
    ``bytes_to_token``. Empty input produces an empty string.

    Args:
        bs: Bytes to encode.
        alphabet: Symbols used as digits; passed through to ``bytes_to_token``.
        block_width: Number of bytes per block; must be at least 1.

    Returns:
        The tokens of all blocks joined in order.

    Raises:
        ValueError: If ``block_width`` is less than 1.
    """
    # A non-positive width would never advance through the input.
    if block_width < 1:
        raise ValueError('block_width must be at least 1, got %r' % (block_width,))
    # Encode each block and join once instead of growing a string in a loop.
    return ''.join(
        bytes_to_token(bs[start:start + block_width], alphabet)
        for start in range(0, len(bs), block_width)
    )
#Converts a token string (as produced by bytes_to_token_str) back to bytes with the given alphabet.
# Warning: Be careful with the width of said bytes! No clue what a width>8 will yield!
def token_str_to_bytes(token_str, alphabet, block_width=8):
    """Decode a token string back into bytes.

    The string is cut into tokens of the width that a full ``block_width``
    byte block encodes to. Each token is decoded with ``token_to_b10`` and
    written out big-endian. A shorter final token is treated as a shorter
    final block, whose byte width is inferred from the token length.

    Args:
        token_str: Concatenated tokens to decode.
        alphabet: Symbols used as digits.
        block_width: Number of bytes per full block.

    Returns:
        The decoded bytes of all blocks, in order.
    """
    # Width, in symbols, of the token for a full block holding its max value.
    full_block_max = bytes_to_b10(b'\xff' * block_width)
    enc_width = len(b10_to_token(full_block_max, alphabet, full_block_max))

    # Byte width of the block being decoded; only shrinks for a short tail.
    width = block_width
    chunks = []
    for start in range(0, len(token_str), enc_width):
        token = token_str[start:start + enc_width]
        value = token_to_b10(token, alphabet)
        if len(token) < enc_width:
            # Short token: this is the last, partial block. Its token can
            # represent values below len(alphabet) ** len(token).
            limit = len(alphabet) ** len(token)
            # Shrink the width until its max byte value fits under the limit.
            while width > 0:
                width -= 1
                if bytes_to_b10(b'\xff' * width) < limit:
                    break
        # Write the value back out as big-endian bytes of the block's width.
        chunks.append(value.to_bytes(width, 'big'))
    return b''.join(chunks)
if __name__ == '__main__':
    # Round-trip self-check: encode a sample payload, decode it again and
    # report whether the result matches the input.
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    # Sample payload; its length is not a multiple of the block width, so
    # the final block is a partial one.
    payload = b'\x00\x00\x00\x00\x00\x00\x00\x00hello th\x00\x00\x00\x00\x00\x00\x00\x00partner\x00\x00'
    print('ini(%d)=%s' % (len(payload), payload))

    # Encode with 8-byte blocks.
    encoded = bytes_to_token_str(payload, alphabet, 8)
    print('enc=%s' % encoded)

    # Decode with the same block width.
    decoded = token_str_to_bytes(encoded, alphabet, 8)
    print('dec(%d)=%s' % (len(decoded), decoded))

    # Report the verdict.
    print('SUCCESS' if payload == decoded else 'ERROR')