-
Notifications
You must be signed in to change notification settings - Fork 19
/
base122.js
257 lines (233 loc) · 8.92 KB
/
base122.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
// Provides functions for encoding/decoding data to and from base-122.
let fs = require('fs')
, readline = require('readline')
;
// Tags for the two kinds of input encode() distinguishes: a string of bytes
// versus an indexable container of byte values.
const kString = 0;
const kUint8Array = 1;

// Mime type for which encodeFile() does not emit a data-b122m attribute.
const kDefaultMimeType = "image/jpeg";

// When true, debugLog() forwards its arguments to console.log.
const kDebug = false;

// Seven-bit values that encode() never emits as a single byte. Each one is
// represented by its index in this list inside a two-byte sequence instead.
const kIllegals = [
    0,  // null
    10, // newline
    13, // carriage return
    34, // double quote
    38, // ampersand
    92, // backslash
];

// Uses the illegal index to signify the last two-byte char encodes <= 7 bits.
const kShortened = 0b111;
/**
 * Encodes raw data into base-122.
 *
 * The input is consumed as a stream of seven-bit chunks (the final chunk is
 * zero-padded). A chunk whose value is not listed in kIllegals is emitted as a
 * single byte. A chunk that is listed is replaced by its index in kIllegals and
 * packed, together with the following seven bits, into a two-byte UTF-8
 * sequence 110iii1b 10bbbbbb. If no bits follow, the index field is set to
 * kShortened and the illegal chunk itself is stored in the seven payload bits.
 *
 * @param {Uint8Array|Buffer|Array|String} rawData - The data to be encoded. This can be an array
 * or Buffer with raw data bytes or a string of bytes (i.e. the type of argument to btoa())
 * @returns {Array} The base-122 encoded data as a regular array of UTF-8 character byte values.
 * @throws {RangeError} If rawData is a string containing a code point greater than 255.
 */
function encode(rawData) {
    const dataType = typeof rawData === 'string' ? kString : kUint8Array;
    const outData = [];
    let curIndex = 0; // Index of the input byte currently being read.
    let curBit = 0;   // Points to current bit needed within that byte.

    // Reads input byte i; string input is validated to hold byte values only.
    const getByte = dataType === kString
        ? (i) => {
            const val = rawData.codePointAt(i);
            if (val > 255) {
                throw new RangeError("Unexpected code point at position: " + i
                    + ". Expected value [0,255]. Got: " + val);
            }
            return val;
        }
        : (i) => rawData[i];

    // Get seven bits of input data. Returns false if there is no input left.
    function get7() {
        if (curIndex >= rawData.length) return false;
        // Shift, mask, unshift to get first part.
        const firstByte = getByte(curIndex);
        let firstPart = ((0b11111110 >>> curBit) & firstByte) << curBit;
        // Align it to a seven bit chunk.
        firstPart >>= 1;
        // Check if we need to go to the next byte for more bits.
        curBit += 7;
        if (curBit < 8) return firstPart; // Do not need next byte.
        curBit -= 8;
        curIndex++;
        // Now we want bits [0..curBit] of the next byte if it exists.
        if (curIndex >= rawData.length) return firstPart;
        const secondByte = getByte(curIndex);
        let secondPart = ((0xFF00 >>> curBit) & secondByte) & 0xFF;
        // Align it.
        secondPart >>= 8 - curBit;
        return firstPart | secondPart;
    }

    while (true) {
        // Grab 7 bits.
        const bits = get7();
        if (bits === false) break;
        // Only build the debug strings when debugging is enabled; this loop
        // runs once per seven input bits.
        if (kDebug) debugLog('Seven input bits', print7Bits(bits), bits);

        const illegalIndex = kIllegals.indexOf(bits);
        if (illegalIndex === -1) {
            outData.push(bits);
            continue;
        }

        // Since this will be a two-byte character, get the next chunk of seven bits.
        let nextBits = get7();
        if (kDebug) debugLog('Handle illegal sequence', print7Bits(bits), print7Bits(nextBits));
        let b1 = 0b11000010;
        let b2 = 0b10000000;
        if (nextBits === false) {
            debugLog('Last seven bits are an illegal sequence.');
            b1 |= (0b111 & kShortened) << 2;
            nextBits = bits; // Encode these bits after the shortened signifier.
        } else {
            b1 |= (0b111 & illegalIndex) << 2;
        }
        // Push first bit onto first byte, remaining 6 onto second.
        const firstBit = (nextBits & 0b01000000) > 0 ? 1 : 0;
        b1 |= firstBit;
        b2 |= nextBits & 0b00111111;
        outData.push(b1, b2);
    }
    return outData;
}
/**
 * Re-encodes an HTML or text file with base-64 data to one with base-122 data.
 *
 * In plain mode the whole input is treated as one base-64 string and the output
 * file receives the raw base-122 bytes. In HTML mode the input is processed line
 * by line: every src="data:<mime>;base64,<data>" attribute is replaced by
 * data-b122="<base-122 bytes>" (plus data-b122m="<mime>" when the mime type is
 * not kDefaultMimeType), and, if requested, the contents of decode.min.js are
 * inserted in a <script> element in front of </body>.
 *
 * Stream errors are rethrown from the stream's 'error' handler, so they surface
 * as uncaught exceptions rather than through the callback.
 *
 * @param {String} inpath - The filepath of the input file.
 * @param {String} outpath - The filepath of the output file.
 * @param {Object} options
 * @param {Boolean} options.html - Parse the input file as HTML and re-encode base64 data URIs.
 * @param {Boolean} options.addDecoder - If HTML, insert the minified decoder before </body>.
 * @param {Function} callback - Called upon completion.
 */
function encodeFile(inpath, outpath, options, callback) {
    // Tolerate a missing options object; every option defaults to falsy.
    const opts = options || {};
    const inStream = fs.createReadStream(inpath, {encoding: 'utf8'});
    const outStream = fs.createWriteStream(outpath, {defaultEncoding: 'utf8'});
    const decoderScript = opts.addDecoder ? fs.readFileSync('decode.min.js') : '';

    // Throws a real Error (with a stack trace) and keeps the stream error as its cause.
    const raise = (message, cause) => {
        const error = new Error(message);
        error.cause = cause;
        throw error;
    };
    outStream.on('error', (err) => raise('Error writing to ' + outpath, err));
    inStream.on('error', (err) => raise('Error reading from ' + inpath, err));

    if (!opts.html) {
        // This is a plain base-64 encoded file.
        let fileContents = "";
        inStream.on('data', (chunk) => { fileContents += chunk; });
        inStream.on('end', () => {
            // Write the encoded byte values as a Buffer. Spreading them into
            // String.fromCharCode() passes one call argument per byte and
            // overflows the call stack for large payloads.
            outStream.end(Buffer.from(encodeFromBase64(fileContents)), callback);
        });
        return;
    }

    const bodyRegExp = /<\/body>/i;
    const rl = readline.createInterface({ input: inStream });
    rl.on('line', (line) => {
        // A fresh global regexp per line, so lastIndex always starts at 0.
        const regexp = /src=[\"\']data:(.*?);base64,(.*?)[\"\']/ig;
        let results;
        let prevIndex = 0;
        while ((results = regexp.exec(line)) !== null) {
            outStream.write(line.substring(prevIndex, results.index));
            const mimetype = results[1];
            const encoded = encodeFromBase64(results[2]);
            outStream.write('data-b122="');
            // Raw bytes, written without going through a string (see above).
            outStream.write(Buffer.from(encoded));
            outStream.write('"');
            if (mimetype !== kDefaultMimeType) outStream.write(' data-b122m="' + mimetype + '"');
            prevIndex = regexp.lastIndex;
        }
        if (opts.addDecoder) {
            // Check for </body> to insert decoder.
            results = bodyRegExp.exec(line);
            // </body> cannot be valid if it's before any data URI.
            if (results !== null && results.index >= prevIndex) {
                outStream.write(line.substring(prevIndex, results.index) + '<script>' + decoderScript
                    + '</script>');
                prevIndex = results.index;
            }
        }
        outStream.write(line.substring(prevIndex) + "\n");
    });
    rl.on('close', () => {
        outStream.end('', callback);
    });
}
/**
 * Converts base-64 text to base-122 by decoding it to bytes and handing those
 * bytes to encode().
 * @param {String} base64String - A base-64 encoded string.
 * @returns {Array} - The base-122 encoded data.
 */
function encodeFromBase64(base64String) {
    const rawBytes = Buffer.from(base64String, 'base64');
    // With the "binary" encoding every byte becomes exactly one character,
    // which is the string form encode() accepts.
    const byteString = rawBytes.toString('binary');
    return encode(byteString);
}
/**
 * Decodes base-122 encoded data back to the original data.
 *
 * Each character contributes seven bits, or fourteen for a two-byte character
 * that carries an illegal-value index. Bits are packed into output bytes in
 * order; trailing bits that do not fill a whole byte are dropped.
 *
 * @param {Uint8Array|Buffer|String} base122Data - The base-122 data. Either the UTF-8 encoded
 * bytes (converted with utf8DataToString), or a string whose characters are the already
 * decoded code points.
 * @returns {Array} The data in a regular array representing byte values.
 */
function decode(base122Data) {
    const strData = typeof base122Data === 'string' ? base122Data : utf8DataToString(base122Data);
    const decoded = [];
    let curByte = 0;   // Output byte under construction.
    let bitOfByte = 0; // Number of bits of curByte that are already filled.

    // Appends the seven low bits of `byte` to the output bit stream.
    function push7(byte) {
        byte <<= 1;
        // Align this byte to offset for current byte.
        curByte |= (byte >>> bitOfByte);
        bitOfByte += 7;
        if (bitOfByte >= 8) {
            decoded.push(curByte);
            bitOfByte -= 8;
            // Now, take the remainder, left shift by what has been taken.
            curByte = (byte << (7 - bitOfByte)) & 255;
        }
    }

    for (let i = 0; i < strData.length; i++) {
        const c = strData.charCodeAt(i);
        if (c <= 127) {
            // One byte characters can be pushed directly.
            push7(c);
            continue;
        }
        // Two-byte character. charCodeAt gives the code point, thus
        // 0b110xxxxx 0b10yyyyyy will give => xxxxxyyyyyy, and the illegal
        // index sits in bits 8..10.
        const illegalIndex = (c >>> 8) & 7; // 7 = 0b111.
        // A shortened two-byte character only encodes <= 7 bits, so there is
        // no illegal value to restore in front of them.
        if (illegalIndex !== kShortened) push7(kIllegals[illegalIndex]);
        // Always push the rest.
        push7(c & 127);
    }
    return decoded;
}
/**
 * Interprets a sequence of bytes as UTF-8 text.
 * @param {Uint8Array|Buffer} data - The UTF-8 data.
 * @returns {String} A string with each character representing a code point.
 */
function utf8DataToString(data) {
    const utf8Bytes = Buffer.from(data);
    return utf8Bytes.toString('utf-8');
}
// For debugging: forwards every argument to console.log, but only while kDebug is set.
function debugLog(...messages) {
    if (!kDebug) return;
    console.log(...messages);
}
// For debugging: binary representation of num, left-padded with zeros to seven digits.
function print7Bits(num) {
    const binary = num.toString(2);
    const padding = "0000000".slice(binary.length);
    return padding + binary;
}
// For debugging: binary representation of num, left-padded with zeros to eight digits.
function print8Bits(num) {
    const binary = num.toString(2);
    const padding = "00000000".slice(binary.length);
    return padding + binary;
}
module.exports = {
encode: encode,
decode: decode,
encodeFromBase64: encodeFromBase64,
encodeFile: encodeFile,
utf8DataToString: utf8DataToString
};