-
Notifications
You must be signed in to change notification settings - Fork 238
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New extension: Numerical Encoding V2 (#1450)
1. Replaced separate commands and reporters with just reporters (no more ugly global state that can get broken by an unlucky warp timer). 2. Significantly optimized the encoding format. The original extension always uses 6 numbers per UTF-8 character in the original text. On ASCII this has an efficiency of 1.33 bits/number. #1449 proposed a length-prefixed encoding, which is a good improvement for pure ASCII strings. Most of the commonly used parts of ASCII have 2-digit or 3-digit decimal codes, so let's say it averages about 3.5 numbers per input character after adding the length prefix. That's about 2.28 bits/number of efficiency. This PR instead does all encoding and decoding operations on the UTF-8 binary. Each group of 3 bits in the binary is encoded by one decimal digit in [1, 8] (0 is not used, to avoid leading zeros), so this has 3 bits/number of efficiency. In theory we should be able to get $\log_2{10} \approx 3.321$ bits/number of efficiency with some smarter encoding, but I think this is already pretty good, and the encoding/decoding routines shouldn't be too hard to port to other languages for people making bots.
- Loading branch information
1 parent
6e8d4fc
commit 6784131
Showing
5 changed files
with
220 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
// Name: Numerical Encoding V2
// ID: numericalencoding2
// Description: Encode strings as numbers for cloud variables. Not compatible with V1 due to using much more efficient format.
// License: MPL-2.0
|
||
(function (Scratch) { | ||
"use strict"; | ||
|
||
// Codec instances created once and shared by the text helpers in this file.
// TextEncoder always produces UTF-8; TextDecoder is constructed with its
// default options here.
const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder();
|
||
/**
 * Encode raw bytes as a string of decimal digits in the range 1-8.
 *
 * The bytes are read as one continuous bit stream, most significant bit
 * first, and cut into 3-bit groups. Each group's value (0-7) is emitted as
 * the digit `value + 1`, so "0" never appears in the output. If the total
 * number of bits is not a multiple of 3, the last group is padded on the
 * right with zero bits.
 *
 * @param {Uint8Array} bytes Bytes to encode.
 * @returns {string} `Math.ceil(bytes.length * 8 / 3)` characters, each "1"-"8".
 */
const encodeBinary = (bytes) => {
  // Character code of "1"; adding a 3-bit value (0-7) yields "1" through "8".
  const DIGIT_ONE = 49;

  // Pre-allocating buffer seems to be much faster than string concatenation
  const buffer = new Uint8Array(Math.ceil((bytes.length * 8) / 3));
  let ptr = 0;

  // Declared outside the loop on purpose: the tail handling below needs to
  // know where the full 3-byte groups stopped.
  let i = 0;

  // Full groups: 3 bytes (24 bits) become exactly 8 digits.
  for (; i + 3 <= bytes.length; i += 3) {
    // AAAAAAAA BBBBBBBB CCCCCCCC
    // 11122233 34445556 66777888
    const a = bytes[i];
    const b = bytes[i + 1];
    const c = bytes[i + 2];
    buffer[ptr++] = DIGIT_ONE + (a >> 5);
    buffer[ptr++] = DIGIT_ONE + ((a >> 2) & 0b111);
    buffer[ptr++] = DIGIT_ONE + (((a & 0b11) << 1) | (b >> 7));
    buffer[ptr++] = DIGIT_ONE + ((b >> 4) & 0b111);
    buffer[ptr++] = DIGIT_ONE + ((b >> 1) & 0b111);
    buffer[ptr++] = DIGIT_ONE + (((b & 0b1) << 2) | (c >> 6));
    buffer[ptr++] = DIGIT_ONE + ((c >> 3) & 0b111);
    buffer[ptr++] = DIGIT_ONE + (c & 0b111);
  }

  // Tail: 0, 1 or 2 bytes are left over after the full groups.
  const remaining = bytes.length - i;
  if (remaining === 1) {
    // AAAAAAAA
    // 11122233 3   (third digit is padded with one zero bit)
    const a = bytes[i];
    buffer[ptr++] = DIGIT_ONE + (a >> 5);
    buffer[ptr++] = DIGIT_ONE + ((a >> 2) & 0b111);
    buffer[ptr++] = DIGIT_ONE + ((a & 0b11) << 1);
  } else if (remaining === 2) {
    // AAAAAAAA BBBBBBBB
    // 11122233 34445556 66   (sixth digit is padded with two zero bits)
    const a = bytes[i];
    const b = bytes[i + 1];
    buffer[ptr++] = DIGIT_ONE + (a >> 5);
    buffer[ptr++] = DIGIT_ONE + ((a >> 2) & 0b111);
    buffer[ptr++] = DIGIT_ONE + (((a & 0b11) << 1) | (b >> 7));
    buffer[ptr++] = DIGIT_ONE + ((b >> 4) & 0b111);
    buffer[ptr++] = DIGIT_ONE + ((b >> 1) & 0b111);
    buffer[ptr++] = DIGIT_ONE + ((b & 0b1) << 2);
  }

  // The buffer holds only the ASCII codes of "1"-"8", so decoding it yields
  // one character per digit.
  return textDecoder.decode(buffer);
};
|
||
/**
 * Decode a string of digits "1"-"8" (3 bits per digit, most significant bit
 * first) back into bytes.
 *
 * No validation is performed here: the caller must make sure every character
 * is in "1"-"8", otherwise the resulting byte values are meaningless.
 * Trailing digits that do not complete a whole byte are treated as padding
 * and dropped, so the result has `Math.floor(string.length * 3 / 8)` bytes.
 *
 * @param {string} string Digit string to decode.
 * @returns {Uint8Array} The decoded bytes.
 */
const decodeBinary = (string) => {
  // Character code of "1"; subtracting it maps "1"-"8" to the values 0-7.
  const DIGIT_ONE = 49;

  const encodedBytes = Math.floor((string.length * 3) / 8);
  const result = new Uint8Array(encodedBytes);
  let ptr = 0;

  // Declared outside the loop on purpose: the tail handling below reads the
  // digits that follow the last full 8-digit group.
  let i = 0;

  // Full groups: 8 digits (24 bits) become exactly 3 bytes.
  for (; i + 8 <= string.length; i += 8) {
    // AAA BBB CCC DDD EEE FFF GGG HHH
    // 111 111 112 222 222 233 333 333
    const a = string.charCodeAt(i) - DIGIT_ONE;
    const b = string.charCodeAt(i + 1) - DIGIT_ONE;
    const c = string.charCodeAt(i + 2) - DIGIT_ONE;
    const d = string.charCodeAt(i + 3) - DIGIT_ONE;
    const e = string.charCodeAt(i + 4) - DIGIT_ONE;
    const f = string.charCodeAt(i + 5) - DIGIT_ONE;
    const g = string.charCodeAt(i + 6) - DIGIT_ONE;
    const h = string.charCodeAt(i + 7) - DIGIT_ONE;
    result[ptr++] = (a << 5) | (b << 2) | (c >> 1);
    result[ptr++] = ((c & 0b1) << 7) | (d << 4) | (e << 1) | (f >> 2);
    result[ptr++] = ((f & 0b11) << 6) | (g << 3) | h;
  }

  // Tail: 0, 1 or 2 bytes are still owed after the full groups.
  const remaining = encodedBytes - ptr;
  if (remaining === 1) {
    // AAA BBB CCC
    // 111 111 11   (lowest bit of C is padding)
    const a = string.charCodeAt(i) - DIGIT_ONE;
    const b = string.charCodeAt(i + 1) - DIGIT_ONE;
    const c = string.charCodeAt(i + 2) - DIGIT_ONE;
    result[ptr] = (a << 5) | (b << 2) | (c >> 1);
  } else if (remaining === 2) {
    // AAA BBB CCC DDD EEE FFF
    // 111 111 112 222 222 2   (lowest two bits of F are padding)
    const a = string.charCodeAt(i) - DIGIT_ONE;
    const b = string.charCodeAt(i + 1) - DIGIT_ONE;
    const c = string.charCodeAt(i + 2) - DIGIT_ONE;
    const d = string.charCodeAt(i + 3) - DIGIT_ONE;
    const e = string.charCodeAt(i + 4) - DIGIT_ONE;
    const f = string.charCodeAt(i + 5) - DIGIT_ONE;
    result[ptr++] = (a << 5) | (b << 2) | (c >> 1);
    result[ptr] = ((c & 0b1) << 7) | (d << 4) | (e << 1) | (f >> 2);
  }

  return result;
};
|
||
/**
 * Encode text as a string of digits "1"-"8": the text is first converted to
 * bytes by the shared `textEncoder`, then those bytes are passed to
 * `encodeBinary`.
 *
 * @param {string} text Text to encode.
 * @returns {string} The digit string produced by `encodeBinary`.
 */
const encodeText = (text) => encodeBinary(textEncoder.encode(text));
|
||
/**
 * Decode a digit string (as produced by `encodeText`) back into text.
 *
 * @param {string} text Digit string; every character must be "1"-"8".
 * @returns {string} The decoded text, or "" if `text` contains any character
 *   outside "1"-"8".
 */
const decodeText = (text) => {
  // Reject anything that is not purely digits 1-8 before touching the bit
  // decoder, which does no validation of its own. The regex has no `g`/`y`
  // flag, so `test` is stateless.
  if (/[^1-8]/.test(text)) {
    return "";
  }
  return textDecoder.decode(decodeBinary(text));
};
|
||
// Uncomment this to validate that the encoding and decoding is correct.
/*
const stressValidate = () => {
  for (let i = 0; i < 100000; i++) {
    const randomLength = Math.floor(Math.random() * 1000);
    const randomArray = new Uint8Array(randomLength);
    for (let j = 0; j < randomLength; j++) {
      randomArray[j] = Math.floor(Math.random() * 256);
    }
    const encoded = encodeBinary(randomArray);
    const decoded = decodeBinary(encoded);
    if (decoded.length !== randomArray.length) {
      debugger;
    }
    for (let j = 0; j < randomArray.length; j++) {
      if (randomArray[j] !== decoded[j]) {
        debugger;
      }
    }
  }
};
console.time('validate');
stressValidate();
console.timeEnd('validate');
*/
|
||
/**
 * Extension class exposing two reporter blocks: one that encodes text as a
 * digit string and one that decodes such a digit string back to text.
 */
class NumericalEncodingV2 {
  /**
   * Describe the extension and its blocks.
   *
   * @returns {object} Extension metadata: id, translated name, and the
   *   `encode` / `decode` reporter block definitions.
   */
  getInfo() {
    const example = Scratch.translate({
      default: "Hello",
      description:
        "Used as default input value to show how the encoding works",
    });

    return {
      id: "numericalencoding2",
      name: Scratch.translate("Numerical Encoding V2"),
      blocks: [
        {
          blockType: Scratch.BlockType.REPORTER,
          opcode: "encode",
          text: Scratch.translate("encode [TEXT] as numbers"),
          arguments: {
            TEXT: {
              type: Scratch.ArgumentType.STRING,
              defaultValue: example,
            },
          },
        },
        {
          blockType: Scratch.BlockType.REPORTER,
          opcode: "decode",
          text: Scratch.translate("decode [TEXT] as text"),
          arguments: {
            TEXT: {
              type: Scratch.ArgumentType.STRING,
              // Default input of the decode block is the encoded form of the
              // (translated) example used by the encode block.
              defaultValue: encodeText(example),
            },
          },
        },
      ],
    };
  }

  /**
   * Reporter for the `encode` opcode.
   *
   * @param {{TEXT: *}} args Block arguments; TEXT is cast to a string first.
   * @returns {string} Result of `encodeText` on the cast value.
   */
  encode({ TEXT }) {
    return encodeText(Scratch.Cast.toString(TEXT));
  }

  /**
   * Reporter for the `decode` opcode.
   *
   * @param {{TEXT: *}} args Block arguments; TEXT is cast to a string first.
   * @returns {string} Result of `decodeText` on the cast value.
   */
  decode({ TEXT }) {
    return decodeText(Scratch.Cast.toString(TEXT));
  }
}
|
||
// Register an instance of the extension with the Scratch runtime.
Scratch.extensions.register(new NumericalEncodingV2());
})(Scratch); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes