Skip to content

Commit

Permalink
New extension: Numerical Encoding V2 (#1450)
Browse files Browse the repository at this point in the history
1. Replaced separate commands and reporters with just reporters (no more
ugly global state that can get broken by unlucky warp timer)
2. Significantly optimized encoding format.

The original extension always uses 6 numbers per UTF-8 character in the
original text. On ASCII this has an efficiency of 1.33 bits/number.

#1449 proposed
length-prefixed encoding which is a good improvement for pure ASCII
strings. Most of the commonly used parts of ASCII have 2-digit or 3-digit
decimal codes, so let's just say it's an average of about 3.5
numbers per character in the input after adding the length prefix.
That's about 2.28 bits/number of efficiency.

This PR does all encoding and decoding operations on UTF-8 binary
instead. Each group of 3 bits in the binary is encoded as one decimal digit in
[1, 8] (0 is not used, to avoid leading zeros), so this has 3 bits/number
of efficiency.

In theory we should be able to get $\log_2{10} \approx 3.321$
bits/number of efficiency with some smarter encoding. But I think this
is already pretty good and the encoding/decoding routines shouldn't be
too hard to port to other languages for people making bots.
  • Loading branch information
GarboMuffin authored May 9, 2024
1 parent 6e8d4fc commit 6784131
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 4 deletions.
6 changes: 3 additions & 3 deletions extensions/cs2627883/numericalencoding.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Name: Numerical Encoding
// Name: Numerical Encoding V1
// ID: cs2627883NumericalEncoding
// Description: Encode strings as numbers for cloud variables.
// Description: Use V2 instead as it is more efficient. V1 only exists for compatibility reasons.
// By: cs2627883 <https://scratch.mit.edu/users/cs2627883/>
// License: MIT

Expand Down Expand Up @@ -59,7 +59,7 @@
getInfo() {
return {
id: "cs2627883NumericalEncoding",
name: Scratch.translate("Numerical Encoding"),
name: Scratch.translate("Numerical Encoding V1"),
blocks: [
{
opcode: "NumericalEncode",
Expand Down
1 change: 1 addition & 0 deletions extensions/extensions.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
"DNin/wake-lock",
"Skyhigh173/json",
"mbw/xml",
"numerical-encoding-2",
"cs2627883/numericalencoding",
"DT/cameracontrols",
"TheShovel/CanvasEffects",
Expand Down
215 changes: 215 additions & 0 deletions extensions/numerical-encoding-2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Name: Numerical Encoding V2
// ID: numericalencoding2
// Description: Encode strings as numbers for cloud variables. Not compatible with V1 due to using much more efficient format.
// License: MPL-2.0

(function (Scratch) {
"use strict";

// Codec instances created once and shared by the helpers below.
// textEncoder turns text into UTF-8 bytes; textDecoder (default encoding, UTF-8)
// turns bytes back into a string.
const textEncoder = new TextEncoder();
const textDecoder = new TextDecoder();

/**
 * Converts raw bytes into a string made only of the digits "1" through "8".
 *
 * The input is read as one long bit stream. Every 3 bits become one digit
 * (bit value 0-7 is stored as the character "1"-"8"), so 3 bytes map to
 * exactly 8 digits:
 *
 *   AAAAAAAA BBBBBBBB CCCCCCCC
 *   11122233 34445556 66777888
 *
 * A trailing group of 1 or 2 bytes is padded with zero bits on the right,
 * producing 3 or 6 digits respectively.
 *
 * @param {Uint8Array} bytes
 * @returns {string}
 */
const encodeBinary = (bytes) => {
  // Character code of "1"; a 3-bit value v is written as the digit v + 1.
  const DIGIT_ONE = 49;

  const byteCount = bytes.length;
  // Writing into a pre-sized byte buffer and decoding it once at the end
  // avoids building the result through repeated string concatenation.
  const digits = new Uint8Array(Math.ceil((byteCount * 8) / 3));
  let written = 0;

  // Appends `count` digits taken from the low (3 * count) bits of `word`,
  // most significant group first.
  const writeGroups = (word, count) => {
    for (let shift = 3 * (count - 1); shift >= 0; shift -= 3) {
      digits[written++] = DIGIT_ONE + ((word >> shift) & 0b111);
    }
  };

  const leftover = byteCount % 3;
  const wholeEnd = byteCount - leftover;

  // Full groups: 3 bytes (24 bits) -> 8 digits.
  for (let pos = 0; pos < wholeEnd; pos += 3) {
    const word = (bytes[pos] << 16) | (bytes[pos + 1] << 8) | bytes[pos + 2];
    writeGroups(word, 8);
  }

  if (leftover === 1) {
    // 8 bits + 1 padding bit -> 3 digits.
    writeGroups(bytes[wholeEnd] << 1, 3);
  } else if (leftover === 2) {
    // 16 bits + 2 padding bits -> 6 digits.
    writeGroups(((bytes[wholeEnd] << 8) | bytes[wholeEnd + 1]) << 2, 6);
  }

  return textDecoder.decode(digits);
};

/**
 * Reverses encodeBinary: turns a string of digits "1"-"8" back into bytes.
 *
 * Each digit contributes 3 bits (character code minus 49). Eight digits
 * rebuild three bytes:
 *
 *   AAA BBB CCC DDD EEE FFF GGG HHH
 *   111 111 112 222 222 233 333 333
 *
 * Trailing digits that do not form a whole byte are ignored, so the result
 * holds floor(length * 3 / 8) bytes. The characters are not validated here;
 * callers are expected to check them first.
 *
 * @param {string} string
 * @returns {Uint8Array}
 */
const decodeBinary = (string) => {
  // 3-bit value stored at a given position ("1" has character code 49).
  const digitAt = (index) => string.charCodeAt(index) - 49;

  // How the 3-bit groups are stitched together into each of the three bytes.
  const firstByte = (a, b, c) => (a << 5) | (b << 2) | (c >> 1);
  const secondByte = (c, d, e, f) =>
    ((c & 0b1) << 7) | (d << 4) | (e << 1) | (f >> 2);
  const thirdByte = (f, g, h) => ((f & 0b11) << 6) | (g << 3) | h;

  const byteCount = Math.floor((string.length * 3) / 8);
  const bytes = new Uint8Array(byteCount);
  const wholeGroups = Math.floor(string.length / 8);

  // Full groups: 8 digits -> 3 bytes.
  for (let group = 0; group < wholeGroups; group++) {
    const src = group * 8;
    const dst = group * 3;
    const c = digitAt(src + 2);
    const f = digitAt(src + 5);
    bytes[dst] = firstByte(digitAt(src), digitAt(src + 1), c);
    bytes[dst + 1] = secondByte(c, digitAt(src + 3), digitAt(src + 4), f);
    bytes[dst + 2] = thirdByte(f, digitAt(src + 6), digitAt(src + 7));
  }

  // Partial group at the end: 3+ digits give one more byte, 6+ give two.
  const tailSrc = wholeGroups * 8;
  const tailDst = wholeGroups * 3;
  const remaining = byteCount - tailDst;

  if (remaining === 1 || remaining === 2) {
    const c = digitAt(tailSrc + 2);
    bytes[tailDst] = firstByte(digitAt(tailSrc), digitAt(tailSrc + 1), c);

    if (remaining === 2) {
      bytes[tailDst + 1] = secondByte(
        c,
        digitAt(tailSrc + 3),
        digitAt(tailSrc + 4),
        digitAt(tailSrc + 5)
      );
    }
  }

  return bytes;
};

/**
 * Encodes arbitrary text as a string of digits "1"-"8" by first converting
 * it to UTF-8 bytes and then running those bytes through encodeBinary.
 *
 * @param {string} text
 * @returns {string}
 */
const encodeText = (text) => {
  const utf8Bytes = textEncoder.encode(text);
  return encodeBinary(utf8Bytes);
};

/**
 * Decodes a digit string produced by encodeText back into text.
 *
 * If the input contains any character other than the digits 1 through 8 it
 * is rejected and an empty string is returned. Otherwise the digits are
 * converted to bytes with decodeBinary and those bytes are decoded as text.
 *
 * @param {string} text
 * @returns {string}
 */
const decodeText = (text) => {
  // Anchored on both ends so a single stray character anywhere rejects the input.
  const hasOnlyValidDigits = /^[1-8]*$/.test(text);
  if (!hasOnlyValidDigits) {
    return "";
  }

  const utf8Bytes = decodeBinary(text);
  return textDecoder.decode(utf8Bytes);
};

// Uncomment this to validate that the encoding and decoding is correct.
/*
const stressValidate = () => {
for (let i = 0; i < 100000; i++) {
const randomLength = Math.floor(Math.random() * 1000);
const randomArray = new Uint8Array(randomLength);
for (let j = 0; j < randomLength; j++) {
randomArray[j] = Math.floor(Math.random() * 256);
}
const encoded = encodeBinary(randomArray);
const decoded = decodeBinary(encoded);
if (decoded.length !== randomArray.length) {
debugger;
}
for (let j = 0; j < randomArray.length; j++) {
if (randomArray[j] !== decoded[j]) {
debugger;
}
}
}
};
console.time('validate');
stressValidate();
console.timeEnd('validate');
*/

/**
 * Extension class exposing two reporter blocks: one that encodes text as a
 * string of digits and one that decodes such a string back into text.
 */
class NumericalEncodingV2 {
  /**
   * Describes the extension (id, display name and its two reporter blocks).
   * The decode block's default input is the encoded form of the encode
   * block's default input.
   */
  getInfo() {
    const sampleText = Scratch.translate({
      default: "Hello",
      description: "Used as default input value to show how the encoding works",
    });

    // Both blocks are reporters taking a single string input named TEXT.
    const makeReporter = (opcode, text, defaultValue) => ({
      blockType: Scratch.BlockType.REPORTER,
      opcode,
      text,
      arguments: {
        TEXT: {
          type: Scratch.ArgumentType.STRING,
          defaultValue,
        },
      },
    });

    const name = Scratch.translate("Numerical Encoding V2");
    const encodeBlock = makeReporter(
      "encode",
      Scratch.translate("encode [TEXT] as numbers"),
      sampleText
    );
    const decodeBlock = makeReporter(
      "decode",
      Scratch.translate("decode [TEXT] as text"),
      encodeText(sampleText)
    );

    return {
      id: "numericalencoding2",
      name,
      blocks: [encodeBlock, decodeBlock],
    };
  }

  /** Reporter: casts TEXT to a string and returns its encoded form. */
  encode({ TEXT }) {
    const input = Scratch.Cast.toString(TEXT);
    return encodeText(input);
  }

  /** Reporter: casts TEXT to a string and returns its decoded form. */
  decode({ TEXT }) {
    const input = Scratch.Cast.toString(TEXT);
    return decodeText(input);
  }
}

// Create one instance of the extension and hand it to Scratch's extension registry.
Scratch.extensions.register(new NumericalEncodingV2());
})(Scratch);
2 changes: 1 addition & 1 deletion images/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ All images in this folder are licensed under the [GNU General Public License ver
## TheShovel/CanvasEffects.svg
- Created by @Pizzalover4783 in https://github.com/TurboWarp/extensions/issues/90#issuecomment-1526886264.

## cs2627883/numericalencoding.svg
## numerical-encoding-2.svg
- Created by [@NexusKitten](https://scratch.mit.edu/users/namelesscat/).
- Silkscreen font used under [Open Font License](https://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=OFL)
- Dango based on dango from [Twemoji](https://twemoji.twitter.com/) under [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/).
Expand Down
File renamed without changes

0 comments on commit 6784131

Please sign in to comment.