From 99c6c7f8bdf02535f98d1df3f81113ecfc2ee43c Mon Sep 17 00:00:00 2001
From: Simon von Hackewitz <simon.von.hackewitz@tokiwa.software>
Date: Thu, 11 Jul 2024 17:43:23 +0200
Subject: [PATCH 1/4] implement encodings base32 and base32hex base32 (running
 its test) causes a java exception, left there for debugging

---
 lib/encodings/base32.fz                       | 169 ++++++++++++++++++
 lib/encodings/base32hex.fz                    |  45 +++++
 tests/base32/Makefile                         |  25 +++
 tests/base32/base32_test.fz                   | 161 +++++++++++++++++
 tests/base32/base32_test.fz.expected_err      |   0
 tests/base32/base32_test.fz.expected_out      |  24 +++
 tests/base32hex/Makefile                      |  25 +++
 tests/base32hex/base32hex_test.fz             | 161 +++++++++++++++++
 .../base32hex/base32hex_test.fz.expected_err  |   0
 .../base32hex/base32hex_test.fz.expected_out  |  24 +++
 10 files changed, 634 insertions(+)
 create mode 100644 lib/encodings/base32.fz
 create mode 100644 lib/encodings/base32hex.fz
 create mode 100644 tests/base32/Makefile
 create mode 100644 tests/base32/base32_test.fz
 create mode 100644 tests/base32/base32_test.fz.expected_err
 create mode 100644 tests/base32/base32_test.fz.expected_out
 create mode 100644 tests/base32hex/Makefile
 create mode 100644 tests/base32hex/base32hex_test.fz
 create mode 100644 tests/base32hex/base32hex_test.fz.expected_err
 create mode 100644 tests/base32hex/base32hex_test.fz.expected_out

diff --git a/lib/encodings/base32.fz b/lib/encodings/base32.fz
new file mode 100644
index 0000000000..761a043c8a
--- /dev/null
+++ b/lib/encodings/base32.fz
@@ -0,0 +1,169 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion standard library feature base32
+#
+# -----------------------------------------------------------------------
+
+# Base32 encoding and decoding as defined in RFC 4648
+#
+public base32 is
+
+  # allows redefinition e.g. for base32hex
+  module get_alphabet => "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567".utf8.as_array
+  module encoding_name => "base32"
+
+  # NYI: compiler does currently not optimize features without arguments to treat them like fields
+  alphabet := get_alphabet
+
+  # decode a valid base32 characters to 5 bits
+  # visibility module to allow redefinition in base32hex
+  module quintet_bits(n u8) =>
+    if n >= 65 && n <= 90        # case A-Z
+      n.as_u64 - 65
+
+    else #if n >= 50 && n <= 55  # case 2-7
+      n.as_u64 - 24
+
+  # checks if a character is valid in the encoding
+  # visibility module to allow redefinition in base32hex
+  module is_valid(c u8) =>
+    (c >= 65 && c <= 90) || (c >= 50 && c <= 55)
+
+  # Encodes a given byte sequence in base32, output is padded to multiple of 8
+  # returns a sequence of ascii values
+  public encode(data) =>
+
+    # extract 8 quintets from 40 bit (of an 64 bit integer)
+    enc40(n u64) =>
+      block := array i32 8 (i->((n >> (35-i*5).as_u64) & 31).as_i32)
+      block.map (x->alphabet[x])
+
+    for
+      res Sequence u8 := [], next        # the encoded input data
+      i := 0, i+1
+      last_n u64 := 0, i %% 5 ?  0 : n
+      b in data
+      n := (last_n << 8) + b.as_u64
+      next := if i%5=4 then res ++ enc40 n
+              else          res
+    else
+      bit_len := data.length%5 * 8       # number of bits in last input block
+
+      if bit_len = 0
+        res
+      else
+        block_len := bit_len/5 + (bit_len%%5 ? 0 : 1)  # number of characters in last block
+        res ++ (enc40 (last_n<<((u64 40)-bit_len.as_u64))).slice 0 block_len ++ (array u8 (8-block_len) _->61)
+
+
+  # Encodes a given byte sequence in base32, output is padded to multiple of 8
+  # returns a string
+  public encode_to_string(data array u8) =>
+    String.type.from_bytes (encode data)
+
+
+  # decodes a base32 string, decoding is strict as required by RFC 4648
+  # lowercase letters, non alphabet characters, line breaks, missing padding cause errors
+  # NYI: decoding does currently not reject encodings where the padding bits have not been set to zero prior to encoding
+  #      therefore in some cases multiple encodings can be decoded to the same data
+  #      See RFC4648 section 3.5: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
+  public decode_str(data String) =>
+    decode data.utf8.as_array
+
+
+  # decodes a sequence of ASCII characters, decoding is strict as required by RFC 4648
+  # lowercase letters, non alphabet characters, line breaks, missing padding cause errors
+  # NYI: decoding does currently not reject encodings where the padding bits have not been set to zero prior to encoding
+  #      therefore in some cases multiple encodings can be decoded to the same data
+  #      See RFC4648 section 3.5: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
+  public decode(data) =>
+
+    # determine size of padding, i.e. number of '=' (61 in ASCII) at the end
+    pad_size :=
+      for
+        pad_len := 0, pad_len + (data[i] = 61 ? 1 : 0)
+        i in (data.indices.reverse)
+        _ in 1..6                             # padding can not be longer than 6
+      while data[i] = 61
+      else
+        pad_len = 2 ? 1                       # padding can not be 2
+                    : (pad_len = 5 ? 4        # padding can not be 5
+                                   : pad_len)
+
+    dec_input(i) =>
+
+      if i >= data.length
+        error "length of input data is not multiple of 8, as required by RFC4648"
+      else
+        c := data[i]
+
+        # base32 alphabet character
+        if (is_valid c)
+          outcome (quintet_bits c)
+
+        # padding character =
+        else if c = 61
+          if i < data.length - pad_size
+            # only complain about the pad char if the length is ok, otherwise reporting the wrong length is probably more helpful
+            if data.length%%8
+              error """
+                    padding character '=' not allowed within the input data, only at the very end, \
+                    as required by RFC464 (padding length of 2 or 5 can never occur)"""
+            else
+              error "length of input data is not multiple of 8, as required by RFC4648"
+          else outcome (u64 0)  # replace padding with zeros for decoding
+
+        # line break
+        else if c = 10 || c = 13
+          error """
+                line breaks are not allowed within encoded data, as required by RFC464, found \
+                {if c=10 then "LF" else "CR"} at position $i"""
+
+        # other non alphabet character
+        else
+          inv_char := String.type.from_bytes (data.slice i (i+4 > data.length ? data.length : i+4))
+                            .substring_codepoint 0 1
+
+          error "invalid $encoding_name input at byte position $i, decoding to unicode character '$inv_char'"
+
+    for
+      res list u8 := nil, res ++ bytes  # contains the decoded data at the end
+      nxt := 0, nxt + 8
+      last_err := false, is_err
+      qnt_last list (outcome u64) := nil, quintets
+
+    while nxt < data.length && !last_err
+    do
+      quintets := (nxt :: +1).map(i->dec_input i).take 8
+      is_err := (quintets ∃ el -> el.is_error)
+
+      # convert quintets into a 40 bit number, break it up into five bytes
+      bits := if is_err then 0
+              else quintets.map (.val)
+                           .zip (((u64 35) :: -5).take 8) (<<)
+                           .foldf (|) (u64 0)
+      bytes := [(u64 32), 24, 16, 8, 0].map (i->(bits >> i).low8bits)
+
+    else
+      if last_err
+        (qnt_last.filter (e -> e.is_error)).first.err
+      else
+        dump_size := 5 - ((40 - (pad_size * 5)) / 8)    # number of decoded bytes caused by zeroed padding
+        outcome (res.take res.count-dump_size).as_array # remove zero bytes caused by padding
diff --git a/lib/encodings/base32hex.fz b/lib/encodings/base32hex.fz
new file mode 100644
index 0000000000..00d1b3964c
--- /dev/null
+++ b/lib/encodings/base32hex.fz
@@ -0,0 +1,45 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion standard library feature base32hex
+#
+# -----------------------------------------------------------------------
+
+# Base32hex encoding and decoding as defined in RFC 4648
+#
+public base32hex : base32 is
+
+  # redefines the alphabet and the encoding name for base32hex
+  redef get_alphabet => "0123456789ABCDEFGHIJKLMNOPQRSTUV".utf8.as_array
+  redef encoding_name => "base32hex"
+
+  # NYI: This causes a java exception
+  alphabet := get_alphabet
+
+  # decode a valid base32hex characters to 5 bits
+  redef quintet_bits(n u8) =>
+    if n >= 65 && n <= 86        # case A-V
+      n.as_u64 - 55
+
+    else #if n >= 48 && n <= 57  # case 0-9
+      n.as_u64 - 48
+
+  # checks if a character is valid in the encoding
+  redef is_valid(c u8) =>
+    (c >= 65 && c <= 86) || (c >= 48 && c <= 57)
diff --git a/tests/base32/Makefile b/tests/base32/Makefile
new file mode 100644
index 0000000000..e01fca7f55
--- /dev/null
+++ b/tests/base32/Makefile
@@ -0,0 +1,25 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion test Makefile
+#
+# -----------------------------------------------------------------------
+
+override NAME = base32_test
+include ../simple.mk
diff --git a/tests/base32/base32_test.fz b/tests/base32/base32_test.fz
new file mode 100644
index 0000000000..5b070d5dd0
--- /dev/null
+++ b/tests/base32/base32_test.fz
@@ -0,0 +1,161 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion test base32
+#
+# -----------------------------------------------------------------------
+
+base32_test is
+
+  my_c : choice String (array u8) is
+  mk_choice(x my_c) => x
+
+  # RFC 4648 test vectors
+  base32_test_vectors array (tuple my_c String) :=
+    [(mk_choice $"", ""),
+     (mk_choice $"f", "MY======"),
+     (mk_choice "fo", "MZXQ===="),
+     (mk_choice "foo", "MZXW6==="),
+     (mk_choice "foob", "MZXW6YQ="),
+     (mk_choice "fooba", "MZXW6YTB"),
+     (mk_choice "foobar", "MZXW6YTBOI======")]
+
+  # Additional test vectors
+  own_test_vectors array (tuple my_c String) :=
+    [(mk_choice [(u8 0), 0, 0, 0],    "AAAAAAA="),
+     (mk_choice [(u8 255), 255, 255], "77776==="),
+     (mk_choice (array u8 5 _->255), "77777777"),
+     (mk_choice "123890ABCXYZabcxyz_:;>~<%&\$§!", "GEZDGOBZGBAUEQ2YLFNGCYTDPB4XUXZ2HM7H4PBFEYSMFJZB"),
+     (mk_choice """
+     Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidun\
+     t ut labore et dolore magna aliquyam erat, sed dia voluptua. At vero eos et accusam et justo d\
+     uo dolores et ea rebum.
+
+     Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.""",
+     """
+     JRXXEZLNEBUXA43VNUQGI33MN5ZCA43JOQQGC3LFOQWCAY3PNZZWK5DFOR2XEIDTMFSGS4DTMNUW4ZZAMVWGS5DSFQQHGZ\
+     LEEBSGSYLNEBXG63TVNV4SAZLJOJWW6ZBAORSW24DPOIQGS3TWNFSHK3TUEB2XIIDMMFRG64TFEBSXIIDEN5WG64TFEBWW\
+     CZ3OMEQGC3DJOF2XSYLNEBSXEYLUFQQHGZLEEBSGSYJAOZXWY5LQOR2WCLRAIF2CA5TFOJXSAZLPOMQGK5BAMFRWG5LTMF\
+     WSAZLUEBVHK43UN4QGI5LPEBSG63DPOJSXGIDFOQQGKYJAOJSWE5LNFYFAUU3UMV2CAY3MNF2GCIDLMFZWIIDHOVRGK4TH\
+     OJSW4LBANZXSA43FMEQHIYLLNFWWC5DBEBZWC3TDOR2XGIDFON2CATDPOJSW2IDJOBZXK3JAMRXWY33SEBZWS5BAMFWWK5BO""")]
+
+
+
+  # ENCODING
+
+  say """
+      Testing base32 encoding..."""
+
+  enc_test(test_vectors array (tuple my_c String), name String) =>
+    for results list (outcome String) := nil, results.concat (out:nil)
+        tup in test_vectors
+    do
+      (plain, code_expected) := tup
+      code_actual := match plain
+                        str String   => encodings.base32.encode_to_string str.utf8.as_array
+                        arr array u8 => encodings.base32.encode_to_string arr
+      out :=
+        if code_actual = code_expected
+          outcome "ok"
+        else
+          plain_str := match plain
+                         str String   => str
+                         arr array u8 => $arr
+          error "encode '$plain_str' produced '$code_actual' but should have been '$code_expected'"
+    else
+      if results ∀ (.ok)
+        say "$name test vectors are encoded correctly"
+      else
+        say "Failed encoding $name test vectors:"
+        results.filter (.is_error)
+              .map (.err.as_string)
+              .map ("  "+)
+              .for_each say
+        say ""
+  enc_test base32_test_vectors "RFC 4648"
+  enc_test own_test_vectors "Additional"
+
+
+
+  # DECODING
+
+  say """
+      \n
+      Testing base32 decoding..."""
+  dec_test(test_vectors array (tuple my_c String), name String) =>
+    for results list (outcome String) := nil, results.concat (out:nil)
+        tup in test_vectors
+    do
+      (plain_exp, code) := tup
+      out :=
+        match encodings.base32.decode code.utf8.as_array
+          actual array u8 =>
+            match plain_exp
+              str String   =>
+                if str = String.from_bytes actual
+                  outcome "ok"
+                else
+                  error "decoding $code produced '{String.from_bytes actual}' but should have been '$str'"
+              arr array u8 =>
+                if arr.length=actual.length && ((arr.zip actual (a,b->a=b)) ∀ x->x)
+                  outcome "ok"
+                else
+                  error "decoding $code produced '$actual' but should have been '$arr'"
+          e error => error "decoding failed when it should not have: {e.msg}"
+    else
+      if results ∀ (.ok)
+        say "$name test vectors are decoded correctly"
+      else
+        say "Failed decoding $name test vectors:"
+        results.filter (.is_error)
+              .map (.err.as_string)
+              .map ("  "+)
+              .for_each say
+        say ""
+  dec_test base32_test_vectors "RFC 4648"
+  dec_test own_test_vectors "Additional"
+
+
+
+  # ERROR MESSAGES
+
+  say """
+      \n
+      Test error messages when decoding broken base32...
+      """
+
+  broken_enc := ["""
+                 MZXW
+                 6===""",         # line break
+                 "MZXW6==",       # padding too short
+                 "MZX=====",      # invalid padding length / encoding length with valid overall length
+                 "11111111",      # non alphabet character
+                 "88888888",      # non alphabet character
+                 "MZ=XW6==",      # pad char within encoding
+                 "MZXW6====",     # padding too long
+                 "MZX W6===",     # space
+                 "MZXW;6===",     # non alphabet ascii character
+                 "MZX🌍6==="]     # non alphabet multi byte unicode character
+
+  for t in broken_enc do
+    yak "$t: "
+    say (match encodings.base32.decode_str t
+        arr array u8 => String.type.from_bytes arr
+        e error => e.as_string)
+  say ""
diff --git a/tests/base32/base32_test.fz.expected_err b/tests/base32/base32_test.fz.expected_err
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/base32/base32_test.fz.expected_out b/tests/base32/base32_test.fz.expected_out
new file mode 100644
index 0000000000..bd163dfd57
--- /dev/null
+++ b/tests/base32/base32_test.fz.expected_out
@@ -0,0 +1,24 @@
+Testing base32 encoding...
+RFC 4648 test vectors are encoded correctly
+Additional test vectors are encoded correctly
+
+
+Testing base32 decoding...
+RFC 4648 test vectors are decoded correctly
+Additional test vectors are decoded correctly
+
+
+Test error messages when decoding broken base32...
+
+MZXW
+6===: error: line breaks are not allowed within encoded data, as required by RFC464, found LF at position 4
+MZXW6==: error: length of input data is not multiple of 8, as required by RFC4648
+MZX=====: error: padding character '=' not allowed within the input data, only at the very end, as required by RFC464 (padding length of 2 or 5 can never occur)
+11111111: error: invalid base32 input at byte position 0, decoding to unicode character '1'
+88888888: error: invalid base32 input at byte position 0, decoding to unicode character '8'
+MZ=XW6==: error: padding character '=' not allowed within the input data, only at the very end, as required by RFC464 (padding length of 2 or 5 can never occur)
+MZXW6====: error: length of input data is not multiple of 8, as required by RFC4648
+MZX W6===: error: invalid base32 input at byte position 3, decoding to unicode character ' '
+MZXW;6===: error: invalid base32 input at byte position 4, decoding to unicode character ';'
+MZX🌍6===: error: invalid base32 input at byte position 3, decoding to unicode character '🌍'
+
diff --git a/tests/base32hex/Makefile b/tests/base32hex/Makefile
new file mode 100644
index 0000000000..5ba386cf13
--- /dev/null
+++ b/tests/base32hex/Makefile
@@ -0,0 +1,25 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion test Makefile
+#
+# -----------------------------------------------------------------------
+
+override NAME = base32hex_test
+include ../simple.mk
diff --git a/tests/base32hex/base32hex_test.fz b/tests/base32hex/base32hex_test.fz
new file mode 100644
index 0000000000..e0d869e0f8
--- /dev/null
+++ b/tests/base32hex/base32hex_test.fz
@@ -0,0 +1,161 @@
+# This file is part of the Fuzion language implementation.
+#
+# The Fuzion language implementation is free software: you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, version 3 of the License.
+#
+# The Fuzion language implementation is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+# License for more details.
+#
+# You should have received a copy of the GNU General Public License along with The
+# Fuzion language implementation.  If not, see <https://www.gnu.org/licenses/>.
+
+
+# -----------------------------------------------------------------------
+#
+#  Tokiwa Software GmbH, Germany
+#
+#  Source code of Fuzion test base32hex
+#
+# -----------------------------------------------------------------------
+
+base32hex_test is
+
+  my_c : choice String (array u8) is
+  mk_choice(x my_c) => x
+
+  # RFC 4648 test vectors
+  base32hex_test_vectors array (tuple my_c String) :=
+    [(mk_choice $"", ""),
+     (mk_choice "f", "CO======"),
+     (mk_choice "fo", "CPNG===="),
+     (mk_choice "foo", "CPNMU==="),
+     (mk_choice "foob", "CPNMUOG="),
+     (mk_choice "fooba", "CPNMUOJ1"),
+     (mk_choice "foobar", "CPNMUOJ1E8======")]
+
+  # Additional test vectors
+  own_test_vectors array (tuple my_c String) :=
+    [(mk_choice [(u8 0), 0, 0, 0],    "0000000="),
+     (mk_choice [(u8 255), 255, 255], "VVVVU==="),
+     (mk_choice (array u8 5 _->255), "VVVVVVVV"),
+     (mk_choice "123890ABCXYZabcxyz_:;>~<%&\$§!", "64P36E1P610K4GQOB5D62OJ3F1SNKNPQ7CV7SF154OIC59P1"),
+     (mk_choice """
+     Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidun\
+     t ut labore et dolore magna aliquyam erat, sed dia voluptua. At vero eos et accusam et justo d\
+     uo dolores et ea rebum.
+
+     Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.""",
+     """
+     9HNN4PBD41KN0SRLDKG68RRCDTP20SR9EGG62RB5EGM20ORFDPPMAT35EHQN483JC5I6IS3JCDKMSPP0CLM6IT3I5GG76P\
+     B441I6IOBD41N6URJLDLSI0PB9E9MMUP10EHIMQS3FE8G6IRJMD5I7ARJK41QN883CC5H6USJ541IN8834DTM6USJ541MM\
+     2PREC4G62R39E5QNIOBD41IN4OBK5GG76PB441I6IO90EPNMOTBGEHQM2BH085Q20TJ5E9NI0PBFECG6AT10C5HM6TBJC5\
+     MI0PBK41L7ASRKDSG68TBF41I6UR3FE9IN6835EGG6AO90E9IM4TBD5O50KKRKCLQ20ORCD5Q6283BC5PM8837ELH6ASJ7\
+     E9IMSB10DPNI0SR5C4G78OBBD5MM2T3141PM2RJ3EHQN6835EDQ20J3FE9IMQ839E1PNAR90CHNMORRI41PMIT10C5MMAT1E""")]
+
+
+
+  # ENCODING
+
+  say """
+      Testing base32hex encoding..."""
+
+  enc_test(test_vectors array (tuple my_c String), name String) =>
+    for results list (outcome String) := nil, results.concat (out:nil)
+        tup in test_vectors
+    do
+      (plain, code_expected) := tup
+      code_actual := match plain
+                        str String   => encodings.base32hex.encode_to_string str.utf8.as_array
+                        arr array u8 => encodings.base32hex.encode_to_string arr
+      out :=
+        if code_actual = code_expected
+          outcome "ok"
+        else
+          plain_str := match plain
+                         str String   => str
+                         arr array u8 => $arr
+          error "encode '$plain_str' produced '$code_actual' but should have been '$code_expected'"
+    else
+      if results ∀ (.ok)
+        say "$name test vectors are encoded correctly"
+      else
+        say "Failed encoding $name test vectors:"
+        results.filter (.is_error)
+              .map (.err.as_string)
+              .map ("  "+)
+              .for_each say
+        say ""
+  enc_test base32hex_test_vectors "RFC 4648"
+  enc_test own_test_vectors "Additional"
+
+
+
+  # DECODING
+
+  say """
+      \n
+      Testing base32hex decoding..."""
+  dec_test(test_vectors array (tuple my_c String), name String) =>
+    for results list (outcome String) := nil, results.concat (out:nil)
+        tup in test_vectors
+    do
+      (plain_exp, code) := tup
+      out :=
+        match encodings.base32hex.decode code.utf8.as_array
+          actual array u8 =>
+            match plain_exp
+              str String   =>
+                if str = String.from_bytes actual
+                  outcome "ok"
+                else
+                  error "decoding $code produced '{String.from_bytes actual}' but should have been '$str'"
+              arr array u8 =>
+                if arr.length=actual.length && ((arr.zip actual (a,b->a=b)) ∀ x->x)
+                  outcome "ok"
+                else
+                  error "decoding $code produced '$actual' but should have been '$arr'"
+          e error => error "decoding failed when it should not have: {e.msg}"
+    else
+      if results ∀ (.ok)
+        say "$name test vectors are decoded correctly"
+      else
+        say "Failed decoding $name test vectors:"
+        results.filter (.is_error)
+              .map (.err.as_string)
+              .map ("  "+)
+              .for_each say
+        say ""
+  dec_test base32hex_test_vectors "RFC 4648"
+  dec_test own_test_vectors "Additional"
+
+
+
+  # ERROR MESSAGES
+
+  say """
+      \n
+      Test error messages when decoding broken base32hex...
+      """
+
+  broken_enc := ["""
+                 CPN
+                 MU===""",        # line break
+                 "CPNMU==",       # padding too short
+                 "WWWWWWWW",      # non alphabet character
+                 "ZZZZZZZZ",      # non alphabet character
+                 "CPN=====",      # invalid padding length / encoding length with valid overall length
+                 "CPN=MU==",      # pad char within encoding
+                 "CPNMU====",     # padding too long
+                 "CPNM U===",     # space
+                 "CP;NMU===",     # non alphabet ascii character
+                 "CPNM🌍U=="]    # non alphabet multi byte unicode character
+
+  for t in broken_enc do
+    yak "$t: "
+    say (match encodings.base32hex.decode_str t
+        arr array u8 => String.type.from_bytes arr
+        e error => e.as_string)
+  say ""
diff --git a/tests/base32hex/base32hex_test.fz.expected_err b/tests/base32hex/base32hex_test.fz.expected_err
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/base32hex/base32hex_test.fz.expected_out b/tests/base32hex/base32hex_test.fz.expected_out
new file mode 100644
index 0000000000..b065916a20
--- /dev/null
+++ b/tests/base32hex/base32hex_test.fz.expected_out
@@ -0,0 +1,24 @@
+Testing base32hex encoding...
+RFC 4648 test vectors are encoded correctly
+Additional test vectors are encoded correctly
+
+
+Testing base32hex decoding...
+RFC 4648 test vectors are decoded correctly
+Additional test vectors are decoded correctly
+
+
+Test error messages when decoding broken base32hex...
+
+CPN
+MU===: error: line breaks are not allowed within encoded data, as required by RFC464, found LF at position 3
+CPNMU==: error: length of input data is not multiple of 8, as required by RFC4648
+WWWWWWWW: error: invalid base32hex input at byte position 0, decoding to unicode character 'W'
+ZZZZZZZZ: error: invalid base32hex input at byte position 0, decoding to unicode character 'Z'
+CPN=====: error: padding character '=' not allowed within the input data, only at the very end, as required by RFC464 (padding length of 2 or 5 can never occur)
+CPN=MU==: error: padding character '=' not allowed within the input data, only at the very end, as required by RFC464 (padding length of 2 or 5 can never occur)
+CPNMU====: error: length of input data is not multiple of 8, as required by RFC4648
+CPNM U===: error: invalid base32hex input at byte position 4, decoding to unicode character ' '
+CP;NMU===: error: invalid base32hex input at byte position 2, decoding to unicode character ';'
+CPNM🌍U==: error: invalid base32hex input at byte position 4, decoding to unicode character '🌍'
+

From 9e2c6111ff1c166f641d4a55c9884df96ab16b47 Mon Sep 17 00:00:00 2001
From: Simon von Hackewitz <simon.von.hackewitz@tokiwa.software>
Date: Thu, 11 Jul 2024 17:54:20 +0200
Subject: [PATCH 2/4] remove bug from base32hex

---
 lib/encodings/base32hex.fz | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/lib/encodings/base32hex.fz b/lib/encodings/base32hex.fz
index 00d1b3964c..8ca96f8099 100644
--- a/lib/encodings/base32hex.fz
+++ b/lib/encodings/base32hex.fz
@@ -29,9 +29,6 @@ public base32hex : base32 is
   redef get_alphabet => "0123456789ABCDEFGHIJKLMNOPQRSTUV".utf8.as_array
   redef encoding_name => "base32hex"
 
-  # NYI: This causes a java exception
-  alphabet := get_alphabet
-
   # decode a valid base32hex characters to 5 bits
   redef quintet_bits(n u8) =>
     if n >= 65 && n <= 86        # case A-V

From 3bd67a8b87adaf5d9b7dfa7e8b3955e20d9ef2cd Mon Sep 17 00:00:00 2001
From: Simon von Hackewitz <simon.von.hackewitz@tokiwa.software>
Date: Fri, 12 Jul 2024 12:29:52 +0200
Subject: [PATCH 3/4] apply suggestions from code review

---
 lib/encodings/base32.fz    | 16 ++++++++--------
 lib/encodings/base32hex.fz |  3 ++-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/lib/encodings/base32.fz b/lib/encodings/base32.fz
index 761a043c8a..29f5db4edf 100644
--- a/lib/encodings/base32.fz
+++ b/lib/encodings/base32.fz
@@ -22,6 +22,7 @@
 # -----------------------------------------------------------------------
 
 # Base32 encoding and decoding as defined in RFC 4648
+# https://datatracker.ietf.org/doc/html/rfc4648#section-6
 #
 public base32 is
 
@@ -32,8 +33,7 @@ public base32 is
   # NYI: compiler does currently not optimize features without arguments to treat them like fields
   alphabet := get_alphabet
 
-  # decode a valid base32 characters to 5 bits
-  # visibility module to allow redefinition in base32hex
+  # decode a valid base32 character to 5 bits
   module quintet_bits(n u8) =>
     if n >= 65 && n <= 90        # case A-Z
       n.as_u64 - 65
@@ -42,21 +42,21 @@ public base32 is
       n.as_u64 - 24
 
   # checks if a character is valid in the encoding
-  # visibility module to allow redefinition in base32hex
   module is_valid(c u8) =>
     (c >= 65 && c <= 90) || (c >= 50 && c <= 55)
 
   # Encodes a given byte sequence in base32, output is padded to multiple of 8
   # returns a sequence of ascii values
-  public encode(data) =>
+  public encode(data array u8) =>
 
     # extract 8 quintets from 40 bit (of an 64 bit integer)
     enc40(n u64) =>
-      block := array i32 8 (i->((n >> (35-i*5).as_u64) & 31).as_i32)
-      block.map (x->alphabet[x])
+      array u8 8 i->
+        idx := ((n >> (35-i*5).as_u64) & 31).as_i32
+        alphabet[idx]
 
     for
-      res Sequence u8 := [], next        # the encoded input data
+      res Sequence u8 := [], next        # the encoded data
       i := 0, i+1
       last_n u64 := 0, i %% 5 ?  0 : n
       b in data
@@ -93,7 +93,7 @@ public base32 is
   # NYI: decoding does currently not reject encodings where the padding bits have not been set to zero prior to encoding
   #      therefore in some cases multiple encodings can be decoded to the same data
   #      See RFC4648 section 3.5: https://datatracker.ietf.org/doc/html/rfc4648#section-3.5
-  public decode(data) =>
+  public decode(data array u8) =>
 
     # determine size of padding, i.e. number of '=' (61 in ASCII) at the end
     pad_size :=
diff --git a/lib/encodings/base32hex.fz b/lib/encodings/base32hex.fz
index 8ca96f8099..1be70403f5 100644
--- a/lib/encodings/base32hex.fz
+++ b/lib/encodings/base32hex.fz
@@ -22,6 +22,7 @@
 # -----------------------------------------------------------------------
 
 # Base32hex encoding and decoding as defined in RFC 4648
+# https://datatracker.ietf.org/doc/html/rfc4648#section-7
 #
 public base32hex : base32 is
 
@@ -29,7 +30,7 @@ public base32hex : base32 is
   redef get_alphabet => "0123456789ABCDEFGHIJKLMNOPQRSTUV".utf8.as_array
   redef encoding_name => "base32hex"
 
-  # decode a valid base32hex characters to 5 bits
+  # decode a valid base32hex character to 5 bits
   redef quintet_bits(n u8) =>
     if n >= 65 && n <= 86        # case A-V
       n.as_u64 - 55

From a3d6fd1e7187dc49652486a82beca8fa26a10901 Mon Sep 17 00:00:00 2001
From: Simon von Hackewitz <simon.von.hackewitz@tokiwa.software>
Date: Fri, 12 Jul 2024 15:05:26 +0200
Subject: [PATCH 4/4] improve comparisons

---
 lib/encodings/base32.fz    | 6 +++---
 lib/encodings/base32hex.fz | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/encodings/base32.fz b/lib/encodings/base32.fz
index 29f5db4edf..0e614cf561 100644
--- a/lib/encodings/base32.fz
+++ b/lib/encodings/base32.fz
@@ -35,15 +35,15 @@ public base32 is
 
   # decode a valid base32 character to 5 bits
   module quintet_bits(n u8) =>
-    if n >= 65 && n <= 90        # case A-Z
+    if 65 <= n <= 90        # case A-Z
       n.as_u64 - 65
 
-    else #if n >= 50 && n <= 55  # case 2-7
+    else #if 50 <= n <= 55  # case 2-7
       n.as_u64 - 24
 
   # checks if a character is valid in the encoding
   module is_valid(c u8) =>
-    (c >= 65 && c <= 90) || (c >= 50 && c <= 55)
+    (65 <= c <= 90) || (50 <= c <= 55)
 
   # Encodes a given byte sequence in base32, output is padded to multiple of 8
   # returns a sequence of ascii values
diff --git a/lib/encodings/base32hex.fz b/lib/encodings/base32hex.fz
index 1be70403f5..2289d9b846 100644
--- a/lib/encodings/base32hex.fz
+++ b/lib/encodings/base32hex.fz
@@ -32,12 +32,12 @@ public base32hex : base32 is
 
   # decode a valid base32hex character to 5 bits
   redef quintet_bits(n u8) =>
-    if n >= 65 && n <= 86        # case A-V
+    if 65 <= n <= 86        # case A-V
       n.as_u64 - 55
 
-    else #if n >= 48 && n <= 57  # case 0-9
+    else #if 48 <= n <= 57  # case 0-9
       n.as_u64 - 48
 
   # checks if a character is valid in the encoding
   redef is_valid(c u8) =>
-    (c >= 65 && c <= 86) || (c >= 48 && c <= 57)
+    (65 <= c <= 86) || (48 <= c <= 57)