From e39289b46bc5c43a3469ec8154b81ee863fd88ba Mon Sep 17 00:00:00 2001
From: Richard Viney
Date: Thu, 6 Feb 2025 13:15:57 +1300
Subject: [PATCH] Add bit_array.to_string_lossy

---
 CHANGELOG.md                    |  4 ++++
 src/gleam/bit_array.gleam       | 40 +++++++++++++++++++++++++++++++++
 test/gleam/bit_array_test.gleam | 20 +++++++++++++++++
 3 files changed, 64 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0cfada0..24dfa0ba 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Unreleased
+
+- The `bit_array` module gains the `to_string_lossy` function.
+
 ## v0.58.0 - 2025-03-23
 
 - The deprecated `pop` and `pop_map` functions have been removed from the
diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index 8dbefb50..732c9cf9 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -95,6 +95,46 @@ pub fn to_string(bits: BitArray) -> Result(String, Nil) {
 @external(erlang, "gleam_stdlib", "identity")
 fn unsafe_to_string(a: BitArray) -> String
 
+/// Converts a bit array to a string. Invalid bits are passed to the provided
+/// callback and its result is included in the final string in place of the
+/// invalid data.
+///
+/// ## Examples
+///
+/// ```gleam
+/// to_string_lossy(<<"A":utf8, 0x80, "1":utf8, 0:size(5)>>, fn(_) { "�" })
+/// // -> "A�1�"
+/// ```
+///
+pub fn to_string_lossy(
+  bits: BitArray,
+  map_invalid_bits: fn(BitArray) -> String,
+) -> String {
+  to_string_lossy_impl(bits, map_invalid_bits, "")
+}
+
+fn to_string_lossy_impl(
+  bits: BitArray,
+  map_invalid_bits: fn(BitArray) -> String,
+  acc: String,
+) -> String {
+  case bits {
+    <<>> -> acc
+
+    <<x:utf8_codepoint, rest:bits>> ->
+      to_string_lossy_impl(
+        rest,
+        map_invalid_bits,
+        acc <> string.from_utf_codepoints([x]),
+      )
+
+    <<x:bytes-size(1), rest:bits>> ->
+      to_string_lossy_impl(rest, map_invalid_bits, acc <> map_invalid_bits(x))
+
+    _ -> acc <> map_invalid_bits(bits)
+  }
+}
+
 /// Creates a new bit array by joining multiple binaries.
 ///
 /// ## Examples
diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam
index 1d0c6224..98d56ee1 100644
--- a/test/gleam/bit_array_test.gleam
+++ b/test/gleam/bit_array_test.gleam
@@ -207,6 +207,26 @@ pub fn to_string_test() {
   |> should.equal(Ok("ø"))
 }
 
+pub fn to_string_lossy_test() {
+  <<>>
+  |> bit_array.to_string_lossy(fn(_) { "�" })
+  |> should.equal("")
+
+  <<0x80, "A":utf8, 0x81>>
+  |> bit_array.to_string_lossy(fn(_) { "�" })
+  |> should.equal("�A�")
+
+  // Test some codepoints that require 2/3/4 bytes to be stored as UTF-8
+  <<"£И한𐍈":utf8>>
+  |> bit_array.to_string_lossy(fn(_) { "�" })
+  |> should.equal("£И한𐍈")
+
+  // Test unaligned bit array
+  <<"ø":utf8, 50:4>>
+  |> bit_array.to_string_lossy(fn(_) { "�" })
+  |> should.equal("ø�")
+}
+
 pub fn is_utf8_test() {
   <<>>
   |> bit_array.is_utf8