diff --git a/CHANGELOG.md b/CHANGELOG.md
index eb8895fb..f85752dc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## Unreleased
+
+- The `bit_array` module gains the `to_string_lossy` function.
+
 ## v0.69.0 - 2026-01-26
 
 - The `int.range` function has been added.
diff --git a/src/gleam/bit_array.gleam b/src/gleam/bit_array.gleam
index 1fefa8b3..e5744cc6 100644
--- a/src/gleam/bit_array.gleam
+++ b/src/gleam/bit_array.gleam
@@ -95,6 +95,53 @@ pub fn to_string(bits: BitArray) -> Result(String, Nil) {
 @external(erlang, "gleam_stdlib", "identity")
 fn unsafe_to_string(a: BitArray) -> String
 
+/// Converts a bit array to a string, replacing any data that is not valid
+/// UTF-8 with the result of calling the provided function on it.
+///
+/// Invalid data is passed to the function one byte at a time. If the bit
+/// array does not end on a byte boundary, the trailing bits are passed to
+/// the function in a single call.
+///
+/// ## Examples
+///
+/// ```gleam
+/// to_string_lossy(<<"A":utf8, 0x80, "1":utf8, 0:size(5)>>, fn(_) { "�" })
+/// // -> "A�1�"
+/// ```
+///
+pub fn to_string_lossy(
+  bits: BitArray,
+  map_invalid_bits: fn(BitArray) -> String,
+) -> String {
+  to_string_lossy_impl(bits, map_invalid_bits, "")
+}
+
+fn to_string_lossy_impl(
+  bits: BitArray,
+  map_invalid_bits: fn(BitArray) -> String,
+  acc: String,
+) -> String {
+  case bits {
+    <<>> -> acc
+
+    // A valid codepoint is at the front: keep it and carry on with the rest.
+    <<x:utf8_codepoint, rest:bits>> ->
+      to_string_lossy_impl(
+        rest,
+        map_invalid_bits,
+        acc <> string.from_utf_codepoints([x]),
+      )
+
+    // No valid codepoint starts here, so replace a single byte and resume
+    // decoding straight after it.
+    <<x:bytes-size(1), rest:bits>> ->
+      to_string_lossy_impl(rest, map_invalid_bits, acc <> map_invalid_bits(x))
+
+    // Less than a whole byte is left, so it cannot be text.
+    _ -> acc <> map_invalid_bits(bits)
+  }
+}
+
 /// Creates a new bit array by joining multiple binaries.
/// /// ## Examples diff --git a/test/gleam/bit_array_test.gleam b/test/gleam/bit_array_test.gleam index 3dd43e98..05c04b13 100644 --- a/test/gleam/bit_array_test.gleam +++ b/test/gleam/bit_array_test.gleam @@ -135,6 +135,19 @@ pub fn to_string_test() { assert bit_array.to_string(x) == Ok("ø") } +pub fn to_string_lossy_test() { + assert bit_array.to_string_lossy(<<>>, fn(_) { "�" }) == "" + + assert bit_array.to_string_lossy(<<0x80, "A":utf8, 0x81>>, fn(_) { "�" }) + == "�A�" + + // Test some codepoints that require 2/3/4 bytes to be stored as UTF-8 + assert bit_array.to_string_lossy(<<"£И한𐍈":utf8>>, fn(_) { "�" }) == "£И한𐍈" + + // Test unaligned bit array + assert bit_array.to_string_lossy(<<"ø":utf8, 2:4>>, fn(_) { "�" }) == "ø�" +} + pub fn is_utf8_test() { assert bit_array.is_utf8(<<>>)