Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## v0.71.0 - Unreleased

- The `bit_array` module gains the `split` and `split_once` functions.

## v0.70.0 - 2026-03-07

- Fixed a bug where `uri.parse` would incorrectly handle uppercase schemes on
Expand Down
46 changes: 46 additions & 0 deletions src/gleam/bit_array.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,52 @@ pub fn slice(
take length: Int,
) -> Result(BitArray, Nil)

/// Splits a bit array into two parts at the location of the pattern.
///
/// The pattern itself is not included in either part. An error is returned
/// if the pattern is not found.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split_once(from: <<1, 2, 3>>, on: <<2>>)
/// // -> Ok(#(<<1>>, <<3>>))
///
/// split_once(from: <<0>>, on: <<1>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split_once")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split_once")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not implement this in Gleam rather than Erlang? Could be a bunch nicer, and we wouldn't need to use any private APIs which should not be used.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because of this comment: #629 (comment)

I can take a stab at implementing it in Gleam if you'd prefer. Erlang's binary:split is a BIF, so performance-wise it makes sense to use it.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I made a typo here. I meant to say JavaScript rather than Erlang 😅

pub fn split_once(
from bits: BitArray,
on pattern: BitArray,
) -> Result(#(BitArray, BitArray), Nil)

/// Splits a bit array into parts at the locations of the pattern.
///
/// The parts do not include the pattern, and empty parts (for example those
/// produced by a leading or trailing pattern) are dropped. If the pattern is
/// not found the result is a list containing only the input.
///
/// An error is returned if the pattern is empty.
///
/// This function runs in linear time.
///
/// ## Examples
///
/// ```gleam
/// split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
/// // -> Ok([<<1>>, <<2>>, <<3>>])
///
/// split(from: <<1, 0>>, on: <<0>>)
/// // -> Ok([<<1>>])
///
/// split(from: <<0>>, on: <<1>>)
/// // -> Ok([<<0>>])
///
/// split(from: <<0, 1, 2, 0, 3, 4, 5>>, on: <<>>)
/// // -> Error(Nil)
/// ```
@external(erlang, "gleam_stdlib", "bit_array_split")
@external(javascript, "../gleam_stdlib.mjs", "bit_array_split")
pub fn split(
from bits: BitArray,
on pattern: BitArray,
) -> Result(List(BitArray), Nil)

/// Tests to see whether a bit array is valid UTF-8.
///
pub fn is_utf8(bits: BitArray) -> Bool {
Expand Down
18 changes: 17 additions & 1 deletion src/gleam_stdlib.erl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
int_from_base_string/2, utf_codepoint_list_to_string/1, contains_string/2,
crop_string/2, base16_encode/1, base16_decode/1, string_replace/3, slice/3,
bit_array_to_int_and_size/1, bit_array_pad_to_bytes/1, index/2, list/5,
dict/1, int/1, float/1, bit_array/1, is_null/1
dict/1, int/1, float/1, bit_array/1, is_null/1, bit_array_split_once/2,
bit_array_split/2
]).

%% Taken from OTP's uri_string module
Expand Down Expand Up @@ -149,6 +150,21 @@ bit_array_slice(Bin, Pos, Len) ->
catch error:badarg -> {error, nil}
end.

bit_array_split_once(Bin, Sub) ->
try
case binary:split(Bin, [Sub]) of
[<<>>, <<>>] -> {error, nil};
[A, B] -> {ok, {A, B}};
_ -> {error, nil}
end
catch error:badarg -> {error, nil}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this try-catch for?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It catches this case, where Erlang would raise.

Now that I reread about the Erlang implementation I realize that it can also raise a nif_error, so maybe the catch should be more generic to avoid raising altogether.

end.

%% Splits `Bin` on every occurrence of `Sub`, discarding empty segments.
%% Returns `{ok, Parts}`, or `{error, nil}` if `binary:split/3` raises
%% `badarg`.
bit_array_split(Bin, Sub) ->
    try binary:split(Bin, [Sub], [global, trim_all]) of
        Parts -> {ok, Parts}
    catch
        error:badarg -> {error, nil}
    end.

base64_decode(S) ->
try {ok, base64:decode(S)}
catch error:_ -> {error, nil}
Expand Down
74 changes: 74 additions & 0 deletions src/gleam_stdlib.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,80 @@ export function bit_array_slice(bits, position, length) {
return Result$Ok(bitArraySlice(bits, start * 8, end * 8));
}

export function bit_array_split_once(bits, pattern) {
try {
const patternEmpty = pattern.buffer.length < 1
const patternLongerThanBits = pattern.buffer.length >= bits.buffer.length
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not be the length of the bit array itself.

const incorrectArguments = !(bits instanceof BitArray) || !(pattern instanceof BitArray)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No runtime type checking please 🙏

if (incorrectArguments || patternEmpty || patternLongerThanBits) {
return new Error(Nil);
}

const n = bits.buffer.length - pattern.buffer.length + 1;
find: for (let i = 0; i < n; i++) {
for (let j = 0; j < pattern.buffer.length; j++) {
if (bits.buffer[i + j] !== pattern.buffer[j]) {
continue find;
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like quite an expensive algorithm, it is checking bytes multiple times even when we know they could not match.

There are a few established algorithms we could use: https://en.wikipedia.org/wiki/String-searching_algorithm. Boyer–Moore–Horspool seems fairly straightforward, but the two-way algorithm seems to be the most popular approach.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's a naive approach. I'll have a go at one of the more efficient algorithms.

}
const before = bits.buffer.slice(0, i);
const after = bits.buffer.slice(i + pattern.buffer.length);
return new Ok([new BitArray(before), new BitArray(after)]);
}

return new Error(Nil);
} catch (e) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's this try-catch for?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't remember if it catches a specific case. I'll check and remove it if not.

return new Error(Nil);
}
}

// Splits `bits` on every occurrence of `pattern`, comparing the two byte by
// byte through their `buffer` properties. Parts of zero length are never added
// to the result.
//
// Returns `new Error(Nil)` when the pattern is empty, when either argument is
// not a `BitArray`, or when anything inside the body throws. Otherwise returns
// `new Ok(...)` wrapping a list of the parts.
export function bit_array_split(bits, pattern) {
try {
// Note: `pattern.buffer` is read here, before the `instanceof` checks below;
// a failure on this line is turned into `Error(Nil)` by the surrounding catch.
const patternEmpty = pattern.buffer.length < 1
const incorrectArguments = !(bits instanceof BitArray) || !(pattern instanceof BitArray)
if (incorrectArguments || patternEmpty) {
return new Error(Nil);
}

// An input shorter than the pattern cannot contain it, so it is returned
// whole as the only part.
const bitsShorter = bits.buffer.length < pattern.buffer.length
if (bitsShorter) {
return new Ok(List.fromArray([bits]))
}

const results = [];
// Index just past the most recent match, i.e. where the next part begins.
let lastIndex = 0;
// Number of start positions at which the whole pattern still fits.
const n = bits.buffer.length - pattern.buffer.length + 1;

find: for (let i = 0; i < n; i++) {
// Any mismatching byte rules out position `i`; move on to the next start.
for (let j = 0; j < pattern.buffer.length; j++) {
if (bits.buffer[i + j] !== pattern.buffer[j]) {
continue find;
}
}

// Reached only when the pattern matched at position `i`. If both buffers
// have the same length the match covers the entire input, so no parts remain.
const bitsEqualsPattern = bits.buffer.length === pattern.buffer.length
if (bitsEqualsPattern) {
return new Ok(List.fromArray([]));
}

// Keep the bytes between the previous match and this one, unless empty.
if (i > lastIndex) {
results.push(new BitArray(bits.buffer.slice(lastIndex, i)));
}

// Resume scanning right after this match; the `- 1` offsets the loop's `i++`.
lastIndex = i + pattern.buffer.length;
i = lastIndex - 1;
}

// Whatever follows the final match (or the whole input when nothing matched)
// becomes the last part, unless it is empty.
if (lastIndex < bits.buffer.length) {
results.push(new BitArray(bits.buffer.slice(lastIndex)));
}

return new Ok(List.fromArray(results))
} catch (e) {
// Any exception raised above is reported as `Error(Nil)`.
return new Error(Nil);
}
}

// Wraps the given integer in a new `UtfCodepoint`.
export function codepoint(int) {
  const wrapped = new UtfCodepoint(int);
  return wrapped;
}
Expand Down
114 changes: 114 additions & 0 deletions test/gleam/bit_array_test.gleam
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,120 @@ pub fn slice_large_error_test() {
== Error(Nil)
}

// A pattern in the middle yields the bytes on either side of it.
pub fn split_once_middle_test() {
  let result = bit_array.split_once(from: <<0, 1, 2>>, on: <<1>>)
  assert result == Ok(#(<<0>>, <<2>>))
}

// A pattern at the very start leaves an empty first part.
pub fn split_once_beginning_test() {
  let result = bit_array.split_once(from: <<0, 1, 2>>, on: <<0>>)
  assert result == Ok(#(<<>>, <<1, 2>>))
}

// A pattern at the very end leaves an empty second part.
pub fn split_once_end_test() {
  let result = bit_array.split_once(from: <<0, 1, 2>>, on: <<2>>)
  assert result == Ok(#(<<0, 1>>, <<>>))
}

// A two-byte pattern is matched as a whole, not byte by byte.
pub fn split_once_multi_byte_separator_test() {
  let result = bit_array.split_once(from: <<0, 1, 0, 2, 0, 3>>, on: <<0, 2>>)
  assert result == Ok(#(<<0, 1>>, <<0, 3>>))
}

// Nothing can be found in an empty bit array.
pub fn split_once_empty_haystack_test() {
  let result = bit_array.split_once(from: <<>>, on: <<1>>)
  assert result == Error(Nil)
}

// An empty pattern is rejected.
pub fn split_once_empty_separator_test() {
  let result = bit_array.split_once(from: <<0, 1, 2, 0, 3, 4, 5>>, on: <<>>)
  assert result == Error(Nil)
}

// When the pattern is the entire input the result is an error.
pub fn split_once_separator_equals_haystack_test() {
  let result = bit_array.split_once(from: <<1>>, on: <<1>>)
  assert result == Error(Nil)
}

// A pattern that does not occur produces an error.
pub fn split_once_no_match_test() {
  let result = bit_array.split_once(from: <<0>>, on: <<1>>)
  assert result == Error(Nil)
}

// Erlang-only: the input is not byte-aligned, and such bit arrays are not
// supported on the JavaScript target.
@target(erlang)
pub fn split_once_unaligned_test() {
  let result = bit_array.split_once(from: <<0, 1, 2:7>>, on: <<1>>)
  assert result == Error(Nil)
}

// Adjacent occurrences of the pattern do not produce an empty part.
pub fn split_string_test() {
  let parts = bit_array.split(from: <<"hello":utf8>>, on: <<"l":utf8>>)
  assert parts == Ok([<<"he":utf8>>, <<"o":utf8>>])
}

// Every occurrence of the pattern splits the input.
pub fn split_multiple_matches_test() {
  let parts = bit_array.split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0>>)
  assert parts == Ok([<<1>>, <<2>>, <<3>>])
}

// A two-byte pattern is matched as a whole, not byte by byte.
pub fn split_multi_byte_separator_test() {
  let parts = bit_array.split(from: <<0, 1, 0, 2, 0, 3>>, on: <<0, 2>>)
  assert parts == Ok([<<0, 1>>, <<0, 3>>])
}

// A pattern at the end does not add a trailing empty part.
pub fn split_trailing_separator_test() {
  let parts = bit_array.split(from: <<1, 0>>, on: <<0>>)
  assert parts == Ok([<<1>>])
}

// A pattern at the start does not add a leading empty part.
pub fn split_leading_separator_test() {
  let parts = bit_array.split(from: <<1, 0>>, on: <<1>>)
  assert parts == Ok([<<0>>])
}

// Without a match the input comes back as the only part.
pub fn split_no_match_test() {
  let parts = bit_array.split(from: <<1>>, on: <<0>>)
  assert parts == Ok([<<1>>])
}

// When the pattern is the entire input no parts remain.
pub fn split_separator_equals_haystack_test() {
  let parts = bit_array.split(from: <<1, 2>>, on: <<1, 2>>)
  assert parts == Ok([])
}

// An empty pattern is rejected.
pub fn split_empty_separator_test() {
  let parts = bit_array.split(from: <<0, 1, 2, 0, 3, 4, 5>>, on: <<>>)
  assert parts == Error(Nil)
}

// Erlang-only: the input is not byte-aligned, and such bit arrays are not
// supported on the JavaScript target.
@target(erlang)
pub fn split_unaligned_test() {
  let parts = bit_array.split(from: <<0, 1, 2:7>>, on: <<1>>)
  assert parts == Error(Nil)
}

// An empty bit array converts to the empty string.
pub fn to_string_empty_test() {
  let converted = bit_array.to_string(<<>>)
  assert converted == Ok("")
}
Expand Down