From fd175057de62d6f5480c759061f06de3f32b2f30 Mon Sep 17 00:00:00 2001
From: John Downey
Date: Mon, 18 May 2026 12:00:30 -0500
Subject: [PATCH] Fix drop_start on JavaScript for multi-byte strings

`string_byte_slice` used `String.prototype.slice`, which operates on
UTF-16 code units, but it is called with UTF-8 byte offsets from
`byte_size`. Encode to UTF-8, slice the byte array, then decode back.

The decoder is created with `ignoreBOM: true`: by default `TextDecoder`
strips a leading U+FEFF as a byte order mark, which would silently drop
that character whenever the slice starts with it.
---
Review note: the blob ids on the `index` lines below predate the
`ignoreBOM` revision; regenerate with `git format-patch` before sending.

 CHANGELOG.md                 |  5 +++++
 src/gleam_stdlib.mjs         | 10 +++++++++-
 test/gleam/string_test.gleam | 13 +++++++++++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b388cddc..682e80e9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Changelog
 
+## Unreleased
+
+- Fixed a bug where `string.drop_start` would return incorrect results on
+  JavaScript when the string contained multi-byte characters.
+
 ## v1.0.2 - 2026-05-14
 
 - Fixed deprecation warnings when using OTP 29
diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs
index 4f7f1e2a..ad5e40a8 100644
--- a/src/gleam_stdlib.mjs
+++ b/src/gleam_stdlib.mjs
@@ -200,7 +200,15 @@ export function length(data) {
 }
 
+/**
+ * Returns the part of `string` covering `length` UTF-8 bytes, starting at
+ * UTF-8 byte offset `index`.
+ */
 export function string_byte_slice(string, index, length) {
-  return string.slice(index, index + length);
+  const bytes = new TextEncoder().encode(string);
+  // `ignoreBOM: true` keeps a U+FEFF at the start of the slice; the default
+  // decoder would silently strip it as a byte order mark.
+  const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
+  return decoder.decode(bytes.subarray(index, index + length));
 }
 
 export function string_grapheme_slice(string, idx, len) {
diff --git a/test/gleam/string_test.gleam b/test/gleam/string_test.gleam
index ec4c0880..31728d13 100644
--- a/test/gleam/string_test.gleam
+++ b/test/gleam/string_test.gleam
@@ -471,6 +471,19 @@ pub fn drop_start_3499_test() {
   assert string.drop_start("\r]", 1) == "]"
 }
 
+pub fn drop_start_multibyte_test() {
+  // https://github.com/gleam-lang/stdlib/issues/924
+  assert string.drop_start("广州abcdefghijklmn", 0) == "广州abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 1) == "州abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 2) == "abcdefghijklmn"
+  assert string.drop_start("广州abcdefghijklmn", 3) == "bcdefghijklmn"
+}
+
+pub fn drop_start_byte_order_mark_test() {
+  // A U+FEFF at the start of the remainder must not be stripped as a BOM
+  assert string.drop_start("a\u{FEFF}b", 1) == "\u{FEFF}b"
+}
+
 pub fn drop_end_basic_test() {
   assert string.drop_end("gleam", up_to: 2) == "gle"
 }