From 8ce9e80bbe9020b2ccd0a44ed5361e1b2d391d81 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 10 Mar 2026 10:00:26 +0100 Subject: [PATCH 1/8] Fixed: IPv6 parsing in JS Fixes IPv6 parsing in JS where the char ranges were not specified correctly. `48 >= char` is the same as `char <= 48`, so the range 48..57 was invalid. Additionally, the first char in this string is `[`, which is 91, but this is not marked as a valid char so it was automatically rejected. Refs #881 Refs gleam-lang/httpc#34 --- src/gleam/uri.gleam | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index cc0c3ac7..96acf1e8 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -287,12 +287,14 @@ fn parse_host_within_brackets_loop( } fn is_valid_host_within_brackets_char(char: Int) -> Bool { + // [ + char == 91 // [0-9] - { 48 >= char && char <= 57 } + || { 48 <= char && char <= 57 } // [A-Z] - || { 65 >= char && char <= 90 } + || { 65 <= char && char <= 90 } // [a-z] - || { 97 >= char && char <= 122 } + || { 97 <= char && char <= 122 } // : || char == 58 // . From 4a4076eee1f5f19478ddbfe8988820358d343242 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 10 Mar 2026 10:04:20 +0100 Subject: [PATCH 2/8] Tests: Add ipv6 tests Adds a set of tests for bracketed IPv6 addresses in both full and collapsed form. 
Refs #881 --- test/gleam/uri_test.gleam | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/gleam/uri_test.gleam b/test/gleam/uri_test.gleam index 7f556f22..47c36bc0 100644 --- a/test/gleam/uri_test.gleam +++ b/test/gleam/uri_test.gleam @@ -274,6 +274,46 @@ pub fn parse_empty_query_3_test() { let assert Some("") = assert_parse("http://example.com/test?").query } +pub fn parse_ipv6_host_preserves_brackets_test() { + let assert Ok(parsed) = + uri.parse("http://[2600:1406:bc00:53::b81e:94c8]/wobble") + assert parsed.host == Some("[2600:1406:bc00:53::b81e:94c8]") +} + +pub fn parse_ipv6_host_with_port_preserves_brackets_test() { + let assert Ok(parsed) = + uri.parse("http://[2600:1406:bc00:53::b81e:94c8]:8080/wobble") + assert parsed.host == Some("[2600:1406:bc00:53::b81e:94c8]") + assert parsed.port == Some(8080) +} + +pub fn parse_ipv6_host_roundtrip_to_string_test() { + let assert Ok(parsed) = uri.parse("http://[2600:1406:bc00:53::b81e:94c8]") + assert uri.to_string(parsed) == "http://[2600:1406:bc00:53::b81e:94c8]/" +} + +pub fn parse_ipv6_compact_loopback_preserves_brackets_test() { + let assert Ok(parsed) = uri.parse("http://[::1]/wobble") + assert parsed.host == Some("[::1]") + assert parsed.path == "/wobble" +} + +pub fn parse_ipv6_compact_loopback_with_port_test() { + let assert Ok(parsed) = uri.parse("http://[::1]:443/wobble") + assert parsed.host == Some("[::1]") + assert parsed.port == Some(443) +} + +pub fn parse_ipv6_collapsed_middle_preserves_brackets_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]/wobble") + assert parsed.host == Some("[2001:db8::2:1]") +} + +pub fn parse_ipv6_collapsed_roundtrip_to_string_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]") + assert uri.to_string(parsed) == "http://[2001:db8::2:1]/" +} + pub fn full_uri_to_string_test() { let test_uri = uri.Uri( From 3e10275bcbb61e4c89991636c91d702812a3be07 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson 
Date: Tue, 10 Mar 2026 10:14:15 +0100 Subject: [PATCH 3/8] Fixed: IPv6 parsing on erl target Fixes an issue where IPv6 hosts were not being produced in RFC-compliant bracketed forms. It parses the URI using the Erlang FFI, but then wraps the Host return in brackets to make it RFC-compliant. It looks for a `:` in the host component to identify whether a host is an IPv6 address. Fixes #881 Refs gleam-lang/httpc#34 (should also fix that) --- src/gleam_stdlib.erl | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index d89cb831..660d7178 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -233,7 +233,7 @@ uri_parse(String) -> {ok, {uri, maps_get_optional_lowercase(Uri, scheme), maps_get_optional(Uri, userinfo), - maps_get_optional(Uri, host), + maps_get_optional_host(Uri), Port, maps_get_or(Uri, path, <<>>), maps_get_optional(Uri, query), @@ -251,6 +251,23 @@ maps_get_optional(Map, Key) -> catch _:_ -> none end. +maps_get_optional_host(Map) -> + try {some, bracket_ipv6_host(maps:get(host, Map))} + catch _:_ -> none + end. + +bracket_ipv6_host(Host) when is_binary(Host) -> + case binary:match(Host, <<":">>) of + nomatch -> Host; + _ -> + case Host of + <<"[", _/binary>> -> Host; + _ -> <<"[", Host/binary, "]">> + end + end; +bracket_ipv6_host(Host) -> + Host. + maps_get_or(Map, Key, Default) -> try maps:get(Key, Map) catch _:_ -> Default From 1dac5bd01fd55a1a614ee660418764f54b31609a Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 10 Mar 2026 11:02:29 +0100 Subject: [PATCH 4/8] Tests: Additional IPv6 parsing tests Adds rejection tests to ensure malformed IPv6 Hosts are rejected instead of passing. 
--- test/gleam/uri_test.gleam | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/gleam/uri_test.gleam b/test/gleam/uri_test.gleam index 47c36bc0..42d46048 100644 --- a/test/gleam/uri_test.gleam +++ b/test/gleam/uri_test.gleam @@ -314,6 +314,37 @@ pub fn parse_ipv6_collapsed_roundtrip_to_string_test() { assert uri.to_string(parsed) == "http://[2001:db8::2:1]/" } +pub fn parse_ipv6_host_with_path_query_fragment_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]/foo/bar?baz=bif#blah") + assert parsed.scheme == Some("http") + assert parsed.host == Some("[2001:db8::2:1]") + assert parsed.path == "/foo/bar" + assert parsed.query == Some("baz=bif") + assert parsed.fragment == Some("blah") + assert parsed.port == None + assert parsed.userinfo == None +} + +pub fn parse_malformed_many_opening_brackets_in_host_test() { + assert uri.parse("http://[[[[[[[[[[]/") == Error(Nil) +} + +pub fn parse_malformed_nested_opening_bracket_in_host_test() { + assert uri.parse("http://[[::1]/") == Error(Nil) +} + +pub fn parse_malformed_unclosed_bracket_host_test() { + assert uri.parse("http://[::1[/") == Error(Nil) +} + +pub fn parse_malformed_question_mark_within_bracket_host_test() { + assert uri.parse("http://[::1?bad]/") == Error(Nil) +} + +pub fn parse_malformed_slash_within_bracket_host_test() { + assert uri.parse("http://[::1/bad]/") == Error(Nil) +} + pub fn full_uri_to_string_test() { let test_uri = uri.Uri( From 43e3da334fa155f0212d90738984c5d75bb50e82 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 10 Mar 2026 11:11:51 +0100 Subject: [PATCH 5/8] Fixed: Reject malformed IPv6 hosts As best I can tell, these matches should be rejected since they do not form part of a valid IPv6 bracketed host. The opening bracket is now popped off so that detection can continue on the rest of the host. Since it is popped off, it is no longer considered a valid char in is_valid_host_within_brackets_char. 
However, if a `/`, `?`, `#` are detected BEFORE a closing `]` the host is considered malformed and a parsing error is now raised. (This would also align with `is_valid_host_within_brackets_char` since these characters are not in the ranges checked.) The comment: "if we find a special one then we know that we're actually parsing the other format for the host and we switch to that!" would indicate that there was a mixup in the code, since for IPv6 in brackets (which is what this code is parsing) there should only be one format, AFAIK? --- src/gleam/uri.gleam | 49 +++++++++++++-------------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index 96acf1e8..88979cb8 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -202,7 +202,7 @@ fn parse_host(uri_string: String, pieces: Uri) -> Result(Uri, Nil) { // - [^:] case uri_string { // If we find an opening bracket we know it's the first format. - "[" <> _ -> parse_host_within_brackets(uri_string, pieces) + "[" <> rest -> parse_host_within_brackets(rest, pieces) // A `:` marks the beginning of the port part of the authority string. ":" <> _ -> { @@ -233,64 +233,43 @@ fn parse_host_within_brackets_loop( size: Int, ) -> Result(Uri, Nil) { case uri_string { - // If the string is over the entire string we were iterating through is the - // host part. - "" -> Ok(Uri(..pieces, host: Some(uri_string))) + // We reached the end without finding a closing `]`. + "" -> Error(Nil) // A `]` marks the end of the host and the start of the port part. - "]" <> rest if size == 0 -> parse_port(rest, pieces) + "]" <> _ if size == 0 -> Error(Nil) "]" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size + 1) + let host = "[" <> codeunit_slice(original, at_index: 0, length: size) <> "]" let pieces = Uri(..pieces, host: Some(host)) parse_port(rest, pieces) } - // `/` marks the beginning of a path. 
- "/" <> _ if size == 0 -> parse_path(uri_string, pieces) - "/" <> _ -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_path(uri_string, pieces) - } + // Delimiters before a closing `]` are invalid. + "/" <> _ -> Error(Nil) - // `?` marks the beginning of the query with question mark. - "?" <> rest if size == 0 -> parse_query_with_question_mark(rest, pieces) - "?" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_query_with_question_mark(rest, pieces) - } + // Delimiters before a closing `]` are invalid. + "?" <> _ -> Error(Nil) - // `#` marks the beginning of the fragment part. - "#" <> rest if size == 0 -> parse_fragment(rest, pieces) - "#" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_fragment(rest, pieces) - } + // Delimiters before a closing `]` are invalid. + "#" <> _ -> Error(Nil) // In all other cases we just keep iterating. _ -> { let #(char, rest) = pop_codeunit(uri_string) - // Inside `[...]` there can only be some characters, if we find a special - // one then we know that we're actually parsing the other format for the - // host and we switch to that! + // Inside `[...]` there can only be some characters. 
case is_valid_host_within_brackets_char(char) { True -> parse_host_within_brackets_loop(original, rest, pieces, size + 1) - False -> - parse_host_outside_of_brackets_loop(original, original, pieces, 0) + False -> Error(Nil) } } } } fn is_valid_host_within_brackets_char(char: Int) -> Bool { - // [ - char == 91 // [0-9] - || { 48 <= char && char <= 57 } + { 48 <= char && char <= 57 } // [A-Z] || { 65 <= char && char <= 90 } // [a-z] From ae367ab5c872e79e57295a069f08635e8eaeae7e Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Tue, 10 Mar 2026 13:29:22 +0100 Subject: [PATCH 6/8] Fixed: restrict to hex alphanum chars Fixes the detection for valid chars in brackets to only allow A-Fa-f, given that it will only ever be hex digits. Credit to @upodevelop --- src/gleam/uri.gleam | 9 +++++---- test/gleam/uri_test.gleam | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index 88979cb8..22f08949 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -268,12 +268,13 @@ fn parse_host_within_brackets_loop( } fn is_valid_host_within_brackets_char(char: Int) -> Bool { + // Valid IPv6 hosts are only [0-9A-Fa-f:.]. // [0-9] { 48 <= char && char <= 57 } - // [A-Z] - || { 65 <= char && char <= 90 } - // [a-z] - || { 97 <= char && char <= 122 } + // [A-F] + || { 65 <= char && char <= 70 } + // [a-f] + || { 97 <= char && char <= 102 } // : || char == 58 // . 
diff --git a/test/gleam/uri_test.gleam b/test/gleam/uri_test.gleam index 42d46048..9cde4555 100644 --- a/test/gleam/uri_test.gleam +++ b/test/gleam/uri_test.gleam @@ -345,6 +345,33 @@ pub fn parse_malformed_slash_within_bracket_host_test() { assert uri.parse("http://[::1/bad]/") == Error(Nil) } +pub fn ipv6_uppercase_test() { + // ensure A–F upper case are accepted + let assert Ok(parsed) = uri.parse("http://[2001:DB8::1]") + assert parsed.host == Some("[2001:DB8::1]") + assert uri.to_string(parsed) == "http://[2001:DB8::1]/" +} + +pub fn ipv6_mixedcase_test() { + let assert Ok(parsed) = uri.parse("http://[2001:dB8:ABcd::]") + assert parsed.host == Some("[2001:dB8:ABcd::]") +} + +pub fn parse_ipv6_with_invalid_char_test() { + // 'g' is not a hex digit + assert uri.parse("http://[::g]/") == Error(Nil) + assert uri.parse("http://[::G]/") == Error(Nil) +} + +pub fn parse_bracket_followed_by_text_error_test() { + // characters immediately after closing bracket and before slash should error + assert uri.parse("http://[::1]foo") == Error(Nil) +} + +pub fn parse_double_closing_bracket_test() { + assert uri.parse("http://[::1]]/") == Error(Nil) +} + pub fn full_uri_to_string_test() { let test_uri = uri.Uri( From ee5acbeeb72e25af1436977948ae05b8321973cf Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 16 Mar 2026 10:46:03 +0100 Subject: [PATCH 7/8] Fixed: redundant checks The `is_valid_host_within_brackets_char` function already checks whether the character is valid, so these case branches were redundant with that check. --- src/gleam/uri.gleam | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index 22f08949..8a1b062b 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -243,16 +243,6 @@ fn parse_host_within_brackets_loop( let pieces = Uri(..pieces, host: Some(host)) parse_port(rest, pieces) } - - // Delimiters before a closing `]` are invalid.
- "/" <> _ -> Error(Nil) - - // Delimiters before a closing `]` are invalid. - "?" <> _ -> Error(Nil) - - // Delimiters before a closing `]` are invalid. - "#" <> _ -> Error(Nil) - // In all other cases we just keep iterating. _ -> { let #(char, rest) = pop_codeunit(uri_string) From 5e87d3012910654f2cdcd81efb03482b05579e27 Mon Sep 17 00:00:00 2001 From: Andrew Hankinson Date: Mon, 16 Mar 2026 10:52:23 +0100 Subject: [PATCH 8/8] Fixed: remove append by extending the slice by one. --- src/gleam/uri.gleam | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index 8a1b062b..652e28a5 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -239,7 +239,7 @@ fn parse_host_within_brackets_loop( // A `]` marks the end of the host and the start of the port part. "]" <> _ if size == 0 -> Error(Nil) "]" <> rest -> { - let host = "[" <> codeunit_slice(original, at_index: 0, length: size) <> "]" + let host = "[" <> codeunit_slice(original, at_index: 0, length: size + 1) let pieces = Uri(..pieces, host: Some(host)) parse_port(rest, pieces) }