diff --git a/src/gleam/uri.gleam b/src/gleam/uri.gleam index cc0c3ac7..652e28a5 100644 --- a/src/gleam/uri.gleam +++ b/src/gleam/uri.gleam @@ -202,7 +202,7 @@ fn parse_host(uri_string: String, pieces: Uri) -> Result(Uri, Nil) { // - [^:] case uri_string { // If we find an opening bracket we know it's the first format. - "[" <> _ -> parse_host_within_brackets(uri_string, pieces) + "[" <> rest -> parse_host_within_brackets(rest, pieces) // A `:` marks the beginning of the port part of the authority string. ":" <> _ -> { @@ -233,66 +233,38 @@ fn parse_host_within_brackets_loop( size: Int, ) -> Result(Uri, Nil) { case uri_string { - // If the string is over the entire string we were iterating through is the - // host part. - "" -> Ok(Uri(..pieces, host: Some(uri_string))) + // We reached the end without finding a closing `]`. + "" -> Error(Nil) // A `]` marks the end of the host and the start of the port part. - "]" <> rest if size == 0 -> parse_port(rest, pieces) + "]" <> _ if size == 0 -> Error(Nil) "]" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size + 1) + let host = "[" <> codeunit_slice(original, at_index: 0, length: size + 1) let pieces = Uri(..pieces, host: Some(host)) parse_port(rest, pieces) } - - // `/` marks the beginning of a path. - "/" <> _ if size == 0 -> parse_path(uri_string, pieces) - "/" <> _ -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_path(uri_string, pieces) - } - - // `?` marks the beginning of the query with question mark. - "?" <> rest if size == 0 -> parse_query_with_question_mark(rest, pieces) - "?" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_query_with_question_mark(rest, pieces) - } - - // `#` marks the beginning of the fragment part. - "#" <> rest if size == 0 -> parse_fragment(rest, pieces) - "#" <> rest -> { - let host = codeunit_slice(original, at_index: 0, length: size) - let pieces = Uri(..pieces, host: Some(host)) - parse_fragment(rest, pieces) - } - // In all other cases we just keep iterating. _ -> { let #(char, rest) = pop_codeunit(uri_string) - // Inside `[...]` there can only be some characters, if we find a special - // one then we know that we're actually parsing the other format for the - // host and we switch to that! + // Inside `[...]` there can only be some characters. case is_valid_host_within_brackets_char(char) { True -> parse_host_within_brackets_loop(original, rest, pieces, size + 1) - False -> - parse_host_outside_of_brackets_loop(original, original, pieces, 0) + False -> Error(Nil) } } } } fn is_valid_host_within_brackets_char(char: Int) -> Bool { + // Valid IPv6 hosts are only [0-9A-Fa-f:.]. // [0-9] - { 48 >= char && char <= 57 } - // [A-Z] - || { 65 >= char && char <= 90 } - // [a-z] - || { 97 >= char && char <= 122 } + { 48 <= char && char <= 57 } + // [A-F] + || { 65 <= char && char <= 70 } + // [a-f] + || { 97 <= char && char <= 102 } // : || char == 58 // . diff --git a/src/gleam_stdlib.erl b/src/gleam_stdlib.erl index d89cb831..660d7178 100644 --- a/src/gleam_stdlib.erl +++ b/src/gleam_stdlib.erl @@ -233,7 +233,7 @@ uri_parse(String) -> {ok, {uri, maps_get_optional_lowercase(Uri, scheme), maps_get_optional(Uri, userinfo), - maps_get_optional(Uri, host), + maps_get_optional_host(Uri), Port, maps_get_or(Uri, path, <<>>), maps_get_optional(Uri, query), @@ -251,6 +251,23 @@ maps_get_optional(Map, Key) -> catch _:_ -> none end. +maps_get_optional_host(Map) -> + try {some, bracket_ipv6_host(maps:get(host, Map))} + catch _:_ -> none + end. + +bracket_ipv6_host(Host) when is_binary(Host) -> + case binary:match(Host, <<":">>) of + nomatch -> Host; + _ -> + case Host of + <<"[", _/binary>> -> Host; + _ -> <<"[", Host/binary, "]">> + end + end; +bracket_ipv6_host(Host) -> + Host. + maps_get_or(Map, Key, Default) -> try maps:get(Key, Map) catch _:_ -> Default diff --git a/test/gleam/uri_test.gleam b/test/gleam/uri_test.gleam index 7f556f22..9cde4555 100644 --- a/test/gleam/uri_test.gleam +++ b/test/gleam/uri_test.gleam @@ -274,6 +274,104 @@ pub fn parse_empty_query_3_test() { let assert Some("") = assert_parse("http://example.com/test?").query } +pub fn parse_ipv6_host_preserves_brackets_test() { + let assert Ok(parsed) = + uri.parse("http://[2600:1406:bc00:53::b81e:94c8]/wobble") + assert parsed.host == Some("[2600:1406:bc00:53::b81e:94c8]") +} + +pub fn parse_ipv6_host_with_port_preserves_brackets_test() { + let assert Ok(parsed) = + uri.parse("http://[2600:1406:bc00:53::b81e:94c8]:8080/wobble") + assert parsed.host == Some("[2600:1406:bc00:53::b81e:94c8]") + assert parsed.port == Some(8080) +} + +pub fn parse_ipv6_host_roundtrip_to_string_test() { + let assert Ok(parsed) = uri.parse("http://[2600:1406:bc00:53::b81e:94c8]") + assert uri.to_string(parsed) == "http://[2600:1406:bc00:53::b81e:94c8]/" +} + +pub fn parse_ipv6_compact_loopback_preserves_brackets_test() { + let assert Ok(parsed) = uri.parse("http://[::1]/wobble") + assert parsed.host == Some("[::1]") + assert parsed.path == "/wobble" +} + +pub fn parse_ipv6_compact_loopback_with_port_test() { + let assert Ok(parsed) = uri.parse("http://[::1]:443/wobble") + assert parsed.host == Some("[::1]") + assert parsed.port == Some(443) +} + +pub fn parse_ipv6_collapsed_middle_preserves_brackets_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]/wobble") + assert parsed.host == Some("[2001:db8::2:1]") +} + +pub fn parse_ipv6_collapsed_roundtrip_to_string_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]") + assert uri.to_string(parsed) == "http://[2001:db8::2:1]/" +} + +pub fn parse_ipv6_host_with_path_query_fragment_test() { + let assert Ok(parsed) = uri.parse("http://[2001:db8::2:1]/foo/bar?baz=bif#blah") + assert parsed.scheme == Some("http") + assert parsed.host == Some("[2001:db8::2:1]") + assert parsed.path == "/foo/bar" + assert parsed.query == Some("baz=bif") + assert parsed.fragment == Some("blah") + assert parsed.port == None + assert parsed.userinfo == None +} + +pub fn parse_malformed_many_opening_brackets_in_host_test() { + assert uri.parse("http://[[[[[[[[[[]/") == Error(Nil) +} + +pub fn parse_malformed_nested_opening_bracket_in_host_test() { + assert uri.parse("http://[[::1]/") == Error(Nil) +} + +pub fn parse_malformed_unclosed_bracket_host_test() { + assert uri.parse("http://[::1[/") == Error(Nil) +} + +pub fn parse_malformed_question_mark_within_bracket_host_test() { + assert uri.parse("http://[::1?bad]/") == Error(Nil) +} + +pub fn parse_malformed_slash_within_bracket_host_test() { + assert uri.parse("http://[::1/bad]/") == Error(Nil) +} + +pub fn ipv6_uppercase_test() { + // ensure A–F upper case are accepted + let assert Ok(parsed) = uri.parse("http://[2001:DB8::1]") + assert parsed.host == Some("[2001:DB8::1]") + assert uri.to_string(parsed) == "http://[2001:DB8::1]/" +} + +pub fn ipv6_mixedcase_test() { + let assert Ok(parsed) = uri.parse("http://[2001:dB8:ABcd::]") + assert parsed.host == Some("[2001:dB8:ABcd::]") +} + +pub fn parse_ipv6_with_invalid_char_test() { + // 'g' is not a hex digit + assert uri.parse("http://[::g]/") == Error(Nil) + assert uri.parse("http://[::G]/") == Error(Nil) +} + +pub fn parse_bracket_followed_by_text_error_test() { + // characters immediately after closing bracket and before slash should error + assert uri.parse("http://[::1]foo") == Error(Nil) +} + +pub fn parse_double_closing_bracket_test() { + assert uri.parse("http://[::1]]/") == Error(Nil) +} + pub fn full_uri_to_string_test() { let test_uri = uri.Uri(