From 506acf206f3a33a0b55cc873fb528f913d9b36b6 Mon Sep 17 00:00:00 2001 From: qingliu Date: Sat, 13 Jun 2026 05:08:55 +0000 Subject: [PATCH] fix: handle IPv6 host URLs in parseGitLabHostURL parseGitLabHostURL split host:port with strings.Split + a len==2 check, so an IPv6 host (many colons) skipped the port split and kept its brackets and port, e.g. host = "[2335::aa1:1415]:32336". Rendered into the GitLab test-data template this produced `host: [2335::aa1:1415]:32336`, whose leading "[" makes YAML parse it as a flow sequence, failing the e2e prepare-gitlab-data step on IPv6 environments with "yaml: line 3: did not find expected key". IPv4 splits into two parts so it worked; the bug was IPv6-only. Parse with net/url instead (matching the sibling parseGitLabSSHEndpoint): url.Hostname() returns the bare host so the rendered host: scalar is valid YAML, and the endpoint is rebuilt with net.JoinHostPort so IPv6 stays bracketed. Preserves the default-port-omitted endpoint contract, subpath GitLab installs, and non-corrupting fallback for no-scheme/malformed input. Add internal/cli/cmd_test.go: table-driven TestParseGitLabHostURL (IPv6/ IPv4/hostname, ports, subpaths, no-scheme bare IPv6, malformed fallback) and TestGeneratedHostValueIsValidYAML as a regression guard for the original YAML failure. --- internal/cli/cmd.go | 128 ++++++++++++++++++-------- internal/cli/cmd_test.go | 189 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 282 insertions(+), 35 deletions(-) create mode 100644 internal/cli/cmd_test.go diff --git a/internal/cli/cmd.go b/internal/cli/cmd.go index f25b12f..2c9b82d 100644 --- a/internal/cli/cmd.go +++ b/internal/cli/cmd.go @@ -3,6 +3,7 @@ package cli import ( "fmt" "log" + "net" "net/url" "strconv" "strings" @@ -568,50 +569,107 @@ func parseGitLabSSHEndpoint(rawEndpoint string) (endpoint, host string, port int return endpointBuilder, hostName, portNumber } -// parseGitLabHostURL 从 GitLab Host URL 解析出 endpoint、scheme 和 host +// parseGitLabHostURL 从 GitLab Host URL 解析出 endpoint、scheme、host 和 port。 +// 使用 net/url 解析,正确处理 IPv6 字面量(带方括号)的主机地址,例如 +// http://[2335::aa1:1415]:32336。返回的 host 为不带方括号、不带端口的裸主机名。 func parseGitLabHostURL(gitlabHost string) (endpoint, scheme, host string, port int) { - // 默认值 - scheme = "https" - port = 443 - endpoint = gitlabHost - - // 去除尾部斜杠 - endpoint = strings.TrimSuffix(endpoint, "/") - - // 检查是否包含 scheme - if strings.HasPrefix(endpoint, "http://") { - scheme = "http" - port = 80 - host = strings.TrimPrefix(endpoint, "http://") - } else if strings.HasPrefix(endpoint, "https://") { - scheme = "https" + // trimmedHost stores the host string without surrounding whitespace or trailing slash. + trimmedHost := strings.TrimSuffix(strings.TrimSpace(gitlabHost), "/") + + // inputScheme records the scheme detected from the raw input prefix (if any). It is + // used both to normalize the value for url.Parse and as the fallback scheme when + // parsing fails, so a malformed "http://[::1" still reports scheme "http". + inputScheme := "" + if idx := strings.Index(trimmedHost, "://"); idx >= 0 { + inputScheme = trimmedHost[:idx] + } + + // normalizedHost ensures a scheme is present so url.Parse treats the value as a URL + // rather than an opaque path; default to https to preserve the previous behavior. + normalizedHost := trimmedHost + if inputScheme == "" { + // No scheme: the remainder is a bare host[:port][/path]. Split off any path first so + // the IPv6 detection below only inspects the host[:port] portion (otherwise the path + // leaks into the bracketing). A bare (unbracketed) IPv6 literal such as + // "2335::aa1:1415" would otherwise let url.Parse mistake the final ":1415" for a port; + // detect that via net.SplitHostPort's "too many colons" error, bracket the host so it + // is preserved as a single host with no port, then re-attach the path. + hostPort, pathPart := trimmedHost, "" + if slash := strings.Index(trimmedHost, "/"); slash >= 0 { + hostPort, pathPart = trimmedHost[:slash], trimmedHost[slash:] + } + if _, _, splitErr := net.SplitHostPort(hostPort); splitErr != nil && + strings.Contains(splitErr.Error(), "too many colons") { + normalizedHost = "https://" + bracketIfIPv6(hostPort) + pathPart + } else { + normalizedHost = "https://" + trimmedHost + } + } + + // parsedURL holds the parsed host URL. On parse failure fall back to a sane, + // non-corrupting result: keep the trimmed input as the endpoint unchanged, use the + // detected scheme (or https), and leave host empty rather than mangling the input. + parsedURL, err := url.Parse(normalizedHost) + if err != nil { + scheme = inputScheme + if scheme == "" { + scheme = "https" + } port = 443 - host = strings.TrimPrefix(endpoint, "https://") - } else { - // 如果没有 scheme,则 host 就是原始的 endpoint - host = endpoint + if scheme == "http" { + port = 80 + } + host = "" + endpoint = trimmedHost + return endpoint, scheme, host, port } - // 检查 host 中是否包含端口号 - if strings.Contains(host, ":") { - parts := strings.Split(host, ":") - if len(parts) == 2 { - host = parts[0] - // 尝试解析端口号 - if parsedPort, err := strconv.Atoi(parts[1]); err == nil { - port = parsedPort - } + // scheme comes from the URL; default to https when the input had no scheme at all. + scheme = parsedURL.Scheme + if scheme == "" { + scheme = "https" + } + + // host is the bare hostname; url.Hostname() strips IPv6 brackets, which is exactly + // what we want rendered into the YAML template (a bracketed scalar is invalid YAML). + host = parsedURL.Hostname() + + // urlPath preserves any subpath (e.g. "/gitlab" for a GitLab instance served under a + // subpath); dropping it would change the effective endpoint. + urlPath := parsedURL.Path + + // defaultPort selects the well-known port for the scheme when none is given explicitly. + defaultPort := 443 + if scheme == "http" { + defaultPort = 80 + } + + // port uses the explicit URL port when present, otherwise the scheme default. + port = defaultPort + if parsedURL.Port() != "" { + if parsedPort, parseErr := strconv.Atoi(parsedURL.Port()); parseErr == nil { + port = parsedPort } } - // 重新添加 scheme 构造完整的 endpoint - if (scheme == "http" && port == 80) || (scheme == "https" && port == 443) { - // 默认端口,不需要在 endpoint 中显示 - endpoint = scheme + "://" + host + // Rebuild the endpoint: omit the port when it is the scheme default, otherwise include + // it. net.JoinHostPort re-adds IPv6 brackets so the endpoint stays a valid URL. The + // preserved path is appended last so subpath GitLab endpoints survive the round-trip. + if port == defaultPort { + endpoint = scheme + "://" + bracketIfIPv6(host) } else { - // 非默认端口,需要在 endpoint 中显示 - endpoint = scheme + "://" + host + ":" + strconv.Itoa(port) + endpoint = scheme + "://" + net.JoinHostPort(host, strconv.Itoa(port)) } + endpoint += urlPath return endpoint, scheme, host, port } + +// bracketIfIPv6 wraps an IPv6 literal host in square brackets so it is valid inside a URL. +// IPv6 hosts contain colons; IPv4 and DNS names never do. +func bracketIfIPv6(host string) string { + if strings.Contains(host, ":") { + return "[" + host + "]" + } + return host +} diff --git a/internal/cli/cmd_test.go b/internal/cli/cmd_test.go new file mode 100644 index 0000000..27d9268 --- /dev/null +++ b/internal/cli/cmd_test.go @@ -0,0 +1,189 @@ +package cli + +import ( + "testing" + + "gopkg.in/yaml.v3" +) + +// TestParseGitLabHostURL verifies that parseGitLabHostURL handles IPv6 literals, +// IPv4 addresses, and DNS hostnames correctly, including default-port omission and +// IPv6 re-bracketing in the rebuilt endpoint. +func TestParseGitLabHostURL(t *testing.T) { + tests := []struct { + name string + input string + wantEndpoint string + wantScheme string + wantHost string + wantPort int + }{ + { + name: "IPv6 with explicit port", + input: "http://[2335::aa1:1415]:32336", + wantEndpoint: "http://[2335::aa1:1415]:32336", + wantScheme: "http", + wantHost: "2335::aa1:1415", + wantPort: 32336, + }, + { + name: "IPv6 https default port", + input: "https://[2001:db8::1]", + wantEndpoint: "https://[2001:db8::1]", + wantScheme: "https", + wantHost: "2001:db8::1", + wantPort: 443, + }, + { + name: "IPv6 http default port", + input: "http://[::1]", + wantEndpoint: "http://[::1]", + wantScheme: "http", + wantHost: "::1", + wantPort: 80, + }, + { + name: "IPv4 with explicit port", + input: "http://10.161.11.29:32739", + wantEndpoint: "http://10.161.11.29:32739", + wantScheme: "http", + wantHost: "10.161.11.29", + wantPort: 32739, + }, + { + name: "IPv4 hostname https default", + input: "https://gitlab.example.com", + wantEndpoint: "https://gitlab.example.com", + wantScheme: "https", + wantHost: "gitlab.example.com", + wantPort: 443, + }, + { + name: "hostname with explicit port", + input: "https://gitlab.example.com:8443", + wantEndpoint: "https://gitlab.example.com:8443", + wantScheme: "https", + wantHost: "gitlab.example.com", + wantPort: 8443, + }, + { + name: "IPv6 with port and trailing slash", + input: "http://[2335::aa1:1415]:32336/", + wantEndpoint: "http://[2335::aa1:1415]:32336", + wantScheme: "http", + wantHost: "2335::aa1:1415", + wantPort: 32336, + }, + { + name: "no scheme defaults to https", + input: "gitlab.example.com", + wantEndpoint: "https://gitlab.example.com", + wantScheme: "https", + wantHost: "gitlab.example.com", + wantPort: 443, + }, + { + name: "leading and trailing whitespace trimmed", + input: " https://gitlab.example.com:8443 ", + wantEndpoint: "https://gitlab.example.com:8443", + wantScheme: "https", + wantHost: "gitlab.example.com", + wantPort: 8443, + }, + { + // Issue 1: a bare IPv6 literal with no scheme and no brackets must not have its + // trailing group mistaken for a port; the whole value is the host. + name: "no scheme bare IPv6 literal", + input: "2335::aa1:1415", + wantEndpoint: "https://[2335::aa1:1415]", + wantScheme: "https", + wantHost: "2335::aa1:1415", + wantPort: 443, + }, + { + // Issue (codex r2): no-scheme bare IPv6 WITH a subpath must keep both host and + // path; the path must not leak into the IPv6 bracket detection. + name: "no scheme bare IPv6 literal with subpath", + input: "2335::aa1:1415/gitlab", + wantEndpoint: "https://[2335::aa1:1415]/gitlab", + wantScheme: "https", + wantHost: "2335::aa1:1415", + wantPort: 443, + }, + { + // Issue 2: a GitLab instance served under a subpath must keep its path. + name: "https hostname with subpath", + input: "https://gitlab.example.com/gitlab", + wantEndpoint: "https://gitlab.example.com/gitlab", + wantScheme: "https", + wantHost: "gitlab.example.com", + wantPort: 443, + }, + { + // Issue 2: IPv6 with an explicit port and a subpath keeps both bracketed host + // and path. + name: "IPv6 with port and subpath", + input: "https://[2001:db8::1]:8443/gl", + wantEndpoint: "https://[2001:db8::1]:8443/gl", + wantScheme: "https", + wantHost: "2001:db8::1", + wantPort: 8443, + }, + { + // Issue 3: a malformed URL must fall back without corrupting the endpoint + // (no doubled scheme, no doubled brackets). Endpoint stays the trimmed input. + name: "malformed URL falls back without corruption", + input: "http://[::1", + wantEndpoint: "http://[::1", + wantScheme: "http", + wantHost: "", + wantPort: 80, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + endpoint, scheme, host, port := parseGitLabHostURL(tt.input) + if endpoint != tt.wantEndpoint { + t.Errorf("endpoint = %q, want %q", endpoint, tt.wantEndpoint) + } + if scheme != tt.wantScheme { + t.Errorf("scheme = %q, want %q", scheme, tt.wantScheme) + } + if host != tt.wantHost { + t.Errorf("host = %q, want %q", host, tt.wantHost) + } + if port != tt.wantPort { + t.Errorf("port = %d, want %d", port, tt.wantPort) + } + }) + } +} + +// TestGeneratedHostValueIsValidYAML is the regression guard for the original failure: +// an IPv6 host rendered with brackets (e.g. "[2335::aa1:1415]:32336") makes YAML parse +// the value as a flow sequence and fail. The fixed parser returns a bare host, which must +// round-trip through yaml.v3 without error. +func TestGeneratedHostValueIsValidYAML(t *testing.T) { + endpoint, _, host, _ := parseGitLabHostURL("http://[2335::aa1:1415]:32336") + + // snippet mirrors how testing/config/gitlab-template.yaml renders endpoint and host. + snippet := "toolchains:\n" + + " gitlab:\n" + + " endpoint: " + endpoint + "\n" + + " host: " + host + "\n" + + var parsed map[string]interface{} + if err := yaml.Unmarshal([]byte(snippet), &parsed); err != nil { + t.Fatalf("generated YAML snippet failed to parse: %v\nsnippet:\n%s", err, snippet) + } + + // Confirm the host scalar survived parsing intact (bare IPv6, no brackets). + gitlab, ok := parsed["toolchains"].(map[string]interface{})["gitlab"].(map[string]interface{}) + if !ok { + t.Fatalf("unexpected YAML structure: %#v", parsed) + } + if got := gitlab["host"]; got != "2335::aa1:1415" { + t.Errorf("parsed host = %v, want %q", got, "2335::aa1:1415") + } +}