Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .ci-operator.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
build_root_image:
name: boilerplate
namespace: openshift
tag: image-v8.3.2
tag: image-v8.3.3
2 changes: 1 addition & 1 deletion boilerplate/_data/backing-image-tag
Original file line number Diff line number Diff line change
@@ -1 +1 @@
image-v8.3.2
image-v8.3.3
2 changes: 1 addition & 1 deletion build/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM quay.io/redhat-services-prod/openshift/boilerplate:image-v8.3.2 AS builder
FROM quay.io/redhat-services-prod/openshift/boilerplate:image-v8.3.3 AS builder

RUN mkdir -p /workdir
WORKDIR /workdir
Expand Down
24 changes: 22 additions & 2 deletions test/e2e/certman_operator_tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"github.com/openshift/osde2e-common/pkg/clients/openshift"
corev1 "k8s.io/api/core/v1"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -568,6 +567,27 @@ var _ = ginkgo.Describe("Certman Operator", ginkgo.Ordered, ginkgo.ContinueOnFai
certificateSecretName, err := utils.GetCertificateSecretNameFromCR(certificateRequest)
gomega.Expect(err).ShouldNot(gomega.HaveOccurred(), "CertificateRequest should have certificateSecret name")

// Perform DNS-01 challenge verification before certificate issuance
ginkgo.GinkgoLogr.Info("Starting DNS-01 challenge verification")

// Get acmeDNSDomain from CertificateRequest
acmeDNSDomain, found, err := unstructured.NestedString(certificateRequest.Object, "spec", "acmeDNSDomain")
if !found || err != nil {
ginkgo.GinkgoLogr.Info("acmeDNSDomain not found in CertificateRequest, using BaseDomain",
"baseDomain", certConfig.BaseDomain)
acmeDNSDomain = certConfig.BaseDomain
}

// Perform DNS-01 challenge test using operator's functions
verified, err := utils.PerformDNS01ChallengeTest(ctx, k8s.GetConfig(), scheme, certificateRequest, certConfig.TestNamespace, clusterDeploymentName, acmeDNSDomain)
if err != nil {
ginkgo.GinkgoLogr.Error(err, "DNS-01 challenge test failed")
gomega.Expect(verified).To(gomega.BeTrue(), "DNS-01 challenge should complete successfully: %v", err)
} else {
gomega.Expect(verified).To(gomega.BeTrue(), "DNS-01 challenge should complete successfully")
}
ginkgo.GinkgoLogr.Info("DNS-01 challenge verification completed successfully")

ginkgo.GinkgoLogr.Info("Looking for certificate secret",
"secretName", certificateSecretName,
"namespace", certConfig.TestNamespace)
Expand Down Expand Up @@ -707,7 +727,7 @@ var _ = ginkgo.Describe("Certman Operator", ginkgo.Ordered, ginkgo.ContinueOnFai
pollInterval := 30 * time.Second

originalSecret, err := clientset.CoreV1().Secrets(namespace_certman_operator).Get(ctx, secretNameToDelete, metav1.GetOptions{})
if errors.IsNotFound(err) {
if apierrors.IsNotFound(err) {
log.Log.Info("Secret does not exist, skipping deletion test.")
return
}
Expand Down
236 changes: 236 additions & 0 deletions test/e2e/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,30 @@ import (
"fmt"
"io"
"log"
"net"
"net/http"
"net/url"
"os"
"strings"
"time"

aws_sdk "github.com/aws/aws-sdk-go/aws"
aws_config "github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/route53"
"github.com/go-logr/logr"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
certmanv1alpha1 "github.com/openshift/certman-operator/api/v1alpha1"
awsclient "github.com/openshift/certman-operator/pkg/clients/aws"
hivev1 "github.com/openshift/hive/apis/hive/v1"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
"k8s.io/apimachinery/pkg/util/yaml"
Expand All @@ -34,6 +43,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/restmapper"
"sigs.k8s.io/controller-runtime/pkg/client"
logs "sigs.k8s.io/controller-runtime/pkg/log"
)

Expand Down Expand Up @@ -1639,3 +1649,229 @@ func CleanupCertmanResources(ctx context.Context, dynamicClient dynamic.Interfac

return nil
}

// PerformDNS01ChallengeTest simulates a complete DNS-01 challenge workflow using operator functions:
// 1. Uses operator's AnswerDNSChallenge to create DNS TXT record in Route53
// 2. Verifies the record by querying Route53 nameservers directly
// 3. Uses operator's DeleteAcmeChallengeResourceRecords to cleanup
// Returns true if the complete flow succeeds, false otherwise.
func PerformDNS01ChallengeTest(ctx context.Context, cfg *rest.Config, scheme *runtime.Scheme, certificateRequestUnstructured *unstructured.Unstructured, namespace string, clusterDeploymentName string, domain string) (bool, error) {
log.Println("Starting DNS-01 challenge test for domain:", domain)

// Convert unstructured CertificateRequest to typed
var cr certmanv1alpha1.CertificateRequest
err := runtime.DefaultUnstructuredConverter.FromUnstructured(certificateRequestUnstructured.Object, &cr)
if err != nil {
return false, fmt.Errorf("failed to convert CertificateRequest: %w", err)
}

// Extract AWS configuration from CertificateRequest
if cr.Spec.Platform.AWS == nil {
return false, fmt.Errorf("CertificateRequest does not have AWS platform configured")
}

awsRegion := cr.Spec.Platform.AWS.Region
awsSecretName := cr.Spec.Platform.AWS.Credentials.Name

// Validate AWS configuration
if awsRegion == "" {
return false, fmt.Errorf("AWS region is empty in CertificateRequest spec")
}
if awsSecretName == "" {
return false, fmt.Errorf("AWS credentials secret name is empty in CertificateRequest spec")
}

// Create logger
reqLogger := logr.Discard() // Use discard logger for simplicity

// Add required types to the scheme if not already registered
_ = corev1.AddToScheme(scheme)
_ = certmanv1alpha1.AddToScheme(scheme)
_ = hivev1.AddToScheme(scheme) // Required for ClusterDeployment

// Create controller-runtime client using the scheme
runtimeClient, err := client.New(cfg, client.Options{Scheme: scheme})
if err != nil {
return false, fmt.Errorf("failed to create controller-runtime client: %w", err)
}

// Create operator's AWS client using the reused runtime client
log.Printf("Creating AWS client with region: %s, secret: %s", awsRegion, awsSecretName)
awsClient, err := awsclient.NewClient(reqLogger, runtimeClient, awsSecretName, namespace, awsRegion, clusterDeploymentName)
if err != nil {
return false, fmt.Errorf("failed to create AWS client: %w", err)
}

// Get hosted zone ID from environment or find it automatically
hostedZoneID := os.Getenv("HOSTED_ZONE_ID")
if hostedZoneID == "" {
log.Println("HOSTED_ZONE_ID not set, attempting to find hosted zone automatically...")
// Create a Route53 client for finding hosted zone
sess, err := aws_config.NewSession(&aws_sdk.Config{
Region: aws_sdk.String(awsRegion),
})
if err != nil {
return false, fmt.Errorf("failed to create AWS session: %w", err)
}
route53Client := route53.New(sess)
hostedZoneID, err = findHostedZoneID(route53Client, domain)
if err != nil {
return false, fmt.Errorf("failed to find hosted zone for domain %s: %w", domain, err)
}
log.Printf("Found hosted zone ID: %s", hostedZoneID)
}

// Generate test ACME challenge token
testToken := fmt.Sprintf("certman-dns01-test-%d", time.Now().Unix())

log.Printf("Creating DNS challenge record for domain: %s with token: %s", domain, testToken)

// Save original DnsNames and temporarily set to acmeDNSDomain for test
originalDnsNames := cr.Spec.DnsNames
cr.Spec.DnsNames = []string{domain}

// Step 1: Use operator's AnswerDNSChallenge to create DNS record
fqdn, err := awsClient.AnswerDNSChallenge(reqLogger, testToken, domain, &cr, hostedZoneID)
if err != nil {
return false, fmt.Errorf("failed to create DNS challenge record using operator function: %w", err)
}

log.Printf("DNS challenge record created successfully: %s", fqdn)

// Step 2: Verify DNS propagation by querying Route53 nameservers directly
log.Println("Verifying DNS record via Route53 nameservers...")
// Create Route53 client for verification
sess, err := aws_config.NewSession(&aws_sdk.Config{
Region: aws_sdk.String(awsRegion),
})
if err != nil {
return false, fmt.Errorf("failed to create AWS session for verification: %w", err)
}
route53Client := route53.New(sess)

verified, verifyErr := verifyDNSRecord(route53Client, hostedZoneID, fqdn, testToken)
if verifyErr != nil {
log.Printf("DNS verification failed: %v", verifyErr)
// Continue to cleanup even if verification failed
} else if !verified {
log.Println("DNS record not found in Route53 nameservers")
} else {
log.Println("DNS record verified successfully in Route53 nameservers")
}

// Step 3: Cleanup using operator's DeleteAcmeChallengeResourceRecords
log.Println("Cleaning up DNS challenge record...")
err = awsClient.DeleteAcmeChallengeResourceRecords(reqLogger, &cr)
if err != nil {
return false, fmt.Errorf("failed to cleanup DNS challenge record using operator function: %w", err)
}

log.Println("DNS challenge record cleaned up successfully")

// Restore original DnsNames
cr.Spec.DnsNames = originalDnsNames

if !verified {
if verifyErr != nil {
return false, fmt.Errorf("DNS-01 challenge failed: %w", verifyErr)
}
return false, fmt.Errorf("DNS-01 challenge failed: record created but DNS verification failed")
}

return true, nil
}

// verifyDNSRecord queries Route53's authoritative nameservers directly to verify a TXT record
// This bypasses cluster DNS and recursive resolvers, ensuring we get the authoritative answer
func verifyDNSRecord(r53Client *route53.Route53, hostedZoneID string, recordName string, expectedValue string) (bool, error) {
// Step 1: Get the hosted zone to retrieve nameservers
zone, err := r53Client.GetHostedZone(&route53.GetHostedZoneInput{
Id: aws_sdk.String(hostedZoneID),
})
if err != nil {
return false, fmt.Errorf("failed to get hosted zone: %w", err)
}

// Check if we have nameservers
if zone.DelegationSet == nil || len(zone.DelegationSet.NameServers) == 0 {
return false, fmt.Errorf("no nameservers found for hosted zone")
}

// Use the first nameserver
nameserver := *zone.DelegationSet.NameServers[0]
log.Printf("Querying Route53 nameserver directly: %s", nameserver)

// Step 2: Query the nameserver directly
maxRetries := 10
retryInterval := 2 * time.Second

for i := 0; i < maxRetries; i++ {
if i > 0 {
log.Printf("DNS verification attempt %d/%d...", i+1, maxRetries)
time.Sleep(retryInterval)
}

// Create custom resolver that queries the specific nameserver
resolver := &net.Resolver{
PreferGo: true,
Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
d := net.Dialer{
Timeout: time.Second * 10,
}
// Force query to go to Route53's nameserver
return d.DialContext(ctx, network, nameserver+":53")
},
}

// Query the TXT record
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
txtRecords, err := resolver.LookupTXT(ctx, recordName)
cancel()

if err != nil {
log.Printf("DNS lookup failed (attempt %d/%d): %v", i+1, maxRetries, err)
continue
}

// Check if our expected value exists
for _, record := range txtRecords {
if record == expectedValue {
log.Printf("DNS record verified on Route53 nameserver: found expected value '%s'", expectedValue)
return true, nil
}
}

log.Printf("Record found but value mismatch. Expected: '%s', Got: %v (attempt %d/%d)",
expectedValue, txtRecords, i+1, maxRetries)
}

return false, fmt.Errorf("DNS record verification timed out after %d attempts", maxRetries)
}

// findHostedZoneID queries Route53 to find the hosted zone ID for a given domain
func findHostedZoneID(r53Client *route53.Route53, domain string) (string, error) {
// Ensure domain has trailing dot for Route53 comparison
if !strings.HasSuffix(domain, ".") {
domain = domain + "."
}

// List all hosted zones
input := &route53.ListHostedZonesInput{}
result, err := r53Client.ListHostedZones(input)
if err != nil {
return "", fmt.Errorf("failed to list hosted zones: %w", err)
}

// Find matching hosted zone
for _, zone := range result.HostedZones {
if zone.Name != nil && *zone.Name == domain {
// Extract zone ID (remove "/hostedzone/" prefix if present)
zoneID := *zone.Id
zoneID = strings.TrimPrefix(zoneID, "/hostedzone/")
log.Printf("Found hosted zone: %s for domain: %s", zoneID, domain)
return zoneID, nil
}
}
Comment on lines 1859 to 1874
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

cat -n test/e2e/utils/utils.go | sed -n '1848,1863p'

Repository: openshift/certman-operator

Length of output: 715


🌐 Web query:

AWS Route 53 ListHostedZones API pagination limit per response

💡 Result:

For the Route 53 ListHostedZones API, the pagination limit is 100 hosted zones per response. You can control the page size with the maxitems parameter, but it can only return results in groups of up to 100; if there are more, the response is truncated and you continue with NextMarker (passed back as marker in the next request). [1]

Answer: Max 100 hosted zones per response (via maxitems, up to 100). [1]

Sources: [1]


🏁 Script executed:

cd test/e2e/utils && rg -A 20 "func findHostedZoneID" utils.go

Repository: openshift/certman-operator

Length of output: 826


🏁 Script executed:

rg "ListHostedZonesPages" --type go

Repository: openshift/certman-operator

Length of output: 52


🌐 Web query:

AWS SDK for Go ListHostedZonesPages method pagination route53

💡 Result:

In the AWS SDK for Go v1 (github.com/aws/aws-sdk-go/service/route53), ListHostedZonesPages is the built-in paginator for the Route 53 ListHostedZones API. It repeatedly calls the API and invokes your callback once per page; return false to stop early. [1]

SDK for Go v1 (ListHostedZonesPages)

svc := route53.New(sess)

err := svc.ListHostedZonesPages(&route53.ListHostedZonesInput{
    MaxItems: aws.String("100"), // optional
}, func(out *route53.ListHostedZonesOutput, lastPage bool) bool {
    for _, hz := range out.HostedZones {
        fmt.Println(aws.StringValue(hz.Name), aws.StringValue(hz.Id))
    }
    return true // keep paging
})

lastPage is true on the final page. [1]

Under the hood, the API’s pagination is driven by IsTruncated and NextMarker; if IsTruncated is true, the next request should use Marker = NextMarker. [2]

Equivalent in SDK for Go v2 (recommended)

In aws-sdk-go-v2, there is no ListHostedZonesPages; use route53.NewListHostedZonesPaginator with HasMorePages/NextPage. [3]

p := route53.NewListHostedZonesPaginator(client, &route53.ListHostedZonesInput{
    MaxItems: aws.Int32(100), // optional
})

for p.HasMorePages() {
    out, err := p.NextPage(ctx)
    if err != nil { return err }
    for _, hz := range out.HostedZones {
        fmt.Println(*hz.Name, *hz.Id)
    }
}

Sources: Route 53 ListHostedZones code/examples and API usage notes. [4]

References: [1] [2] [3] [4]


🏁 Script executed:

find . -name "*.go" -type f -exec grep -l "route53" {} \; | head -5 | xargs grep -B 2 -A 2 "ListHostedZonesPages"

Repository: openshift/certman-operator

Length of output: 108


🏁 Script executed:

grep -i "github.com/aws/aws-sdk-go" go.mod | head -20

Repository: openshift/certman-operator

Length of output: 105


🏁 Script executed:

head -50 test/e2e/utils/utils.go | grep -E "^import|route53"

Repository: openshift/certman-operator

Length of output: 123


Replace ListHostedZones with ListHostedZonesPages to handle pagination across all hosted zones.

The AWS Route 53 API returns a maximum of 100 hosted zones per response. The current implementation makes a single call and stops, silently returning "no hosted zone found" if the target zone exists on a later page. For accounts with more than 100 zones, this will fail silently.

Use the AWS SDK's ListHostedZonesPages paginator method, which automatically handles pagination by invoking the callback for each page until the zone is found or all zones are exhausted.

Proposed fix
 func findHostedZoneID(client *route53.Route53, domain string) (string, error) {
 	if !strings.HasSuffix(domain, ".") {
 		domain = domain + "."
 	}
 
-	input := &route53.ListHostedZonesInput{}
-	result, err := client.ListHostedZones(input)
-	if err != nil {
-		return "", fmt.Errorf("failed to list hosted zones: %w", err)
-	}
-
-	for _, zone := range result.HostedZones {
-		if zone.Name != nil && *zone.Name == domain {
-			zoneID := *zone.Id
-			zoneID = strings.TrimPrefix(zoneID, "/hostedzone/")
-			log.Printf("Found hosted zone: %s for domain: %s", zoneID, domain)
-			return zoneID, nil
-		}
-	}
-
-	return "", fmt.Errorf("no hosted zone found for domain: %s", domain)
+	var foundID string
+	err := client.ListHostedZonesPages(&route53.ListHostedZonesInput{}, func(page *route53.ListHostedZonesOutput, _ bool) bool {
+		for _, zone := range page.HostedZones {
+			if zone.Name != nil && *zone.Name == domain {
+				foundID = strings.TrimPrefix(*zone.Id, "/hostedzone/")
+				log.Printf("Found hosted zone: %s for domain: %s", foundID, domain)
+				return false // stop paging
+			}
+		}
+		return true // continue paging
+	})
+	if err != nil {
+		return "", fmt.Errorf("failed to list hosted zones: %w", err)
+	}
+	if foundID == "" {
+		return "", fmt.Errorf("no hosted zone found for domain: %s", domain)
+	}
+	return foundID, nil
 }
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/e2e/utils/utils.go` around lines 1848 - 1863, The current lookup uses
client.ListHostedZones and only inspects the first page (result.HostedZones),
which misses zones beyond 100; replace the single call with
client.ListHostedZonesPages, iterate pages in the provided callback checking
each page.HostedZones for a matching zone.Name==domain, extract and TrimPrefix
the zone.Id as before, return the zoneID and nil immediately when found, and
propagate any error from the paginator (and from the initial call) so the
function fails instead of silently returning no match.


return "", fmt.Errorf("no hosted zone found for domain: %s", domain)
}