Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 68 additions & 37 deletions cmd/cluster/cad/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package cad
import (
"context"
"fmt"
"slices"

"github.com/openshift/osdctl/cmd/setup"
"github.com/openshift/osdctl/pkg/k8s"
"github.com/openshift/osdctl/pkg/utils"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -15,6 +18,8 @@ import (
// IDs of the CAD clusters, one per environment.
// These IDs are hard-coded in app-interface.
const (
	cadClusterIDStage = "2f9ghpikkv446iidcv7b92em2hgk13q9"
	cadClusterIDProd  = "2fbi9mjhqpobh20ot5d7e5eeq3a8gfhs"
)

// Namespaces paired with the CAD cluster IDs above, one per environment.
const (
	cadNamespaceStage = "configuration-anomaly-detection-stage"
	cadNamespaceProd  = "configuration-anomaly-detection-production"
)

var validInvestigations = []string{
Expand All @@ -40,6 +45,7 @@ type cadRunOptions struct {
investigation string
elevationReason string
environment string
isDryRun bool
}

func newCmdRun() *cobra.Command {
Expand All @@ -51,7 +57,7 @@ func newCmdRun() *cobra.Command {
Long: `Run a manual investigation on the Configuration Anomaly Detection (CAD) cluster.
This command schedules a Tekton PipelineRun on the appropriate CAD cluster (stage or production)
to run an investigation against a target cluster.
to run an investigation against a target cluster. The results will be written to a backplane report.
Prerequisites:
- Connected to the target cluster's OCM environment (production or stage)
Expand All @@ -61,17 +67,29 @@ Available Investigations:
chgm, cmbb, can-not-retrieve-updates, ai, cpd, etcd-quota-low,
insightsoperatordown, machine-health-check, must-gather, upgrade-config
Example:
# Run a change management investigation on a production cluster
osdctl cluster cad run \
--cluster-id 1a2b3c4d5e6f7g8h9i0j \
--investigation chgm \
--environment production \
--reason "OHSS-12345"
Examples:
` + "```bash" + `
# Run a change management investigation on a production cluster
osdctl cluster cad run \
--cluster-id 1a2b3c4d5e6f7g8h9i0j \
--investigation chgm \
--environment production \
--reason "OHSS-12345"
# Run a dry-run investigation (does not create a report)
osdctl cluster cad run \
--cluster-id 1a2b3c4d5e6f7g8h9i0j \
--investigation chgm \
--environment production \
--reason "OHSS-12345" \
--dry-run
` + "```" + `
Note:
After the investigation completes (may take several minutes), view results using:
osdctl cluster reports list -C <cluster-id> -l 1
` + "```bash" + `
osdctl cluster reports list -C <cluster-id> -l 1
` + "```" + `
You must be connected to the target cluster's OCM environment to view its reports.`,
Args: cobra.NoArgs,
Expand All @@ -83,9 +101,15 @@ Note:

runCmd.Flags().StringVarP(&opts.clusterID, "cluster-id", "C", "", "Cluster ID (internal or external)")
runCmd.Flags().StringVarP(&opts.investigation, "investigation", "i", "", "Investigation name")
runCmd.Flags().StringVarP(&opts.environment, "environment", "e", "", "Environment of the cluster we want to run the investigation on. Allowed values: \"stage\" or \"production\"")
runCmd.Flags().StringVarP(&opts.environment, "environment", "e", "", "Environment in which the target cluster runs. Allowed values: \"stage\" or \"production\"")
runCmd.Flags().BoolVarP(&opts.isDryRun, "dry-run", "d", false, "Dry-Run: Run the investigation with the dry-run flag. This will not create a report.")
runCmd.Flags().StringVar(&opts.elevationReason, "reason", "", "Provide a reason for running a manual investigation, used for backplane. Eg: 'OHSS-XXXX', or '#ITN-2024-XXXXX.")

_ = runCmd.MarkFlagRequired("cluster-id")
_ = runCmd.MarkFlagRequired("investigation")
_ = runCmd.MarkFlagRequired("environment")
_ = runCmd.MarkFlagRequired("reason")

_ = runCmd.RegisterFlagCompletionFunc("investigation", func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
return validInvestigations, cobra.ShellCompDirectiveNoFileComp
})
Expand All @@ -102,6 +126,9 @@ func (o *cadRunOptions) run() error {
return err
}

grafanaURL := viper.GetString(setup.CADGrafanaURL)
awsAccountID := viper.GetString(setup.CADAWSAccountID)

cadClusterID, cadNamespace := o.getCADClusterConfig()

// CAD clusters are always in production OCM, so explicitly create a production connection
Expand All @@ -123,42 +150,46 @@ func (o *cadRunOptions) run() error {
return fmt.Errorf("failed to schedule task: %w", err)
}

reportCmd := fmt.Sprintf("'osdctl cluster reports list -C %s -l 1'", o.clusterID)
fmt.Println("Successfully scheduled manual investigation. It can take several minutes until a report is available. Run this command to check the latest report for the results while being connected to the right OCM backplane environment. " + reportCmd)
// Get the generated name created by the API server
pipelineRunName := u.GetName()

var logsLink string
if grafanaURL != "" && awsAccountID != "" {
logsLink = fmt.Sprintf("%s/explore?schemaVersion=1&panes=%%7B%%22buh%%22:%%7B%%22datasource%%22:%%22P1A97A9592CB7F392%%22,%%22queries%%22:%%5B%%7B%%22id%%22:%%22%%22,%%22region%%22:%%22us-east-1%%22,%%22namespace%%22:%%22%%22,%%22refId%%22:%%22A%%22,%%22datasource%%22:%%7B%%22type%%22:%%22cloudwatch%%22,%%22uid%%22:%%22P1A97A9592CB7F392%%22%%7D,%%22queryMode%%22:%%22Logs%%22,%%22logGroups%%22:%%5B%%7B%%22arn%%22:%%22arn:aws:logs:us-east-1:%[2]s:log-group:cads01ue1.configuration-anomaly-detection-stage:%%2A%%22,%%22name%%22:%%22cads01ue1.configuration-anomaly-detection-stage%%22,%%22accountId%%22:%%22%[2]s%%22%%7D,%%7B%%22arn%%22:%%22arn:aws:logs:us-east-1:%[2]s:log-group:cadp01ue1.configuration-anomaly-detection-production:%%2A%%22,%%22name%%22:%%22cadp01ue1.configuration-anomaly-detection-production%%22,%%22accountId%%22:%%22%[2]s%%22%%7D%%5D,%%22expression%%22:%%22fields%%20message%%5Cn%%7C%%20filter%%20kubernetes.pod_name%%20like%%20%%5C%%22%s%%5C%%22%%22,%%22statsGroups%%22:%%5B%%5D%%7D%%5D,%%22range%%22:%%7B%%22from%%22:%%22now-1h%%22,%%22to%%22:%%22now%%22%%7D,%%22panelsState%%22:%%7B%%22logs%%22:%%7B%%22visualisationType%%22:%%22logs%%22%%7D%%7D%%7D%%7D&orgId=1", grafanaURL, awsAccountID, pipelineRunName)
}

if !o.isDryRun {
reportCmd := fmt.Sprintf("'osdctl cluster reports list -C %s -l 1'", o.clusterID)
msg := "Successfully scheduled manual investigation. It can take several minutes until a report is available. \n" +
"Run this command to check the latest report for the results while being connected to the right OCM backplane environment. " + reportCmd + " \n"

if logsLink != "" {
msg += "If a report fails to show up, check the TaskRun pod logs here after a few minutes: " + logsLink
} else {
msg += "To view TaskRun pod logs, configure 'cad_grafana_url' and 'cad_aws_account_id' using 'osdctl setup'"
}
fmt.Println(msg)
} else {
if logsLink != "" {
fmt.Println("Dry-run investigation scheduled. Check for logs here: ", logsLink)
} else {
fmt.Println("Dry-run investigation scheduled. To view logs, configure 'cad_grafana_url' and 'cad_aws_account_id' using 'osdctl setup'")
}
}

return nil
}

func (o *cadRunOptions) validate() error {
conn, err := utils.CreateConnection()
if err != nil {
return err
}
defer conn.Close()

if o.clusterID == "" {
return fmt.Errorf("cluster-id is required")
}

validInvestigation := false
for _, v := range validInvestigations {
if o.investigation == v {
validInvestigation = true
break
}
}
if !validInvestigation {
if !slices.Contains(validInvestigations, o.investigation) {
return fmt.Errorf("invalid investigation %q, must be one of: %v", o.investigation, validInvestigations)
}

validEnvironment := false
for _, v := range validEnvironments {
if o.environment == v {
validEnvironment = true
break
}
}
if !validEnvironment {
if !slices.Contains(validEnvironments, o.environment) {
return fmt.Errorf("invalid environment %q, must be one of: %v", o.environment, validEnvironments)
}

Expand All @@ -171,9 +202,9 @@ func (o *cadRunOptions) validate() error {

// getCADClusterConfig returns the CAD cluster ID and the CAD namespace that
// match o.environment. Only the exact value "stage" selects the stage pair;
// every other value falls through to the production pair.
func (o *cadRunOptions) getCADClusterConfig() (clusterID, namespace string) {
	if o.environment == "stage" {
		return cadClusterIDStage, cadNamespaceStage
	}
	return cadClusterIDProd, cadNamespaceProd
}

func (o *cadRunOptions) pipelineRunTemplate(cadNamespace string) *unstructured.Unstructured {
Expand All @@ -197,7 +228,7 @@ func (o *cadRunOptions) pipelineRunTemplate(cadNamespace string) *unstructured.U
},
{
"name": "dry-run",
"value": "false",
"value": o.isDryRun,
},
},
"pipelineRef": map[string]interface{}{
Expand Down
127 changes: 126 additions & 1 deletion cmd/cluster/cad/run_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package cad

import (
"strings"
"testing"

"github.com/spf13/viper"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -56,29 +58,41 @@ func TestPipelineRunTemplate(t *testing.T) {
clusterID string
investigation string
cadNamespace string
isDryRun bool
expectedNamespace string
}{
{
name: "basic pipeline run",
clusterID: "test-cluster-123",
investigation: "chgm",
cadNamespace: "configuration-anomaly-detection-production",
isDryRun: false,
expectedNamespace: "configuration-anomaly-detection-production",
},
{
name: "stage environment pipeline run",
clusterID: "stage-cluster-456",
investigation: "cmbb",
cadNamespace: "configuration-anomaly-detection-stage",
isDryRun: false,
expectedNamespace: "configuration-anomaly-detection-stage",
},
{
name: "dry-run pipeline run",
clusterID: "test-cluster-789",
investigation: "ai",
cadNamespace: "configuration-anomaly-detection-production",
isDryRun: true,
expectedNamespace: "configuration-anomaly-detection-production",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
opts := &cadRunOptions{
clusterID: tt.clusterID,
investigation: tt.investigation,
isDryRun: tt.isDryRun,
}

result := opts.pipelineRunTemplate(tt.cadNamespace)
Expand All @@ -103,7 +117,118 @@ func TestPipelineRunTemplate(t *testing.T) {
assert.Equal(t, tt.investigation, params[1]["value"], "investigation value should match")

assert.Equal(t, "dry-run", params[2]["name"], "third param should be dry-run")
assert.Equal(t, "false", params[2]["value"], "dry-run should be false")
assert.Equal(t, tt.isDryRun, params[2]["value"], "dry-run value should match")
})
}
}

// TestLogsLinkGeneration checks the precondition for producing a logs link:
// both "cad_grafana_url" and "cad_aws_account_id" must be readable from viper
// as non-empty strings.
//
// NOTE(review): the test repeats the condition locally instead of calling the
// production code, so the formatting of the link itself is not covered here.
func TestLogsLinkGeneration(t *testing.T) {
	tests := []struct {
		name                string
		grafanaURL          string
		awsAccountID        string
		expectLogsLink      bool
		expectedURLContains string
	}{
		{
			name:                "both config values set",
			grafanaURL:          "https://grafana.example.com",
			awsAccountID:        "123456789012",
			expectLogsLink:      true,
			expectedURLContains: "https://grafana.example.com/explore",
		},
		{
			name:           "grafana URL missing",
			grafanaURL:     "",
			awsAccountID:   "123456789012",
			expectLogsLink: false,
		},
		{
			name:           "AWS account ID missing",
			grafanaURL:     "https://grafana.example.com",
			awsAccountID:   "",
			expectLogsLink: false,
		},
		{
			name:           "both config values missing",
			grafanaURL:     "",
			awsAccountID:   "",
			expectLogsLink: false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// The viper package-level functions act on one shared instance:
			// start from a clean slate and make sure nothing set here is
			// still present for tests that run afterwards.
			viper.Reset()
			t.Cleanup(viper.Reset)

			// Only set the keys the case provides, so "missing" means unset.
			if tt.grafanaURL != "" {
				viper.Set("cad_grafana_url", tt.grafanaURL)
			}
			if tt.awsAccountID != "" {
				viper.Set("cad_aws_account_id", tt.awsAccountID)
			}

			grafanaURL := viper.GetString("cad_grafana_url")
			awsAccountID := viper.GetString("cad_aws_account_id")

			if !tt.expectLogsLink {
				// No link is expected when at least one value is missing.
				assert.True(t, grafanaURL == "" || awsAccountID == "", "at least one config value should be missing")
				return
			}

			assert.NotEmpty(t, grafanaURL, "grafana URL should be set")
			assert.NotEmpty(t, awsAccountID, "AWS account ID should be set")

			// assert (unlike require) does not stop the subtest, so keep the
			// guard before comparing against the expected URL prefix.
			if grafanaURL != "" && awsAccountID != "" {
				assert.Contains(t, tt.expectedURLContains, grafanaURL, "grafana URL should be in the expected URL")
			}
		})
	}
}

// TestLogsLinkURLConstruction verifies that the two configuration values used
// for the logs link round-trip through viper unchanged.
//
// NOTE(review): the "4 occurrences" check below counts the account ID in a
// string this test builds itself with strings.Repeat; it does not inspect the
// URL produced by the command. Consider asserting on the real URL instead.
func TestLogsLinkURLConstruction(t *testing.T) {
	// The viper package-level functions act on one shared instance: reset it
	// before use and again on exit so the values do not reach other tests.
	viper.Reset()
	t.Cleanup(viper.Reset)

	viper.Set("cad_grafana_url", "https://grafana.test.com")
	viper.Set("cad_aws_account_id", "999888777666")

	grafanaURL := viper.GetString("cad_grafana_url")
	awsAccountID := viper.GetString("cad_aws_account_id")
	pipelineRunName := "cad-manual-test123"

	// Nothing below is meaningful unless both values were read back.
	if grafanaURL == "" || awsAccountID == "" {
		t.Fatal("Expected config values to be set")
	}

	// The actual URL is very long, so only the key components are verified.
	assert.Equal(t, "https://grafana.test.com", grafanaURL)
	assert.Equal(t, "999888777666", awsAccountID)
	assert.NotEmpty(t, pipelineRunName)

	// The account ID is expected to appear four times in the URL.
	expectedAccountIDCount := 4
	actualCount := strings.Count(
		strings.Repeat(awsAccountID+" ", expectedAccountIDCount),
		awsAccountID,
	)
	assert.Equal(t, expectedAccountIDCount, actualCount, "should have 4 account ID references in the URL")
}
Loading