Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 132 additions & 142 deletions cli/cmd/health_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ import (
"strings"
"time"

"github.com/dreadnode/dreadgoad/internal/labmap"
"github.com/fatih/color"
"github.com/spf13/cobra"
)

var healthCheckCmd = &cobra.Command{
Use: "health-check",
Short: "Verify all GOAD instances are healthy",
Long: `Runs health checks across all GOAD instances via SSM to verify:
Short: "Verify all lab instances are healthy",
Long: `Runs health checks across all lab instances via SSM to verify:
- Domain controllers are responding
- AD replication is working with no failures
- Domain trusts are established
Expand All @@ -40,7 +41,7 @@ type healthCheck struct {
func runHealthCheck(cmd *cobra.Command, args []string) error {
ctx := context.Background()

title := " GOAD Health Check "
title := " Lab Health Check "
pad := 90 - len(title)
left := pad / 2
right := pad - left
Expand All @@ -54,16 +55,15 @@ func runHealthCheck(cmd *cobra.Command, args []string) error {
fmt.Printf("%-40s %-10s %s\n", "CHECK", "STATUS", "DETAIL")
fmt.Println(strings.Repeat("-", 90))

checks := buildChecks()
checks := buildChecks(infra.Lab)

passed := 0
failed := 0

for _, check := range checks {
instanceID, ok := infra.HostMap[check.host]
if !ok {
color.Red("%-40s %-10s %s", check.name, "SKIP", "instance not found")
failed++
color.Yellow("%-40s %-10s %s", check.name, "SKIP", "instance not found")
continue
}

Expand Down Expand Up @@ -98,129 +98,134 @@ func runHealthCheck(cmd *cobra.Command, args []string) error {
return nil
}

// buildChecks assembles the list of healthCheck entries (name, host, command,
// eval) that runHealthCheck iterates over.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped.
// Two versions of the function are interleaved here: the parameterless one
// that returns a hard-coded literal for DC01-DC03 / SRV02-SRV03, and the one
// taking *labmap.LabMap that derives the checks from the lab description.
// The text is therefore not compilable as shown; confirm which side is live
// before editing.
func buildChecks() []healthCheck {
return []healthCheck{
// DC01 - AD responding
{
name: "DC01 AD Domain Controller",
host: "DC01",
command: `(Get-ADDomainController -Filter *).Name -join ','`,
eval: nonEmptyEval("no domain controllers returned"),
},
// DC01 - Replication
{
name: "DC01 AD Replication",
host: "DC01",
// The script prints REPL_ERRORS:<summary> when the repadmin output
// matches the fails/total pattern with a non-zero count, else REPL_OK.
command: `$r = repadmin /replsummary 2>&1 | Out-String; if ($r -match 'fails/total.*[1-9]\d*/') { Write-Output "REPL_ERRORS:$r" } else { Write-Output "REPL_OK" }`,
eval: replEval,
},
// DC01 - Trusts
{
name: "DC01 Domain Trusts",
host: "DC01",
command: `Get-ADTrust -Filter * | ForEach-Object { "$($_.Name)|$($_.Direction)|$($_.TrustType)" }`,
eval: dc01TrustsEval,
},
// DC02 - AD responding
{
name: "DC02 AD Domain Controller",
host: "DC02",
command: `(Get-ADDomainController -Filter *).Name -join ','`,
eval: nonEmptyEval("no domain controllers returned"),
},
// DC02 - DNS cross-domain
{
name: "DC02 DNS (sevenkingdoms.local)",
host: "DC02",
command: `(Resolve-DnsName kingslanding.sevenkingdoms.local -ErrorAction Stop).IPAddress`,
eval: nonEmptyEval("DNS resolution failed"),
},
{
name: "DC02 DNS (essos.local)",
host: "DC02",
command: `(Resolve-DnsName meereen.essos.local -ErrorAction Stop).IPAddress`,
eval: nonEmptyEval("DNS resolution failed"),
},
// DC03 - AD responding
{
name: "DC03 AD Domain Controller",
host: "DC03",
command: `(Get-ADDomainController -Filter *).Name -join ','`,
eval: nonEmptyEval("no domain controllers returned"),
},
// DC03 - Forest trust
{
name: "DC03 Forest Trust",
host: "DC03",
command: `Get-ADTrust -Filter * | ForEach-Object { "$($_.Name)|$($_.ForestTransitive)" }`,
eval: forestTrustEval,
},
// SRV02 - Domain membership
{
name: "SRV02 Domain Membership",
host: "SRV02",
command: `(Get-WmiObject Win32_ComputerSystem).Domain`,
eval: nonEmptyEval("not domain-joined"),
},
// SRV02 - DC reachable
{
name: "SRV02 DC Locator",
host: "SRV02",
// The script prints the name captured after "DC: \\" in the nltest
// output, or the literal FAIL when that pattern is absent.
command: `$r = nltest /dsgetdc: 2>&1 | Out-String; if ($r -match 'DC: \\\\(\S+)') { Write-Output $Matches[1] } else { Write-Output "FAIL" }`,
eval: dcLocatorEval,
},
// SRV02 - IIS
{
name: "SRV02 IIS (W3SVC)",
host: "SRV02",
command: `(Get-Service W3SVC -ErrorAction SilentlyContinue).Status`,
eval: serviceRunningEval,
},
// SRV02 - MSSQL
{
name: "SRV02 MSSQL",
host: "SRV02",
command: `(Get-Service 'MSSQL$SQLEXPRESS' -ErrorAction SilentlyContinue).Status`,
eval: serviceRunningEval,
},
// SRV03 - Domain membership
{
name: "SRV03 Domain Membership",
host: "SRV03",
command: `(Get-WmiObject Win32_ComputerSystem).Domain`,
eval: nonEmptyEval("not domain-joined"),
},
// SRV03 - DC reachable
{
name: "SRV03 DC Locator",
host: "SRV03",
command: `$r = nltest /dsgetdc: 2>&1 | Out-String; if ($r -match 'DC: \\\\(\S+)') { Write-Output $Matches[1] } else { Write-Output "FAIL" }`,
eval: dcLocatorEval,
},
// SRV03 - IIS
{
name: "SRV03 IIS (W3SVC)",
host: "SRV03",
// NOTE(review): the hard-coded literal is interrupted here; its remaining
// lines reappear further down, mixed into the IIS check of the
// lab-driven version that starts on the next line.
func buildChecks(lab *labmap.LabMap) []healthCheck {
var checks []healthCheck

// For each DC: AD responding + replication
for _, role := range lab.DCs() {
// The upper-cased role is used as the host key; runHealthCheck looks
// that key up in infra.HostMap to find the instance.
host := strings.ToUpper(role)
checks = append(checks,
healthCheck{
name: fmt.Sprintf("%s AD Domain Controller", host),
host: host,
command: `(Get-ADDomainController -Filter *).Name -join ','`,
eval: nonEmptyEval("no domain controllers returned"),
},
healthCheck{
name: fmt.Sprintf("%s AD Replication", host),
host: host,
command: `$r = repadmin /replsummary 2>&1 | Out-String; if ($r -match 'fails/total.*[1-9]\d*/') { Write-Output "REPL_ERRORS:$r" } else { Write-Output "REPL_OK" }`,
eval: replEval,
},
)
}

// Trust checks — derived from config
// Each trust entry yields up to two checks: the source DC must list the
// target domain and the target DC must list the source domain. A side is
// skipped when its DC role is empty.
for _, tf := range lab.DomainTrusts() {
if tf.SourceDCRole != "" {
srcHost := strings.ToUpper(tf.SourceDCRole)
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s Trusts (%s)", srcHost, tf.TargetDomain),
host: srcHost,
command: `Get-ADTrust -Filter * | ForEach-Object { "$($_.Name)|$($_.Direction)|$($_.TrustType)" }`,
eval: trustContainsEval(tf.TargetDomain),
})
}
if tf.TargetDCRole != "" {
tgtHost := strings.ToUpper(tf.TargetDCRole)
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s Trusts (%s)", tgtHost, tf.SourceDomain),
host: tgtHost,
command: `Get-ADTrust -Filter * | ForEach-Object { "$($_.Name)|$($_.Direction)|$($_.TrustType)" }`,
eval: trustContainsEval(tf.SourceDomain),
})
}
}

// Cross-domain DNS resolution between DCs on different domains
// Every unordered pair (i < j) is visited once; pairs whose DomainForHost
// values are equal are skipped, and each direction is only added when the
// name to resolve is non-empty.
dcRoles := lab.DCs()
for i := 0; i < len(dcRoles); i++ {
for j := i + 1; j < len(dcRoles); j++ {
roleA, roleB := dcRoles[i], dcRoles[j]
domainA := lab.DomainForHost(roleA)
domainB := lab.DomainForHost(roleB)
if domainA == domainB {
continue
}
fqdnA := lab.FQDN(roleA)
fqdnB := lab.FQDN(roleB)
hostA := strings.ToUpper(roleA)
hostB := strings.ToUpper(roleB)

if fqdnB != "" {
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s DNS (%s)", hostA, domainB),
host: hostA,
command: fmt.Sprintf(`(Resolve-DnsName %s -ErrorAction Stop).IPAddress`, fqdnB),
eval: nonEmptyEval("DNS resolution failed"),
})
}
if fqdnA != "" {
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s DNS (%s)", hostB, domainA),
host: hostB,
command: fmt.Sprintf(`(Resolve-DnsName %s -ErrorAction Stop).IPAddress`, fqdnA),
eval: nonEmptyEval("DNS resolution failed"),
})
}
}
}

// Windows servers: domain membership + DC locator + services
for _, role := range lab.WindowsServers() {
host := strings.ToUpper(role)

checks = append(checks,
healthCheck{
name: fmt.Sprintf("%s Domain Membership", host),
host: host,
command: `(Get-WmiObject Win32_ComputerSystem).Domain`,
eval: nonEmptyEval("not domain-joined"),
},
healthCheck{
name: fmt.Sprintf("%s DC Locator", host),
host: host,
command: `$r = nltest /dsgetdc: 2>&1 | Out-String; if ($r -match 'DC: \\\\(\S+)') { Write-Output $Matches[1] } else { Write-Output "FAIL" }`,
eval: dcLocatorEval,
},
)

// IIS check (optional — passes if not installed)
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s IIS (W3SVC)", host),
host: host,
command: `(Get-Service W3SVC -ErrorAction SilentlyContinue).Status`,
// NOTE(review): from the serviceRunningEval line through the hard-coded
// SRV03 MSSQL entry below is the tail of the parameterless version; the
// lab-driven IIS check uses optionalServiceEval.
eval: serviceRunningEval,
},
// SRV03 - MSSQL
{
name: "SRV03 MSSQL",
host: "SRV03",
command: `(Get-Service 'MSSQL$SQLEXPRESS' -ErrorAction SilentlyContinue).Status`,
eval: serviceRunningEval,
},
eval: optionalServiceEval,
})
}

// MSSQL on hosts that have it configured
for _, role := range lab.HostsWithMSSQL() {
host := strings.ToUpper(role)
checks = append(checks, healthCheck{
name: fmt.Sprintf("%s MSSQL", host),
host: host,
// Queries both the SQLEXPRESS instance and the default MSSQLSERVER service
// and prints the names of whichever are Running; empty output therefore
// means neither is running.
command: `(Get-Service 'MSSQL$SQLEXPRESS','MSSQLSERVER' -ErrorAction SilentlyContinue | Where-Object {$_.Status -eq 'Running'}).Name`,
eval: nonEmptyEval("MSSQL not running"),
})
}

return checks
}

// serviceRunningEval is the strict service check. stdout is expected to hold
// the status text printed by the Get-Service command; it is trimmed and
// lower-cased before comparison.
//
// It returns (true, "running") only for a running service. Empty output fails
// with "service not found", and any other status fails with that status
// echoed back as the detail.
func serviceRunningEval(stdout string) (bool, string) {
	switch status := strings.TrimSpace(strings.ToLower(stdout)); status {
	case "running":
		return true, "running"
	case "":
		return false, "service not found"
	default:
		return false, status
	}
}

// optionalServiceEval passes if running, skips (passes) if not installed.
//
// It differs from serviceRunningEval only in the empty-output case: because
// the command is run with -ErrorAction SilentlyContinue, empty output is
// treated as "service absent" and reported as (true, "not installed (OK)").
// A service that exists but is in any state other than running still fails,
// with the lower-cased status as the detail.
func optionalServiceEval(stdout string) (bool, string) {
	switch status := strings.TrimSpace(strings.ToLower(stdout)); status {
	case "running":
		return true, "running"
	case "":
		return true, "not installed (OK)"
	default:
		return false, status
	}
}
Expand All @@ -243,29 +248,14 @@ func replEval(stdout string) (bool, string) {
return false, "replication errors detected"
}

// dc01TrustsEval checks the trust listing captured from DC01 for the two
// domains it is expected to trust. Matching is a case-insensitive substring
// search over the whole output.
//
// It returns true with a fixed summary when both domains are mentioned;
// otherwise it returns false with "missing: " followed by the absent
// domains, comma-separated, in the order north.sevenkingdoms.local then
// essos.local.
func dc01TrustsEval(stdout string) (bool, string) {
	haystack := strings.ToLower(stdout)

	var absent []string
	for _, domain := range []string{"north.sevenkingdoms.local", "essos.local"} {
		if !strings.Contains(haystack, domain) {
			absent = append(absent, domain)
		}
	}

	if len(absent) > 0 {
		return false, "missing: " + strings.Join(absent, ", ")
	}
	return true, "north.sevenkingdoms.local + essos.local"
}

// forestTrustEval checks the "Name|ForestTransitive" trust listing for a
// forest-transitive trust to sevenkingdoms.local. The match is a
// case-insensitive substring search over the whole output: it passes when
// the output mentions both "sevenkingdoms.local" and "true".
func forestTrustEval(stdout string) (bool, string) {
	lower := strings.ToLower(stdout)
	if strings.Contains(lower, "sevenkingdoms.local") && strings.Contains(lower, "true") {
		return true, "sevenkingdoms.local (forest transitive)"
	}
	return false, "forest trust to sevenkingdoms.local not found"
}

// trustContainsEval returns an eval func that checks if the trust output mentions a domain.
//
// The comparison is a case-insensitive substring match of expectedDomain
// against the whole of stdout. On success the detail is expectedDomain as
// given; on failure it is "trust to <expectedDomain> not found".
//
// A blank expectedDomain always fails: an empty needle is contained in every
// string, so without the guard a trust entry with no domain configured would
// be reported as healthy.
func trustContainsEval(expectedDomain string) func(string) (bool, string) {
	// Lower-case the needle once rather than on every evaluation.
	want := strings.ToLower(expectedDomain)
	blank := strings.TrimSpace(expectedDomain) == ""

	return func(stdout string) (bool, string) {
		if blank {
			return false, "no expected trust domain configured"
		}
		if strings.Contains(strings.ToLower(stdout), want) {
			return true, expectedDomain
		}
		return false, "trust to " + expectedDomain + " not found"
	}
}

func dcLocatorEval(stdout string) (bool, string) {
Expand Down
Loading
Loading