From ee634d0c5794d6e1d4764c50ab163af0a2b3751f Mon Sep 17 00:00:00 2001
From: Alexander Morozov <dodomorozov@10.211.220.145-vpn.dhcp.yndx.net>
Date: Fri, 5 Jun 2026 10:27:36 +0300
Subject: [PATCH 1/5] feat: send timings metrics

---
 internal/app/app.go            | 13 +++++
 internal/app/app_dcs.go        | 13 +++++
 internal/app/data.go           |  3 ++
 internal/app/timing_tracker.go | 91 ++++++++++++++++++++++++++++++++++
 tests/features/timings.feature | 70 ++++++++++++++++++++++++++
 tests/images/mysql/mysync.yaml |  2 +
 6 files changed, 192 insertions(+)
 create mode 100644 internal/app/timing_tracker.go
 create mode 100644 tests/features/timings.feature

diff --git a/internal/app/app.go b/internal/app/app.go
index bde78a09..9b75db2f 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -641,6 +641,7 @@ func (app *App) stateManager() appState {
 		} else {
 			if !switchover.InitiatedAt.IsZero() && time.Since(switchover.InitiatedAt) > app.config.SwitchoverTimeout {
 				app.logger.Error().Msgf("switchover %s => %s timed out after %s", switchover.From, switchover.To, time.Since(switchover.InitiatedAt))
+				app.logSwitchoverFailure(switchover)
 				err = app.FailSwitchover(switchover, fmt.Errorf("switchover timed out after %s", time.Since(switchover.InitiatedAt)))
 				if err != nil {
 					app.logger.Error().Err(err).Msg("failed to report switchover timeout")
@@ -711,6 +712,9 @@ func (app *App) stateManager() appState {
 		}
 	} else {
 		app.t.Clean(NodeFailedAt, master)
+		// flush timings left behind by an aborted failover/switchover
+		app.stopTiming(timingDowntime)
+		app.stopTiming(timingFailover)
 	}
 
 	if !clusterState[master].PingOk {
@@ -1367,6 +1371,11 @@ func (app *App) performSwitchover(clusterState map[string]*nodestate.NodeState,
 		}
 	}
 
+	// downtime for failover starts at master loss (see IssueFailover); for switchover it starts here
+	if switchover.MasterTransition == SwitchoverTransition {
+		app.startTiming(timingDowntime, time.Time{})
+	}
+
 	// set read only everywhere (all HA-nodes) and stop replication
 	app.logger.Info().Msg("switchover: phase 1: enter read only")
 	errs := util.RunParallel(func(host string) error {
@@ -1608,6 +1617,10 @@ func (app *App) performSwitchover(clusterState map[string]*nodestate.NodeState,
 	}
 	app.logger.Info().Msgf("switchover: new master %s set writable", newMaster)
 
+	// cluster is open for writes again, failover/downtime are over
+	app.stopTiming(timingDowntime)
+	app.stopTiming(timingFailover)
+
 	// reenable events
 	events, err := newMasterNode.ReenableEventsRetry()
 	if err != nil || app.emulateError("promote_reenable_events") {
diff --git a/internal/app/app_dcs.go b/internal/app/app_dcs.go
index 639579e1..92b73410 100644
--- a/internal/app/app_dcs.go
+++ b/internal/app/app_dcs.go
@@ -149,6 +149,12 @@ func (app *App) FinishSwitchover(switchover *Switchover, switchErr error) error
 		switchover.Result.Error = switchErr.Error()
 	}
 
+	if switchErr != nil {
+		app.logSwitchoverFailure(switchover)
+	} else if switchover.MasterTransition == SwitchoverTransition {
+		app.stopTiming(timingSwitchover)
+	}
+
 	err := app.dcs.Delete(pathCurrentSwitch)
 	if err != nil {
 		return err
@@ -171,6 +177,9 @@ func (app *App) StartSwitchover(switchover *Switchover) error {
 	app.logger.Info().Msgf("switchover: %s => %s starting...", switchover.From, switchover.To)
 	switchover.StartedAt = time.Now()
 	switchover.StartedBy = app.config.Hostname
+	if switchover.MasterTransition == SwitchoverTransition {
+		app.startTiming(timingSwitchover, time.Time{})
+	}
 	return app.dcs.Set(pathCurrentSwitch, switchover)
 }
 
@@ -199,6 +208,10 @@ func (app *App) IssueFailover(master string) error {
 	switchover.InitiatedAt = time.Now()
 	switchover.Cause = CauseAuto
 	switchover.MasterTransition = FailoverTransition
+	// downtime/failover are measured from the moment the master was lost
+	failedAt := app.t.Get(NodeFailedAt, master)
+	app.startTiming(timingDowntime, failedAt)
+	app.startTiming(timingFailover, failedAt)
 	return app.dcs.Create(pathCurrentSwitch, switchover)
 }
 
diff --git a/internal/app/data.go b/internal/app/data.go
index 20d60949..fb6fd6b6 100644
--- a/internal/app/data.go
+++ b/internal/app/data.go
@@ -65,6 +65,9 @@ const (
 
 	// last known timestamp from repl_mon table
 	pathMasterReplMonTS = "master_repl_mon_ts"
+
+	// timing start timestamps, structure: pathTimings/<name> -> time.Time
+	pathTimings = "timing"
 )
 
 var (
diff --git a/internal/app/timing_tracker.go b/internal/app/timing_tracker.go
new file mode 100644
index 00000000..09fa0346
--- /dev/null
+++ b/internal/app/timing_tracker.go
@@ -0,0 +1,91 @@
+package app
+
+import (
+	"context"
+	"fmt"
+	"os/exec"
+	"strconv"
+	"time"
+
+	"github.com/yandex/mysync/internal/dcs"
+	"github.com/yandex/mysync/internal/util"
+)
+
+// names of tracked timings
+const (
+	timingDowntime   = "downtime"
+	timingFailover   = "failover"
+	timingSwitchover = "switchover"
+)
+
+func (app *App) timingPath(name string) string {
+	return dcs.JoinPath(pathTimings, name)
+}
+
+// getTimingStart returns stored start time and whether it exists
+func (app *App) getTimingStart(name string) (time.Time, bool) {
+	var ts time.Time
+	if err := app.dcs.Get(app.timingPath(name), &ts); err != nil {
+		return time.Time{}, false
+	}
+	return ts, true
+}
+
+// startTiming stores timing start in dcs (zero ts means now)
+func (app *App) startTiming(name string, ts time.Time) {
+	if ts.IsZero() {
+		ts = time.Now()
+	}
+	if err := app.dcs.Set(app.timingPath(name), ts); err != nil {
+		app.logger.Warn().Err(err).Msgf("failed to start timing %s", name)
+	}
+}
+
+func (app *App) clearTiming(name string) {
+	if err := app.dcs.Delete(app.timingPath(name)); err != nil {
+		app.logger.Warn().Err(err).Msgf("failed to clear timing %s", name)
+	}
+}
+
+// stopTiming logs elapsed time since start and clears it
+func (app *App) stopTiming(name string) {
+	start, ok := app.getTimingStart(name)
+	if !ok {
+		return
+	}
+	app.clearTiming(name)
+	app.logTiming(name, time.Since(start))
+}
+
+// logSwitchoverFailure records time spent on a failed switchover as a negative value.
+// Marker-guarded so it is logged once, and only when the switchover actually started.
+func (app *App) logSwitchoverFailure(sw *Switchover) {
+	if sw.MasterTransition != SwitchoverTransition {
+		return
+	}
+	start, ok := app.getTimingStart(timingSwitchover)
+	if !ok {
+		return
+	}
+	app.clearTiming(timingSwitchover)
+	since := sw.InitiatedAt
+	if since.IsZero() {
+		since = start
+	}
+	app.logTiming(timingSwitchover+"_failure", -time.Since(since))
+}
+
+// logTiming runs the configured log_timing command with name and elapsed seconds
+func (app *App) logTiming(name string, d time.Duration) {
+	command, ok := app.config.Commands["log_timing"]
+	if !ok || command == "" {
+		return
+	}
+	command = fmt.Sprintf(command, name, strconv.FormatFloat(d.Seconds(), 'f', 3, 64))
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	shell := util.GetEnvVariable("SHELL", "sh")
+	if err := exec.CommandContext(ctx, shell, "-c", command).Run(); err != nil {
+		app.logger.Warn().Err(err).Msgf("failed to execute log_timing command for %s", name)
+	}
+}
diff --git a/tests/features/timings.feature b/tests/features/timings.feature
new file mode 100644
index 00000000..44f7bac1
--- /dev/null
+++ b/tests/features/timings.feature
@@ -0,0 +1,70 @@
+Feature: failover and switchover timings are logged via log_timing command
+
+  Scenario: failover logs downtime and failover timings
+    Given cluster environment is
+      """
+      MYSYNC_FAILOVER=true
+      MYSYNC_FAILOVER_DELAY=0s
+      """
+    Given cluster is up and running
+    Then zookeeper node "/test/active_nodes" should match json_exactly within "20" seconds
+      """
+      ["mysql1","mysql2","mysql3"]
+      """
+    When host "mysql1" is stopped
+    Then mysql host "mysql1" should become unavailable within "10" seconds
+    Then zookeeper node "/test/last_switch" should match json within "30" seconds
+      """
+      {
+        "cause": "auto",
+        "from": "mysql1",
+        "master_transition": "failover",
+        "result": {
+          "ok": true
+        }
+      }
+      """
+    When I get zookeeper node "/test/manager"
+    And I save zookeeper query result as "manager"
+    When I run command on host "{{.manager.hostname}}" until result match regexp "failover:" with timeout "30" seconds
+      """
+      cat /tmp/timing.log
+      """
+    Then command output should match regexp
+      """
+      downtime:
+      """
+
+  Scenario: switchover logs downtime and switchover timings
+    Given cluster is up and running
+    Then mysql host "mysql1" should be master
+    And zookeeper node "/test/active_nodes" should match json_exactly within "30" seconds
+      """
+      ["mysql1","mysql2","mysql3"]
+      """
+    When I run command on host "mysql1"
+      """
+      mysync switch --to mysql2 --wait=0s
+      """
+    Then command return code should be "0"
+    Then zookeeper node "/test/last_switch" should match json within "120" seconds
+      """
+      {
+        "to": "mysql2",
+        "master_transition": "switchover",
+        "result": {
+          "ok": true
+        }
+      }
+      """
+    Then mysql host "mysql2" should be master
+    When I get zookeeper node "/test/manager"
+    And I save zookeeper query result as "manager"
+    When I run command on host "{{.manager.hostname}}" until result match regexp "switchover:" with timeout "30" seconds
+      """
+      cat /tmp/timing.log
+      """
+    Then command output should match regexp
+      """
+      downtime:
+      """
diff --git a/tests/images/mysql/mysync.yaml b/tests/images/mysql/mysync.yaml
index f0d83d16..32aedf3d 100644
--- a/tests/images/mysql/mysync.yaml
+++ b/tests/images/mysql/mysync.yaml
@@ -37,6 +37,8 @@ mysql:
   error_log: /var/log/mysql/error.log
 queries:
   replication_lag: $MYSYNC_REPLICATION_LAG_QUERY
+commands:
+  log_timing: 'echo "%s: %s" >> /tmp/timing.log'
 disable_semi_sync_replication_on_maintenance: ${MYSYNC_DISABLE_REPLICATION_ON_MAINT:-false}
 rpl_semi_sync_master_wait_for_slave_count: ${MYSYNC_WAIT_FOR_SLAVE_COUNT:-1}
 critical_disk_usage: ${MYSYNC_CRITICAL_DISK_USAGE:-100}

From d0042470da33effcc16b351398aa20d3e72bae11 Mon Sep 17 00:00:00 2001
From: Alexander Morozov <dodomorozov@10.217.129.116-red3.dhcp.yndx.net>
Date: Mon, 8 Jun 2026 13:59:23 +0300
Subject: [PATCH 2/5] fix timings

---
 internal/app/timing_tracker.go | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/internal/app/timing_tracker.go b/internal/app/timing_tracker.go
index 09fa0346..95753c48 100644
--- a/internal/app/timing_tracker.go
+++ b/internal/app/timing_tracker.go
@@ -68,6 +68,8 @@ func (app *App) logSwitchoverFailure(sw *Switchover) {
 		return
 	}
 	app.clearTiming(timingSwitchover)
+	// a failed switchover must not leave a downtime marker for the safety net to mislog later
+	app.clearTiming(timingDowntime)
 	since := sw.InitiatedAt
 	if since.IsZero() {
 		since = start

From 0ca6eb16f5dd050c3d7d9ce06f1ca4ad21bbf0ac Mon Sep 17 00:00:00 2001
From: Alexander Morozov <dodomorozov@10.217.129.116-red3.dhcp.yndx.net>
Date: Tue, 9 Jun 2026 15:18:51 +0300
Subject: [PATCH 3/5] test: fix zk jitter in tests

---
 tests/images/mysql/mysync.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/images/mysql/mysync.yaml b/tests/images/mysql/mysync.yaml
index 32aedf3d..fe4ddbb8 100644
--- a/tests/images/mysql/mysync.yaml
+++ b/tests/images/mysql/mysync.yaml
@@ -16,6 +16,8 @@ resetupfile: /tmp/mysync.resetup
 resetup_crashed_hosts: ${MYSYNC_RESETUP_CRASHED_HOSTS:-false}
 zookeeper:
   session_timeout: 3s
+  random_host_provider:
+    retry_jitter: 1s
   namespace: /test
   hosts: [ $ZK_SERVERS ]
   auth: true

From 3b3462b126fc488dd1ff471a3680a1e3fb55b105 Mon Sep 17 00:00:00 2001
From: Alexander Morozov <dodomorozov@10.215.158.217-vpn.dhcp.yndx.net>
Date: Wed, 10 Jun 2026 13:16:11 +0300
Subject: [PATCH 4/5] fxp

---
 internal/app/app.go            | 16 +++++++++++-----
 internal/app/app_dcs.go        |  6 +-----
 internal/app/timing_tracker.go |  7 ++++---
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/internal/app/app.go b/internal/app/app.go
index 9b75db2f..18a3c492 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -693,7 +693,12 @@ func (app *App) stateManager() appState {
 
 	if !clusterStateDcs[master].PingOk || clusterStateDcs[master].IsFileSystemReadonly {
 		app.logger.Error().Msgf("MASTER FAILURE")
-		app.t.SetIfZero(NodeFailedAt, master, time.Now())
+		if app.t.Get(NodeFailedAt, master).IsZero() {
+			now := time.Now()
+			app.t.Set(NodeFailedAt, master, now)
+			app.startTiming(timingDowntime, now)
+			app.startTiming(timingFailover, now)
+		}
 		if lightMaintenance {
 			app.logger.Info().Msgf("failover suppressed by light maintenance mode")
 		} else {
@@ -711,10 +716,11 @@ func (app *App) stateManager() appState {
 			return stateManager
 		}
 	} else {
-		app.t.Clean(NodeFailedAt, master)
-		// flush timings left behind by an aborted failover/switchover
-		app.stopTiming(timingDowntime)
-		app.stopTiming(timingFailover)
+		if !app.t.Get(NodeFailedAt, master).IsZero() {
+			app.stopTiming(timingDowntime)
+			app.stopTiming(timingFailover)
+			app.t.Clean(NodeFailedAt, master)
+		}
 	}
 
 	if !clusterState[master].PingOk {
diff --git a/internal/app/app_dcs.go b/internal/app/app_dcs.go
index 92b73410..c7c68128 100644
--- a/internal/app/app_dcs.go
+++ b/internal/app/app_dcs.go
@@ -178,7 +178,7 @@ func (app *App) StartSwitchover(switchover *Switchover) error {
 	switchover.StartedAt = time.Now()
 	switchover.StartedBy = app.config.Hostname
 	if switchover.MasterTransition == SwitchoverTransition {
-		app.startTiming(timingSwitchover, time.Time{})
+		app.startTiming(timingSwitchover, switchover.InitiatedAt)
 	}
 	return app.dcs.Set(pathCurrentSwitch, switchover)
 }
@@ -208,10 +208,6 @@ func (app *App) IssueFailover(master string) error {
 	switchover.InitiatedAt = time.Now()
 	switchover.Cause = CauseAuto
 	switchover.MasterTransition = FailoverTransition
-	// downtime/failover are measured from the moment the master was lost
-	failedAt := app.t.Get(NodeFailedAt, master)
-	app.startTiming(timingDowntime, failedAt)
-	app.startTiming(timingFailover, failedAt)
 	return app.dcs.Create(pathCurrentSwitch, switchover)
 }
 
diff --git a/internal/app/timing_tracker.go b/internal/app/timing_tracker.go
index 95753c48..fd1af971 100644
--- a/internal/app/timing_tracker.go
+++ b/internal/app/timing_tracker.go
@@ -49,17 +49,19 @@ func (app *App) clearTiming(name string) {
 
 // stopTiming logs elapsed time since start and clears it
 func (app *App) stopTiming(name string) {
+	now := time.Now()
 	start, ok := app.getTimingStart(name)
 	if !ok {
 		return
 	}
 	app.clearTiming(name)
-	app.logTiming(name, time.Since(start))
+	app.logTiming(name, now.Sub(start))
 }
 
 // logSwitchoverFailure records time spent on a failed switchover as a negative value.
 // Marker-guarded so it is logged once, and only when the switchover actually started.
 func (app *App) logSwitchoverFailure(sw *Switchover) {
+	now := time.Now()
 	if sw.MasterTransition != SwitchoverTransition {
 		return
 	}
@@ -68,13 +70,12 @@ func (app *App) logSwitchoverFailure(sw *Switchover) {
 		return
 	}
 	app.clearTiming(timingSwitchover)
-	// a failed switchover must not leave a downtime marker for the safety net to mislog later
 	app.clearTiming(timingDowntime)
 	since := sw.InitiatedAt
 	if since.IsZero() {
 		since = start
 	}
-	app.logTiming(timingSwitchover+"_failure", -time.Since(since))
+	app.logTiming(timingSwitchover+"_failure", -now.Sub(since))
 }
 
 // logTiming runs the configured log_timing command with name and elapsed seconds

From 3086190b0a3fb6ff986d0077b120d2f93d6bad68 Mon Sep 17 00:00:00 2001
From: Alexander Morozov <dodomorozov@10.215.158.217-vpn.dhcp.yndx.net>
Date: Wed, 10 Jun 2026 18:08:47 +0300
Subject: [PATCH 5/5] move failover timing

---
 internal/app/app.go     | 2 --
 internal/app/app_dcs.go | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/internal/app/app.go b/internal/app/app.go
index 18a3c492..6e74b2f1 100644
--- a/internal/app/app.go
+++ b/internal/app/app.go
@@ -1623,9 +1623,7 @@ func (app *App) performSwitchover(clusterState map[string]*nodestate.NodeState,
 	}
 	app.logger.Info().Msgf("switchover: new master %s set writable", newMaster)
 
-	// cluster is open for writes again, failover/downtime are over
 	app.stopTiming(timingDowntime)
-	app.stopTiming(timingFailover)
 
 	// reenable events
 	events, err := newMasterNode.ReenableEventsRetry()
diff --git a/internal/app/app_dcs.go b/internal/app/app_dcs.go
index c7c68128..a32f769d 100644
--- a/internal/app/app_dcs.go
+++ b/internal/app/app_dcs.go
@@ -153,6 +153,8 @@ func (app *App) FinishSwitchover(switchover *Switchover, switchErr error) error
 		app.logSwitchoverFailure(switchover)
 	} else if switchover.MasterTransition == SwitchoverTransition {
 		app.stopTiming(timingSwitchover)
+	} else {
+		app.stopTiming(timingFailover)
 	}
 
 	err := app.dcs.Delete(pathCurrentSwitch)