From 3d575f42a5970829f2a4b32386b41907b6e48440 Mon Sep 17 00:00:00 2001
From: Amit Abel
Date: Wed, 30 Jul 2025 14:48:34 +0300
Subject: [PATCH] Resolved conflict

---
 orchagent/Makefile.am           |   1 +
 orchagent/orchdaemon.cpp        |  11 +-
 orchagent/orchdaemon.h          |   1 +
 orchagent/portsorch.cpp         |   6 +
 orchagent/portsorch.h           |   1 +
 orchagent/txerrorcheckorch.cpp  | 202 ++++++++++++++++++++++++++++++++
 orchagent/txerrorcheckorch.h    |  49 ++++++++
 tests/test_tx_error_counters.py |  44 +++++++
 8 files changed, 313 insertions(+), 2 deletions(-)
 create mode 100644 orchagent/txerrorcheckorch.cpp
 create mode 100644 orchagent/txerrorcheckorch.h
 create mode 100644 tests/test_tx_error_counters.py

diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am
index 9a066185952..d2ca0a98631 100644
--- a/orchagent/Makefile.am
+++ b/orchagent/Makefile.am
@@ -91,6 +91,7 @@ orchagent_SOURCES = \
                     request_parser.cpp \
                     vrforch.cpp \
                     countercheckorch.cpp \
+                    txerrorcheckorch.cpp \
                     vxlanorch.cpp \
                     vnetorch.cpp \
                     dtelorch.cpp \
diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp
index 1a8785e5f88..bdc15cef976 100644
--- a/orchagent/orchdaemon.cpp
+++ b/orchagent/orchdaemon.cpp
@@ -68,6 +68,7 @@ StpOrch *gStpOrch;
 MuxOrch *gMuxOrch;
 IcmpOrch *gIcmpOrch;
 HFTelOrch *gHFTOrch;
+TxErrorCheckOrch *gTxErrorCheckOrch;
 
 bool gIsNatSupported = false;
 event_handle_t g_events_handle;
@@ -337,6 +338,12 @@ bool OrchDaemon::init()
 
     NvgreTunnelMapOrch *nvgre_tunnel_map_orch = new NvgreTunnelMapOrch(m_configDb, CFG_NVGRE_TUNNEL_MAP_TABLE_NAME);
     gDirectory.set(nvgre_tunnel_map_orch);
+    const vector<string> tx_error_table = {
+        CFG_TX_ERROR_CHECK_TABLE_NAME,
+    };
+
+    gTxErrorCheckOrch = new TxErrorCheckOrch(m_configDb, tx_error_table);
+    gDirectory.set(gTxErrorCheckOrch);
 
     vector<string> qos_tables = {
         CFG_TC_TO_QUEUE_MAP_TABLE_NAME,
@@ -470,7 +477,7 @@ bool OrchDaemon::init()
      * when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map.
      * For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask()
      */
-    m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gFlowCounterRouteOrch, gIntfsOrch, gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gFgNhgOrch, gRouteOrch, gCoppOrch, gQosOrch, wm_orch, gPolicerOrch, gTunneldecapOrch, sflow_orch, gDebugCounterOrch, gMacsecOrch, bgp_global_state_orch, gBfdOrch, gIcmpOrch, gSrv6Orch, gMuxOrch, mux_cb_orch, gMonitorOrch, gStpOrch};
+    m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gFlowCounterRouteOrch, gTxErrorCheckOrch, gIntfsOrch, gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gFgNhgOrch, gRouteOrch, gCoppOrch, gQosOrch, wm_orch, gPolicerOrch, gTunneldecapOrch, sflow_orch, gDebugCounterOrch, gMacsecOrch, bgp_global_state_orch, gBfdOrch, gIcmpOrch, gSrv6Orch, gMuxOrch, mux_cb_orch, gMonitorOrch, gStpOrch};
 
     bool initialize_dtel = false;
     if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING)
@@ -1333,4 +1340,4 @@ bool DpuOrchDaemon::init()
     addOrchList(dash_port_map_orch);
 
     return true;
-}
\ No newline at end of file
+}
diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h
index 2041c6bfff3..18ef34fdc89 100644
--- a/orchagent/orchdaemon.h
+++ b/orchagent/orchdaemon.h
@@ -32,6 +32,7 @@
 #include "vnetorch.h"
 #include "countercheckorch.h"
 #include "flexcounterorch.h"
+#include "txerrorcheckorch.h"
 #include "watermarkorch.h"
 #include "policerorch.h"
 #include "sfloworch.h"
diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp
index b1e9dcf5963..a7500fb85dd 100644
--- a/orchagent/portsorch.cpp
+++ b/orchagent/portsorch.cpp
@@ -29,6 +29,7 @@
 #include "sai_serialize.h"
 #include "crmorch.h"
 #include "countercheckorch.h"
+#include "txerrorcheckorch.h"
 #include "notifier.h"
 #include "fdborch.h"
 #include "switchorch.h"
@@ -625,6 +626,9 @@ PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector<table_name_wi
     m_queueCounterCapabilitiesTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_QUEUE_COUNTER_CAPABILITIES_NAME));
     m_portCounterCapabilitiesTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_PORT_COUNTER_CAPABILITIES_NAME));
 
+    /* Initialize port state table */
+    m_statePortTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_PORT_TABLE_NAME));
+
     initGearbox();
 
     string queueWmSha, pgWmSha, portRateSha;
@@ -5895,6 +5899,8 @@ void PortsOrch::postPortInit(Port& p)
 
     initPortSupportedSpeeds(p.m_alias, p.m_port_id);
     initPortSupportedFecModes(p.m_alias, p.m_port_id);
+
+    m_statePortTable->hset(p.m_alias, TX_ERROR_PORT_STATE_FIELD, TX_ERROR_PORT_STATE_OK);
 }
 
 void PortsOrch::doTask()
diff --git a/orchagent/portsorch.h b/orchagent/portsorch.h
index c7057489b22..6d2d40b3104 100644
--- a/orchagent/portsorch.h
+++ b/orchagent/portsorch.h
@@ -587,6 +587,7 @@ class PortsOrch : public Orch, public Subject
     bool m_isWredPortCounterMapGenerated = false;
     std::unique_ptr<swss::Table> m_queueCounterCapabilitiesTable = nullptr;
     std::unique_ptr<swss::Table> m_portCounterCapabilitiesTable = nullptr;
+    std::unique_ptr<swss::Table> m_statePortTable = nullptr;
 
 private:
     // Port config aggregator
diff --git a/orchagent/txerrorcheckorch.cpp b/orchagent/txerrorcheckorch.cpp
new file mode 100644
index 00000000000..f81c3fae2db
--- /dev/null
+++ b/orchagent/txerrorcheckorch.cpp
@@ -0,0 +1,202 @@
+#include "txerrorcheckorch.h"
+#include "select.h"
+#include "notifier.h"
+#include "sai_serialize.h"
+#include "portsorch.h"
+#include <inttypes.h>
+#include <exception>
+
+extern sai_port_api_t *sai_port_api;
+extern PortsOrch *gPortsOrch;
+
+#define TX_ERROR_CHECK_KEY          "TX_ERROR_CHECK"
+#define TX_ERROR_CHECK_POLL_NAME    "TX_ERROR_CHECK_POLL"
+#define THRESHOLD_FIELD             "threshold"
+#define TIME_PERIOD_FIELD           "time_period"
+
+/*
+ * Opens the COUNTERS_DB, CONFIG_DB and STATE_DB connections and starts the
+ * poll timer with the default period.
+ */
+TxErrorCheckOrch::TxErrorCheckOrch(swss::DBConnector *db, const std::vector<std::string> &tableNames):
+    Orch(db, tableNames)
+{
+    SWSS_LOG_ENTER();
+
+    m_countersDb = make_shared<swss::DBConnector>("COUNTERS_DB", 0);
+    m_countersTable = make_unique<swss::Table>(m_countersDb.get(), COUNTERS_TABLE);
+    m_configDb = make_unique<swss::DBConnector>("CONFIG_DB", 0);
+    m_stateDb = make_unique<swss::DBConnector>("STATE_DB", 0);
+
+    auto interv = timespec { .tv_sec = TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT, .tv_nsec = 0 };
+    /* NOTE(review): the executor is expected to delete the timer it wraps, so only a raw pointer is kept - confirm against orch.h. */
+    m_timer = new swss::SelectableTimer(interv);
+    auto executor = new ExecutableTimer(m_timer, this, TX_ERROR_CHECK_POLL_NAME);
+    Orch::addExecutor(executor);
+    m_timer->start();
+}
+
+TxErrorCheckOrch::~TxErrorCheckOrch(void)
+{
+    SWSS_LOG_ENTER();
+}
+
+/* Timer expiry: run one pass of the per-port TX error check. */
+void TxErrorCheckOrch::doTask(swss::SelectableTimer &timer)
+{
+    SWSS_LOG_ENTER();
+
+    mcCounterCheck();
+}
+
+/* Apply and consume every pending entry of the TX error check config table. */
+void TxErrorCheckOrch::doTask(Consumer &consumer)
+{
+    SWSS_LOG_ENTER();
+
+    if (consumer.getTableName() != CFG_TX_ERROR_CHECK_TABLE_NAME)
+    {
+        SWSS_LOG_ERROR("Unknown table name %s", consumer.getTableName().c_str());
+        return;
+    }
+
+    auto it = consumer.m_toSync.begin();
+    while (it != consumer.m_toSync.end())
+    {
+        mcFieldsUpdate(it->second);
+        it = consumer.m_toSync.erase(it);
+    }
+}
+
+/*
+ * Read SAI_PORT_STAT_IF_OUT_ERRORS for every port from COUNTERS_DB and flag
+ * the port as being in TX error state when the value exceeds the threshold.
+ */
+void TxErrorCheckOrch::mcCounterCheck()
+{
+    SWSS_LOG_ENTER();
+
+    for (auto const &port : gPortsOrch->getAllPorts())
+    {
+        sai_object_id_t portOid = port.second.m_port_id;
+        if (portOid == SAI_NULL_OBJECT_ID)
+        {
+            SWSS_LOG_ERROR("Invalid port oid 0x%" PRIx64, portOid);
+            continue;
+        }
+
+        std::string outErrors;
+
+        if (!m_countersTable->hget(sai_serialize_object_id(portOid), "SAI_PORT_STAT_IF_OUT_ERRORS", outErrors))
+        {
+            SWSS_LOG_ERROR("Access to Counters DB with 0x%" PRIx64 " port ID failed", portOid);
+            continue;
+        }
+
+        /* The threshold is 64-bit, so parse the counter at the same width. */
+        uint64_t outErrorsCount = 0;
+        try
+        {
+            outErrorsCount = to_uint<uint64_t>(outErrors);
+        }
+        catch (const std::exception &e)
+        {
+            SWSS_LOG_ERROR("Invalid TX error counter '%s' for port %s: %s", outErrors.c_str(), port.second.m_alias.c_str(), e.what());
+            continue;
+        }
+
+        /* Note: for now the support is only for error state true; if needed we can add support for error state false. */
+        if (outErrorsCount > m_error_threshold)
+        {
+            setPortStatus(port.second.m_alias, true);
+        }
+    }
+}
+
+/*
+ * Apply one TX_ERROR_CHECK config entry: SET updates the given fields, DEL
+ * restores the default threshold and poll period.
+ */
+void TxErrorCheckOrch::mcFieldsUpdate(const swss::KeyOpFieldsValuesTuple &keyOpFieldsValues)
+{
+    SWSS_LOG_ENTER();
+
+    const string &key = kfvKey(keyOpFieldsValues);
+
+    if (key != TX_ERROR_CHECK_KEY)
+    {
+        SWSS_LOG_ERROR("Unknown key %s", key.c_str());
+        return;
+    }
+
+    const string &op = kfvOp(keyOpFieldsValues);
+    if (op == DEL_COMMAND)
+    {
+        /* Restore defaults here rather than per field, so DEL works even when it carries no field/value pairs. */
+        mcUpdateThreshold(TX_ERROR_CHECK_THRESHOLD_DEFAULT);
+        mcUpdateTimePeriod(TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT);
+        return;
+    }
+
+    if (op != SET_COMMAND)
+    {
+        SWSS_LOG_ERROR("Unknown operation %s", op.c_str());
+        return;
+    }
+
+    for (const auto &fv : kfvFieldsValues(keyOpFieldsValues))
+    {
+        const auto &fieldName = fvField(fv);
+        const auto &fieldValue = fvValue(fv);
+
+        try
+        {
+            if (fieldName == THRESHOLD_FIELD)
+            {
+                mcUpdateThreshold(to_uint<uint64_t>(fieldValue));
+            }
+            else if (fieldName == TIME_PERIOD_FIELD)
+            {
+                mcUpdateTimePeriod(to_uint<uint32_t>(fieldValue));
+            }
+        }
+        catch (const std::exception &e)
+        {
+            /* A malformed value must not take the daemon down; keep the current setting. */
+            SWSS_LOG_ERROR("Invalid value '%s' for field %s: %s", fieldValue.c_str(), fieldName.c_str(), e.what());
+        }
+    }
+}
+
+void TxErrorCheckOrch::mcUpdateThreshold(uint64_t new_threshold)
+{
+    SWSS_LOG_ENTER();
+
+    m_error_threshold = new_threshold;
+}
+
+/* Re-arm the poll timer with a new period, in seconds. */
+void TxErrorCheckOrch::mcUpdateTimePeriod(time_t new_time_period)
+{
+    SWSS_LOG_ENTER();
+
+    if (new_time_period <= 0)
+    {
+        /* NOTE(review): a zero interval is presumed to stop the timer from ever firing - confirm. */
+        SWSS_LOG_ERROR("Ignoring invalid time period %ld", static_cast<long>(new_time_period));
+        return;
+    }
+
+    auto new_interv = timespec { .tv_sec = new_time_period, .tv_nsec = 0 };
+    m_timer->setInterval(new_interv);
+    m_timer->reset();
+}
+
+/* Write the TX error state of a port into the STATE_DB port table. */
+void TxErrorCheckOrch::setPortStatus(const std::string &port_name, bool isTxErrorState)
+{
+    SWSS_LOG_ENTER();
+
+    swss::Table portStateTable(m_stateDb.get(), STATE_PORT_TABLE_NAME);
+    portStateTable.hset(port_name, TX_ERROR_PORT_STATE_FIELD, (isTxErrorState ? TX_ERROR_PORT_STATE_ERROR : TX_ERROR_PORT_STATE_OK));
+}
diff --git a/orchagent/txerrorcheckorch.h b/orchagent/txerrorcheckorch.h
new file mode 100644
index 00000000000..26b9521b30d
--- /dev/null
+++ b/orchagent/txerrorcheckorch.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "orch.h"
+#include "dbconnector.h"
+#include "table.h"
+
+extern "C" {
+    #include "sai.h"
+}
+
+#define TX_ERROR_CHECK_THRESHOLD_DEFAULT        1
+#define TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT 5
+
+/* Declared here (not in the .cpp) because portsorch.cpp uses them as well. */
+#define TX_ERROR_PORT_STATE_FIELD   "tx_error_port_state"
+#define TX_ERROR_PORT_STATE_ERROR   "error"
+#define TX_ERROR_PORT_STATE_OK      "ok"
+
+/*
+ * Polls the per-port TX error counter on a timer and records ports whose
+ * counter exceeds the configured threshold in the STATE_DB port table.
+ */
+class TxErrorCheckOrch: public Orch
+{
+public:
+    TxErrorCheckOrch(swss::DBConnector *db, const std::vector<std::string> &tableNames);
+    virtual ~TxErrorCheckOrch(void);
+    void doTask(swss::SelectableTimer &timer) override;
+    void doTask(Consumer &consumer) override;
+
+private:
+    void mcCounterCheck();
+    void mcFieldsUpdate(const swss::KeyOpFieldsValuesTuple &keyOpFieldsValues);
+    void mcUpdateThreshold(uint64_t new_threshold);
+    void mcUpdateTimePeriod(time_t new_time_period);
+    void setPortStatus(const std::string &port_name, bool isTxErrorState);
+
+    std::shared_ptr<swss::DBConnector> m_countersDb = nullptr;
+    std::unique_ptr<swss::Table> m_countersTable = nullptr;
+    std::unique_ptr<swss::DBConnector> m_configDb = nullptr;
+    std::unique_ptr<swss::DBConnector> m_stateDb = nullptr;
+
+    uint64_t m_error_threshold = TX_ERROR_CHECK_THRESHOLD_DEFAULT;
+    /* Not owned here - see the constructor. */
+    swss::SelectableTimer *m_timer = nullptr;
+};
diff --git a/tests/test_tx_error_counters.py b/tests/test_tx_error_counters.py
new file mode 100644
index 00000000000..c42296bd5e4
--- /dev/null
+++ b/tests/test_tx_error_counters.py
@@ -0,0 +1,44 @@
+import time
+import pytest
+
+from swsscommon import swsscommon
+
+# Keep in sync with orchagent/txerrorcheckorch.h
+TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT = 5
+TX_ERROR_CHECK_THRESHOLD_DEFAULT = 1
+
+# NOTE(review): assumed to be the string behind the C++ CFG_TX_ERROR_CHECK_TABLE_NAME macro - confirm.
+CFG_TX_ERROR_CHECK_TABLE_NAME = "CFG_TX_ERROR_CHECK"
+TX_ERROR_CHECK_KEY = "TX_ERROR_CHECK"
+TX_ERROR_CHECK_POLL_NAME = "TX_ERROR_CHECK_POLL"
+THRESHOLD_FIELD = "threshold"
+TIME_PERIOD_FIELD = "time_period"
+
+TX_ERROR_PORT_STATE_FIELD = "tx_error_port_state"
+TX_ERROR_PORT_STATE_ERROR = "error"
+TX_ERROR_PORT_STATE_OK = "ok"
+
+# port to be tested
+PORT = "Ethernet0"
+
+@pytest.mark.usefixtures('dvs_port_manager')
+class TestTxErrorCounters(object):
+    def setup_db(self, dvs):
+        """Open the DB connections the tests use, by numeric DB id."""
+        self.asic_db = swsscommon.DBConnector(1, dvs.redis_sock, 0)
+        self.config_db = swsscommon.DBConnector(4, dvs.redis_sock, 0)
+        self.flex_db = swsscommon.DBConnector(5, dvs.redis_sock, 0)
+        self.state_db = swsscommon.DBConnector(6, dvs.redis_sock, 0)
+        self.counters_db = swsscommon.DBConnector(2, dvs.redis_sock, 0)
+
+    def genericGetAndAssert(self, table, key):
+        """Return the fields stored under key, asserting that the key exists."""
+        status, fields = table.get(key)
+        assert status
+        return fields
+
+    def set_tx_error_config(self, field, value):
+        """Set a single field of the TX error check entry in CONFIG_DB."""
+        tx_error_table = swsscommon.Table(self.config_db, CFG_TX_ERROR_CHECK_TABLE_NAME)
+        entry = swsscommon.FieldValuePairs([(field, value)])
+        tx_error_table.set(TX_ERROR_CHECK_KEY, entry)