Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions orchagent/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ orchagent_SOURCES = \
request_parser.cpp \
vrforch.cpp \
countercheckorch.cpp \
txerrorcheckorch.cpp \
vxlanorch.cpp \
vnetorch.cpp \
dtelorch.cpp \
Expand Down
11 changes: 9 additions & 2 deletions orchagent/orchdaemon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ StpOrch *gStpOrch;
MuxOrch *gMuxOrch;
IcmpOrch *gIcmpOrch;
HFTelOrch *gHFTOrch;
TxErrorCheckOrch *gTxErrorCheckOrch;

bool gIsNatSupported = false;
event_handle_t g_events_handle;
Expand Down Expand Up @@ -337,6 +338,12 @@ bool OrchDaemon::init()
NvgreTunnelMapOrch *nvgre_tunnel_map_orch = new NvgreTunnelMapOrch(m_configDb, CFG_NVGRE_TUNNEL_MAP_TABLE_NAME);
gDirectory.set(nvgre_tunnel_map_orch);

const vector<string> tx_error_table = {
CFG_TX_ERROR_CHECK_TABLE_NAME,
};

gTxErrorCheckOrch = new TxErrorCheckOrch(m_configDb, tx_error_table);
gDirectory.set(gTxErrorCheckOrch);

vector<string> qos_tables = {
CFG_TC_TO_QUEUE_MAP_TABLE_NAME,
Expand Down Expand Up @@ -470,7 +477,7 @@ bool OrchDaemon::init()
* when iterating ConsumerMap. This is ensured implicitly by the order of keys in ordered map.
* For cases when Orch has to process tables in specific order, like PortsOrch during warm start, it has to override Orch::doTask()
*/
m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gFlowCounterRouteOrch, gIntfsOrch, gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gFgNhgOrch, gRouteOrch, gCoppOrch, gQosOrch, wm_orch, gPolicerOrch, gTunneldecapOrch, sflow_orch, gDebugCounterOrch, gMacsecOrch, bgp_global_state_orch, gBfdOrch, gIcmpOrch, gSrv6Orch, gMuxOrch, mux_cb_orch, gMonitorOrch, gStpOrch};
m_orchList = { gSwitchOrch, gCrmOrch, gPortsOrch, gBufferOrch, gFlowCounterRouteOrch, gTxErrorCheckOrch, gIntfsOrch, gNeighOrch, gNhgMapOrch, gNhgOrch, gCbfNhgOrch, gFgNhgOrch, gRouteOrch, gCoppOrch, gQosOrch, wm_orch, gPolicerOrch, gTunneldecapOrch, sflow_orch, gDebugCounterOrch, gMacsecOrch, bgp_global_state_orch, gBfdOrch, gIcmpOrch, gSrv6Orch, gMuxOrch, mux_cb_orch, gMonitorOrch, gStpOrch};

bool initialize_dtel = false;
if (platform == BFN_PLATFORM_SUBSTRING || platform == VS_PLATFORM_SUBSTRING)
Expand Down Expand Up @@ -1333,4 +1340,4 @@ bool DpuOrchDaemon::init()
addOrchList(dash_port_map_orch);

return true;
}
}
1 change: 1 addition & 0 deletions orchagent/orchdaemon.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "vnetorch.h"
#include "countercheckorch.h"
#include "flexcounterorch.h"
#include "txerrorcheckorch.h"
#include "watermarkorch.h"
#include "policerorch.h"
#include "sfloworch.h"
Expand Down
6 changes: 6 additions & 0 deletions orchagent/portsorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "sai_serialize.h"
#include "crmorch.h"
#include "countercheckorch.h"
#include "txerrorcheckorch.h"
#include "notifier.h"
#include "fdborch.h"
#include "switchorch.h"
Expand Down Expand Up @@ -625,6 +626,9 @@ PortsOrch::PortsOrch(DBConnector *db, DBConnector *stateDb, vector<table_name_wi
m_queueCounterCapabilitiesTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_QUEUE_COUNTER_CAPABILITIES_NAME));
m_portCounterCapabilitiesTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_PORT_COUNTER_CAPABILITIES_NAME));

/* Initialize port state table */
m_statePortTable = unique_ptr<Table>(new Table(m_state_db.get(), STATE_PORT_TABLE_NAME));

initGearbox();

string queueWmSha, pgWmSha, portRateSha;
Expand Down Expand Up @@ -5895,6 +5899,8 @@ void PortsOrch::postPortInit(Port& p)

initPortSupportedSpeeds(p.m_alias, p.m_port_id);
initPortSupportedFecModes(p.m_alias, p.m_port_id);

m_statePortTable->hset(p.m_alias, TX_ERROR_PORT_STATE_FIELD, TX_ERROR_PORT_STATE_OK);
}

void PortsOrch::doTask()
Expand Down
1 change: 1 addition & 0 deletions orchagent/portsorch.h
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ class PortsOrch : public Orch, public Subject
bool m_isWredPortCounterMapGenerated = false;
std::unique_ptr<swss::Table> m_queueCounterCapabilitiesTable = nullptr;
std::unique_ptr<swss::Table> m_portCounterCapabilitiesTable = nullptr;
std::unique_ptr<swss::Table> m_statePortTable = nullptr;

private:
// Port config aggregator
Expand Down
165 changes: 165 additions & 0 deletions orchagent/txerrorcheckorch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#include "txerrorcheckorch.h"
#include "select.h"
#include "notifier.h"
#include "sai_serialize.h"
#include "portsorch.h"
#include <inttypes.h>

extern sai_port_api_t *sai_port_api;
extern PortsOrch *gPortsOrch;

#define TX_ERROR_CHECK_KEY "TX_ERROR_CHECK"
#define TX_ERROR_CHECK_POLL_NAME "TX_ERROR_CHECK_POLL"
#define THRESHOLD_FIELD "threshold"
#define TIME_PERIOD_FIELD "time_period"

#define TX_ERROR_PORT_STATE_FIELD "tx_error_port_state"
#define TX_ERROR_PORT_STATE_ERROR "error"
#define TX_ERROR_PORT_STATE_OK "ok"

TxErrorCheckOrch::TxErrorCheckOrch(swss::DBConnector *db, const std::string &tableName):
Orch(db, tableName)
{
SWSS_LOG_ENTER();

m_countersDb = make_shared<swss::DBConnector>("COUNTERS_DB", 0);
m_countersTable = make_unique<swss::Table>(m_countersDb.get(), COUNTERS_TABLE);
m_configDb = make_unique<swss::DBConnector>("CONFIG_DB", 0);
m_stateDb = make_unique<swss::DBConnector>("STATE_DB", 0);

auto interv = timespec { .tv_sec = TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT, .tv_nsec = 0 };
m_timer = std::make_shared<swss::SelectableTimer>(interv);
auto executor = new ExecutableTimer(m_timer.get(), this, TX_ERROR_CHECK_POLL_NAME);
Orch::addExecutor(executor);
m_timer->start();
}

TxErrorCheckOrch::~TxErrorCheckOrch(void)
{
SWSS_LOG_ENTER();
}

void TxErrorCheckOrch::doTask(swss::SelectableTimer &timer)
{
SWSS_LOG_ENTER();

mcCounterCheck();
}

void TxErrorCheckOrch::doTask(Consumer &consumer)
{
SWSS_LOG_ENTER();

if (consumer.getTableName() != CFG_TX_ERROR_CHECK_TABLE_NAME)
{
SWSS_LOG_ERROR("Unknown table name %s", consumer.getTableName().c_str());
return;
}

auto it = consumer.m_toSync.begin();
while (it != consumer.m_toSync.end())
{
mcFieldsUpdate(it->second);
it = consumer.m_toSync.erase(it);
}
}

void TxErrorCheckOrch::mcCounterCheck()
{
SWSS_LOG_ENTER();

for (auto const &port : gPortsOrch->getAllPorts())
{
sai_object_id_t portOid = port.second.m_port_id;
if (portOid == SAI_NULL_OBJECT_ID)
{
SWSS_LOG_ERROR("Invalid port oid %lx" PRIx64, port.second.m_port_id);
continue;
}

std::string outErrors;

if (!m_countersTable->hget(sai_serialize_object_id(portOid), "SAI_PORT_STAT_IF_OUT_ERRORS", outErrors))
{
SWSS_LOG_ERROR("Access to Counters DB with %lx port ID failed", port.second.m_port_id);
continue;
}

uint32_t outErrorsCount = to_uint<uint32_t>(outErrors);

/* Note: for now the support is only for error state true; if needed we can add support for error state false. */
if (outErrorsCount > m_error_threshold)
{
setPortStatus(port.second.m_alias, true);
}
}
}

void TxErrorCheckOrch::mcFieldsUpdate(swss::KeyOpFieldsValuesTuple keyOpFieldsValues)
{
SWSS_LOG_ENTER();

string key = kfvKey(keyOpFieldsValues);

if (key != TX_ERROR_CHECK_KEY)
{
SWSS_LOG_ERROR("Unknown key %s", key.c_str());
return;
}

auto op = kfvOp(keyOpFieldsValues);
if ((op != DEL_COMMAND) && (op != SET_COMMAND))
{
SWSS_LOG_ERROR("Unknown operation %s", op.c_str());
return;
}

for (auto fvMap : kfvFieldsValues(keyOpFieldsValues))
{
auto fieldName = fvField(fvMap);
auto fieldValue = fvValue(fvMap);

if (fieldName == THRESHOLD_FIELD)
{
if (op == DEL_COMMAND)
{
fieldValue = std::to_string(TX_ERROR_CHECK_THRESHOLD_DEFAULT);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@abelamit FYI. Usually we do no do like this, only in a very rare cases. Different vendors may want to have different defaults

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand. So how will you define default values? Since we need to upload our system with some default before user is configuring. Basically it is not mandatory to support this in DB_CONFIG

}

mcUpdateThreshold(to_uint<uint64_t>(fieldValue));
}
else if (fieldName == TIME_PERIOD_FIELD)
{
if (op == DEL_COMMAND)
{
fieldValue = std::to_string(TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT);
}

mcUpdateTimePeriod(to_uint<time_t>(fieldValue));
}
}
}

void TxErrorCheckOrch::mcUpdateThreshold(uint64_t new_threshold)
{
SWSS_LOG_ENTER();

m_error_threshold = new_threshold;
}

void TxErrorCheckOrch::mcUpdateTimePeriod(time_t new_time_period)
{
SWSS_LOG_ENTER();

auto new_interv = timespec { .tv_sec = new_time_period, .tv_nsec = 0 };
m_timer->setInterval(new_interv);
m_timer->reset();
}

void TxErrorCheckOrch::setPortStatus(std::string port_name, bool isTxErrorState)
{
SWSS_LOG_ENTER();

swss::Table portStateTable(m_stateDb.get(), STATE_PORT_TABLE_NAME);
portStateTable.hset(port_name, TX_ERROR_PORT_STATE_FIELD, (isTxErrorState ? TX_ERROR_PORT_STATE_ERROR : TX_ERROR_PORT_STATE_OK));
}
38 changes: 38 additions & 0 deletions orchagent/txerrorcheckorch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <array>
#include <linux/timer.h>
#include "orch.h"
#include "dbconnector.h"
#include "table.h"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@abelamit double quotes for a local includes, and angle brackets for an external libraries

The best practices:

  1. STL includes
  2. System includes
  3. User/Third party includes
  4. Local project includes

That helps to improve code readability

Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed


extern "C" {
#include "sai.h"
}

#define TX_ERROR_CHECK_THRESHOLD_DEFAULT 1
#define TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT 5

class TxErrorCheckOrch: public Orch
{
public:
TxErrorCheckOrch(swss::DBConnector *db, const std::string &tableName);
virtual ~TxErrorCheckOrch(void);
void doTask(swss::SelectableTimer &timer);
void doTask(Consumer &consumer);

private:
void mcCounterCheck();
void mcFieldsUpdate(swss::KeyOpFieldsValuesTuple keyOpFieldsValues);
void mcUpdateThreshold(uint64_t new_threshold);
void mcUpdateTimePeriod(time_t new_time_period);
void setPortStatus(std::string port_name, bool isTxErrorState);

std::shared_ptr<swss::DBConnector> m_countersDb = nullptr;
std::unique_ptr<swss::Table> m_countersTable = nullptr;
std::unique_ptr<swss::DBConnector> m_configDb = nullptr;
std::unique_ptr<swss::DBConnector> m_stateDb = nullptr;

uint64_t m_error_threshold = TX_ERROR_CHECK_THRESHOLD_DEFAULT;
std::shared_ptr<swss::SelectableTimer> m_timer = nullptr;
};
39 changes: 39 additions & 0 deletions tests/test_tx_error_counters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import time
import pytest

from swsscommon import swsscommon

TX_ERROR_CHECK_POLL_TIMEOUT_SEC_DEFAULT = (5 * 60)
TX_ERROR_CHECK_THRESHOLD_DEFAULT = (5 * 60)

CFG_TX_ERROR_CHECK_TABLE_NAME = "CFG_TX_ERROR_CHECK"
TX_ERROR_CHECK_KEY = "TX_ERROR_CHECK"
TX_ERROR_CHECK_POLL_NAME = "TX_ERROR_CHECK_POLL"
THRESHOLD_FIELD = "threshold"
TIME_PERIOD_FIELD = "time_period"

TX_ERROR_PORT_STATE_FIELD = "tx_error_port_state"
TX_ERROR_PORT_STATE_ERROR = "error"
TX_ERROR_PORT_STATE_OK = "ok"

# port to be tested
PORT = "Ethernet0"

@pytest.mark.usefixtures('dvs_port_manager')
class TestTxErrorCounters(object):
def setup_db(self, dvs):
self.asic_db = swsscommon.DBConnector(1, dvs.redis_sock, 0)
self.config_db = swsscommon.DBConnector(4, dvs.redis_sock, 0)
self.flex_db = swsscommon.DBConnector(5, dvs.redis_sock, 0)
self.state_db = swsscommon.DBConnector(6, dvs.redis_sock, 0)
self.counters_db = swsscommon.DBConnector(2, dvs.redis_sock, 0)

def genericGetAndAssert(self, table, key):
status, fields = table.get(key)
assert status
return fields

def set_tx_error_config(self, field, value):
tx_error_table = swsscommon.Table(self.config_db, CFG_TX_ERROR_CHECK_TABLE_NAME)
entry = swsscommon.FieldValuePairs([(field, value)])
tx_error_table.set(TX_ERROR_CHECK_KEY, entry)