-
Notifications
You must be signed in to change notification settings - Fork 710
Add enhanced SwssStats for comprehensive profiling #4434
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
73f0534
fb8b6fa
0d75119
13ec0f2
f6357c7
720c28b
d041da2
b74763b
dd29653
a2f7e3d
244cd16
bbe820f
e7be681
132510e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| #include <sys/time.h> | ||
| #include "timestamp.h" | ||
| #include "orch.h" | ||
| #include "swssstats.h" | ||
|
|
||
| #include "subscriberstatetable.h" | ||
| #include "portsorch.h" | ||
|
|
@@ -16,6 +17,7 @@ | |
| using namespace swss; | ||
|
|
||
| int gBatchSize = 0; | ||
| std::atomic<bool> gSwssStatsRecord(true); // Enable SwssStats by default | ||
|
|
||
| std::shared_ptr<RingBuffer> Orch::gRingBuffer = nullptr; | ||
| std::shared_ptr<RingBuffer> Executor::gRingBuffer = nullptr; | ||
|
|
@@ -248,8 +250,16 @@ void ConsumerBase::addToSync(const KeyOpFieldsValuesTuple &entry, bool onRetry) | |
| string op = kfvOp(entry); | ||
|
|
||
| if (!onRetry) | ||
| { | ||
| /* Record incoming tasks */ | ||
| Recorder::Instance().swss.record(dumpTuple(entry)); | ||
|
|
||
| /* Record statistics */ | ||
| if (gSwssStatsRecord) | ||
| { | ||
| SwssStats::getInstance()->recordTask(getTableName(), op); | ||
| } | ||
| } | ||
| else | ||
| Recorder::Instance().retry.record(dumpTuple(entry).append(DECACHE)); | ||
|
|
||
|
|
@@ -557,6 +567,8 @@ void Consumer::drain() | |
| { | ||
| if (!m_toSync.empty()) | ||
| { | ||
| size_t size_before = gSwssStatsRecord ? m_toSync.size() : 0; | ||
| bool threw = false; | ||
| try | ||
| { | ||
| ((Orch *)m_orch)->doTask((Consumer&)*this); | ||
|
|
@@ -565,21 +577,39 @@ void Consumer::drain() | |
| { | ||
| SWSS_LOG_ERROR("Exception caught: type=invalid_argument, table=%s, error=%s", | ||
| getName().c_str(), e.what()); | ||
| threw = true; | ||
| } | ||
| catch (const std::logic_error& e) | ||
| { | ||
| SWSS_LOG_ERROR("Exception caught: type=logic_error, table=%s, error=%s", | ||
| getName().c_str(), e.what()); | ||
| threw = true; | ||
| } | ||
| catch (const std::exception& e) | ||
| { | ||
| SWSS_LOG_ERROR("Exception caught: type=exception, table=%s, error=%s", | ||
| getName().c_str(), e.what()); | ||
| threw = true; | ||
| } | ||
| catch (...) | ||
| { | ||
| SWSS_LOG_ERROR("Exception caught: type=unknown, table=%s", | ||
| getName().c_str()); | ||
| threw = true; | ||
| } | ||
| if (gSwssStatsRecord && size_before > 0) | ||
| { | ||
| if (threw) | ||
| { | ||
| SwssStats::getInstance()->recordError(getTableName(), 1); | ||
| } | ||
| else | ||
| { | ||
| size_t size_after = m_toSync.size(); | ||
| uint64_t completed = (size_before > size_after) ? (size_before - size_after) : 0; | ||
| if (completed > 0) | ||
| SwssStats::getInstance()->recordComplete(getTableName(), completed); | ||
| } | ||
| } | ||
|
Comment on lines
572
to
+613
|
||
| } | ||
| } | ||
|
|
@@ -1235,3 +1265,4 @@ void Orch2::doTask(Consumer &consumer) | |
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,217 @@ | ||||||||||||
| #include "swssstats.h" | ||||||||||||
| #include "dbconnector.h" | ||||||||||||
| #include "table.h" | ||||||||||||
| #include "logger.h" | ||||||||||||
| #include <chrono> | ||||||||||||
|
||||||||||||
| #include <chrono> | |
| #include <chrono> | |
| #include <tuple> | |
| #include <unordered_map> | |
| #include <utility> |
Copilot
AI
Apr 21, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The PR description mentions latency metrics and queue depth (and “10 metrics per table”), but the implementation currently only serializes 4 counters (SET/DEL/COMPLETE/ERROR). Either implement the additional metrics or update the PR description and any in-code documentation so they match what’s actually exported to COUNTERS_DB.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`gSwssStatsRecord` is enabled by default, but there is no mechanism in this PR to toggle it at runtime (no config/env/CLI hook), and it introduces extra per-task work and a background writer thread. Consider defaulting this to disabled or wiring it to a configuration option so operators can turn it on only when needed.