-
Notifications
You must be signed in to change notification settings - Fork 223
[multi-asic] add drop monitor patches to support multi-asic #576
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Yakiv-Huryk
wants to merge
1
commit into
sonic-net:master
Choose a base branch
from
Yakiv-Huryk:dropmon-patches
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
146 changes: 146 additions & 0 deletions
146
patches-sonic/0003-drop_monitor-Allow-running-multiple-instances-of-dropwatch.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| From b1fe183ad73ab30c6ed6aef7b961e3b440dfa174 Mon Sep 17 00:00:00 2001 | ||
| From: Ido Schimmel <idosch@nvidia.com> | ||
| Date: Wed, 11 Feb 2026 18:55:30 +0200 | ||
| Subject: [PATCH] drop_monitor: Allow running multiple instances of dropwatch | ||
|
|
||
| Despite the fact that the drop_monitor module sends multicast | ||
| notifications, it is not possible to run multiple instances of dropwatch | ||
| since tracing of software / hardware drops can only be enabled once. | ||
|
|
||
| Solve this by maintaining a reference count of the number of times that | ||
| software / hardware drop tracing was enabled. Initialize the tracing | ||
| when the reference count is incremented from 0 and tear it down when | ||
| decremented back to 0. | ||
|
|
||
| Note that like before, it is not possible to change configuration while | ||
| tracing is enabled. | ||
|
|
||
| Signed-off-by: Ido Schimmel <idosch@nvidia.com> | ||
| --- | ||
| net/core/drop_monitor.c | 46 ++++++++++++++++++++--------------------- | ||
| 1 file changed, 23 insertions(+), 23 deletions(-) | ||
|
|
||
| diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c | ||
| index 60d31c2feed3..29f2b8c6b8b9 100644 | ||
| --- a/net/core/drop_monitor.c | ||
| +++ b/net/core/drop_monitor.c | ||
| @@ -47,8 +47,8 @@ | ||
| * and the work handle that will send up | ||
| * netlink alerts | ||
| */ | ||
| -static int trace_state = TRACE_OFF; | ||
| -static bool monitor_hw; | ||
| +static refcount_t refcount_sw = REFCOUNT_INIT(0); | ||
| +static refcount_t refcount_hw = REFCOUNT_INIT(0); | ||
|
|
||
| /* net_dm_mutex | ||
| * | ||
| @@ -1051,10 +1051,8 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) | ||
| const struct net_dm_alert_ops *ops; | ||
| int cpu, rc; | ||
|
|
||
| - if (monitor_hw) { | ||
| - NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled"); | ||
| - return -EAGAIN; | ||
| - } | ||
| + if (refcount_inc_not_zero(&refcount_hw)) | ||
| + return 0; | ||
|
|
||
| ops = net_dm_alert_ops_arr[net_dm_alert_mode]; | ||
|
|
||
| @@ -1079,7 +1077,7 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) | ||
| goto err_module_put; | ||
| } | ||
|
|
||
| - monitor_hw = true; | ||
| + refcount_set(&refcount_hw, 1); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is setting the refcount to 1, but it's overwriting the increment done above? |
||
|
|
||
| return 0; | ||
|
|
||
| @@ -1107,14 +1105,14 @@ static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) | ||
| const struct net_dm_alert_ops *ops; | ||
| int cpu; | ||
|
|
||
| - if (!monitor_hw) { | ||
| - NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); | ||
| + /* Don't disable if already disabled. */ | ||
| + if (!refcount_read(&refcount_hw)) | ||
| return; | ||
| - } | ||
|
|
||
| - ops = net_dm_alert_ops_arr[net_dm_alert_mode]; | ||
| + if (!refcount_dec_and_test(&refcount_hw)) | ||
| + return; | ||
|
|
||
| - monitor_hw = false; | ||
| + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; | ||
|
|
||
| net_dm_hw_probe_unregister(ops); | ||
|
|
||
| @@ -1141,6 +1139,9 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack) | ||
| const struct net_dm_alert_ops *ops; | ||
| int cpu, rc; | ||
|
|
||
| + if (refcount_inc_not_zero(&refcount_sw)) | ||
| + return 0; | ||
| + | ||
| ops = net_dm_alert_ops_arr[net_dm_alert_mode]; | ||
|
|
||
| if (!try_module_get(THIS_MODULE)) { | ||
| @@ -1174,6 +1175,8 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack) | ||
| goto err_unregister_trace; | ||
| } | ||
|
|
||
| + refcount_set(&refcount_sw, 1); | ||
| + | ||
| return 0; | ||
|
|
||
| err_unregister_trace: | ||
| @@ -1197,6 +1200,13 @@ static void net_dm_trace_off_set(void) | ||
| const struct net_dm_alert_ops *ops; | ||
| int cpu; | ||
|
|
||
| + /* Don't disable if already disabled. */ | ||
| + if (!refcount_read(&refcount_sw)) | ||
| + return; | ||
| + | ||
| + if (!refcount_dec_and_test(&refcount_sw)) | ||
| + return; | ||
| + | ||
| ops = net_dm_alert_ops_arr[net_dm_alert_mode]; | ||
|
|
||
| unregister_trace_napi_poll(ops->napi_poll_probe, NULL); | ||
| @@ -1224,11 +1234,6 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) | ||
| { | ||
| int rc = 0; | ||
|
|
||
| - if (state == trace_state) { | ||
| - NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state"); | ||
| - return -EAGAIN; | ||
| - } | ||
| - | ||
| switch (state) { | ||
| case TRACE_ON: | ||
| rc = net_dm_trace_on_set(extack); | ||
| @@ -1241,17 +1246,12 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) | ||
| break; | ||
| } | ||
|
|
||
| - if (!rc) | ||
| - trace_state = state; | ||
| - else | ||
| - rc = -EINPROGRESS; | ||
| - | ||
| return rc; | ||
| } | ||
|
|
||
| static bool net_dm_is_monitoring(void) | ||
| { | ||
| - return trace_state == TRACE_ON || monitor_hw; | ||
| + return refcount_read(&refcount_sw) || refcount_read(&refcount_hw); | ||
| } | ||
|
|
||
| static int net_dm_alert_mode_get_from_info(struct genl_info *info, | ||
| -- | ||
| GitLab | ||
|
|
||
55 changes: 55 additions & 0 deletions
55
patches-sonic/0004-drop_monitor-Send-hardware-drop-notifications-to-the.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,55 @@ | ||
| From 41e1e040ed42d47757819896fa275ce00301e162 Mon Sep 17 00:00:00 2001 | ||
| From: Ido Schimmel <idosch@nvidia.com> | ||
| Date: Wed, 11 Feb 2026 15:41:52 +0200 | ||
| Subject: [PATCH] drop_monitor: Send hardware drop notifications to the | ||
| appropriate netns | ||
|
|
||
| Unlike software drops, it is always clear in which network namespace a | ||
| hardware drop occurred. | ||
|
|
||
| Allow drop_monitor to work in different network namespaces and send | ||
| hardware drop notifications to the appropriate one. Other notifications | ||
| are still sent to the initial network namespace. | ||
|
|
||
| Signed-off-by: Ido Schimmel <idosch@nvidia.com> | ||
| --- | ||
| net/core/drop_monitor.c | 9 ++++++++- | ||
| 1 file changed, 8 insertions(+), 1 deletion(-) | ||
|
|
||
| diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c | ||
| index 29f2b8c6b8b9..e56d3e166df0 100644 | ||
| --- a/net/core/drop_monitor.c | ||
| +++ b/net/core/drop_monitor.c | ||
| @@ -911,6 +911,7 @@ static void net_dm_hw_packet_report(struct sk_buff *skb) | ||
| struct devlink_trap_metadata *hw_metadata; | ||
| struct sk_buff *msg; | ||
| size_t payload_len; | ||
| + struct net *net; | ||
| int rc; | ||
|
|
||
| if (skb->data > skb_mac_header(skb)) | ||
| @@ -934,7 +935,12 @@ static void net_dm_hw_packet_report(struct sk_buff *skb) | ||
| goto out; | ||
| } | ||
|
|
||
| - genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); | ||
| + /* The network namespace cannot disappear since we are holding a | ||
| + * reference on the net device. | ||
| + */ | ||
| + net = dev_net(hw_metadata->input_dev); | ||
| + genlmsg_multicast_netns(&net_drop_monitor_family, net, msg, 0, 0, | ||
| + GFP_KERNEL); | ||
|
|
||
| out: | ||
| net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata); | ||
| @@ -1659,6 +1665,7 @@ static struct genl_family net_drop_monitor_family __ro_after_init = { | ||
| .name = "NET_DM", | ||
| .version = 2, | ||
| .maxattr = NET_DM_ATTR_MAX, | ||
| + .netnsok = true, | ||
| .policy = net_dm_nl_policy, | ||
| .pre_doit = net_dm_nl_pre_doit, | ||
| .post_doit = net_dm_nl_post_doit, | ||
| -- | ||
| GitLab | ||
|
|
103 changes: 103 additions & 0 deletions
103
patches-sonic/0005-drop_monitor-Do-not-block-a-configuration-change-if-it-is-a.patch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| From 1581ee077219eaa6cca2aec1cdc190da14ebd6ab Mon Sep 17 00:00:00 2001 | ||
| From: Ido Schimmel <idosch@nvidia.com> | ||
| Date: Thu, 12 Feb 2026 10:46:23 +0200 | ||
| Subject: [PATCH] drop_monitor: Do not block a configuration change if it is a | ||
| NOP | ||
|
|
||
| Currently, drop_monitor blocks configuration changes if tracing is | ||
| enabled. Relax this condition and do not block a configuration change if | ||
| it is a NOP, even if tracing is enabled. | ||
|
|
||
| Signed-off-by: Ido Schimmel <idosch@nvidia.com> | ||
| --- | ||
| net/core/drop_monitor.c | 60 ++++++++++++++--------------------------- | ||
| 1 file changed, 20 insertions(+), 40 deletions(-) | ||
|
|
||
| diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c | ||
| index e56d3e166df0..5bdf8ca0caba 100644 | ||
| --- a/net/core/drop_monitor.c | ||
| +++ b/net/core/drop_monitor.c | ||
| @@ -1279,60 +1279,40 @@ static int net_dm_alert_mode_get_from_info(struct genl_info *info, | ||
| return 0; | ||
| } | ||
|
|
||
| -static int net_dm_alert_mode_set(struct genl_info *info) | ||
| +static int net_dm_cmd_config(struct sk_buff *skb, struct genl_info *info) | ||
| { | ||
| + enum net_dm_alert_mode alert_mode = net_dm_alert_mode; | ||
| struct netlink_ext_ack *extack = info->extack; | ||
| - enum net_dm_alert_mode alert_mode; | ||
| + u32 trunc_len = net_dm_trunc_len; | ||
| + u32 queue_len = net_dm_queue_len; | ||
| int rc; | ||
|
|
||
| - if (!info->attrs[NET_DM_ATTR_ALERT_MODE]) | ||
| - return 0; | ||
| - | ||
| - rc = net_dm_alert_mode_get_from_info(info, &alert_mode); | ||
| - if (rc) { | ||
| - NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); | ||
| - return -EINVAL; | ||
| + if (info->attrs[NET_DM_ATTR_ALERT_MODE]) { | ||
| + rc = net_dm_alert_mode_get_from_info(info, &alert_mode); | ||
| + if (rc) { | ||
| + NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); | ||
| + return -EINVAL; | ||
| + } | ||
| } | ||
|
|
||
| - net_dm_alert_mode = alert_mode; | ||
| - | ||
| - return 0; | ||
| -} | ||
| - | ||
| -static void net_dm_trunc_len_set(struct genl_info *info) | ||
| -{ | ||
| - if (!info->attrs[NET_DM_ATTR_TRUNC_LEN]) | ||
| - return; | ||
| - | ||
| - net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); | ||
| -} | ||
| - | ||
| -static void net_dm_queue_len_set(struct genl_info *info) | ||
| -{ | ||
| - if (!info->attrs[NET_DM_ATTR_QUEUE_LEN]) | ||
| - return; | ||
| + if (info->attrs[NET_DM_ATTR_TRUNC_LEN]) | ||
| + trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); | ||
|
|
||
| - net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); | ||
| -} | ||
| + if (info->attrs[NET_DM_ATTR_QUEUE_LEN]) | ||
| + queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); | ||
|
|
||
| -static int net_dm_cmd_config(struct sk_buff *skb, | ||
| - struct genl_info *info) | ||
| -{ | ||
| - struct netlink_ext_ack *extack = info->extack; | ||
| - int rc; | ||
| + if (alert_mode == net_dm_alert_mode && trunc_len == net_dm_trunc_len && | ||
| + queue_len == net_dm_queue_len) | ||
| + return 0; | ||
|
|
||
| if (net_dm_is_monitoring()) { | ||
| NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring"); | ||
| return -EBUSY; | ||
| } | ||
|
|
||
| - rc = net_dm_alert_mode_set(info); | ||
| - if (rc) | ||
| - return rc; | ||
| - | ||
| - net_dm_trunc_len_set(info); | ||
| - | ||
| - net_dm_queue_len_set(info); | ||
| + net_dm_alert_mode = alert_mode; | ||
| + net_dm_trunc_len = trunc_len; | ||
| + net_dm_queue_len = queue_len; | ||
|
|
||
| return 0; | ||
| } | ||
| -- | ||
| GitLab | ||
|
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.