diff --git a/patches-sonic/driver-arista-pci-aer-disable-recovery.patch b/patches-sonic/driver-arista-pci-aer-disable-recovery.patch
new file mode 100644
index 000000000..b477b0f7c
--- /dev/null
+++ b/patches-sonic/driver-arista-pci-aer-disable-recovery.patch
@@ -0,0 +1,104 @@
+From: yurypm
+Date: Mon, 25 May 2026 13:45:51 +0000
+Subject: Add noaer_recovery pci kernel boot option
+
+AER error recovery is part of the AER error handling subsystem in
+the Linux kernel. AER is enabled by default in the SONiC Linux
+kernel. The default Linux behavior is incompatible with Arista
+chassis hardware architecture. Enabling AER recovery on large
+modular systems with a complex PCIe tree could cause unexpected
+behavior and side effects. It would be nice to have an option to
+disable AER recovery on some chassis.
+
+Add pci=noaer_recovery kernel boot option to disable AER error
+recovery when an uncorrectable error is reported.
+
+Signed-off-by: Yury Murashka
+---
+ Documentation/admin-guide/kernel-parameters.txt |  4 ++++
+ drivers/pci/pci.c                               |  2 ++
+ drivers/pci/pci.h                               |  2 ++
+ drivers/pci/pcie/err.c                          | 15 +++++++++++++++
+ 4 files changed, 23 insertions(+)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index f402bba..2bbd7ab 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4483,6 +4483,10 @@
+ 		noaer		[PCIE] If the PCIEAER kernel config parameter is
+ 				enabled, this kernel boot option can be used to
+ 				disable the use of PCIE advanced error reporting.
++		noaer_recovery	[PCIE] If the PCIEAER kernel config parameter is
++				enabled, this kernel boot option can be used to
++				disable AER error recovery when an uncorrectable
++				error is reported.
+ 		nodomains	[PCI] Disable support for multiple PCI
+ 				root domains (aka PCI segments, in ACPI-speak).
+ 		nommconf	[X86] Disable use of MMCONFIG for PCI
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index 51a09e4..77d0786 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -6896,6 +6896,8 @@ static int __init pci_setup(char *str)
+ 				pcie_ats_disabled = true;
+ 			} else if (!strcmp(str, "noaer")) {
+ 				pci_no_aer();
++			} else if (!strcmp(str, "noaer_recovery")) {
++				pci_no_aer_recovery();
+ 			} else if (!strcmp(str, "earlydump")) {
+ 				pci_early_dump = true;
+ 			} else if (!strncmp(str, "realloc=", 8)) {
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index 65df6d2..551b6e8 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -826,6 +826,7 @@ static inline void of_pci_remove_node(struct pci_dev *pdev) { }
+ 
+ #ifdef CONFIG_PCIEAER
+ void pci_no_aer(void);
++void pci_no_aer_recovery(void);
+ void pci_aer_init(struct pci_dev *dev);
+ void pci_aer_exit(struct pci_dev *dev);
+ extern const struct attribute_group aer_stats_attr_group;
+@@ -836,6 +837,7 @@ void pci_save_aer_state(struct pci_dev *dev);
+ void pci_restore_aer_state(struct pci_dev *dev);
+ #else
+ static inline void pci_no_aer(void) { }
++static inline void pci_no_aer_recovery(void) { }
+ static inline void pci_aer_init(struct pci_dev *d) { }
+ static inline void pci_aer_exit(struct pci_dev *d) { }
+ static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
+diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
+index 3109077..bb5ec0c 100644
+--- a/drivers/pci/pcie/err.c
++++ b/drivers/pci/pcie/err.c
+@@ -21,6 +21,13 @@
+ #include "portdrv.h"
+ #include "../pci.h"
+ 
++static bool pcie_aer_recovery_disable;
++
++void pci_no_aer_recovery(void)
++{
++	pcie_aer_recovery_disable = true;
++}
++
+ static pci_ers_result_t merge_result(enum pci_ers_result orig,
+ 				  enum pci_ers_result new)
+ {
+@@ -197,6 +204,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
+ 	pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER;
+ 	struct pci_host_bridge *host = pci_find_host_bridge(dev->bus);
+ 
++	if (pcie_aer_recovery_disable) {
++		if (host->native_aer || pcie_ports_native) {
++			pcie_clear_device_status(dev);
++			pci_aer_clear_nonfatal_status(dev);
++		}
++		return status;
++	}
++
+ 	/*
+ 	 * If the error was detected by a Root Port, Downstream Port, RCEC,
+ 	 * or RCiEP, recovery runs on the device itself.  For Ports, that
diff --git a/patches-sonic/driver-arista-pci-dpc-disable.patch b/patches-sonic/driver-arista-pci-dpc-disable.patch
new file mode 100644
index 000000000..26504c41a
--- /dev/null
+++ b/patches-sonic/driver-arista-pci-dpc-disable.patch
@@ -0,0 +1,121 @@
+From: yurypm
+Date: Mon, 25 May 2026 13:45:50 +0000
+Subject: Add nodpc pci kernel boot option
+
+PCI DPC (Downstream Port Containment) is enabled by default in the
+SONiC Linux kernel. DPC support can be advertised by PCIe devices,
+but it might not be fully supported in the firmware. The default Linux
+behavior is incompatible with Arista chassis hardware architecture.
+Enabling DPC could cause unexpected behavior and side effects. It
+would be nice to have an option to disable DPC on some chassis.
+
+Add pci=nodpc kernel boot option to disable PCI DPC.
+
+Signed-off-by: Yury Murashka
+---
+ Documentation/admin-guide/kernel-parameters.txt |  3 +++
+ drivers/pci/pci.c                               |  2 ++
+ drivers/pci/pci.h                               |  2 ++
+ drivers/pci/pcie/dpc.c                          | 16 +++++++++++++---
+ 4 files changed, 20 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index 2bbd7ab..068891e 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -4480,6 +4480,9 @@
+ 				through ports 0xC000-0xCFFF).
+ 				See http://wiki.osdev.org/PCI for more info
+ 				on the configuration access mechanisms.
++		nodpc		[PCIE] If the PCIE_DPC kernel config parameter is
++				enabled, this kernel boot option can be used to
++				disable the use of PCIE DPC.
+ 		noaer		[PCIE] If the PCIEAER kernel config parameter is
+ 				enabled, this kernel boot option can be used to
+ 				disable the use of PCIE advanced error reporting.
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index 77d0786..f6a4c2f 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -6894,6 +6894,8 @@ static int __init pci_setup(char *str)
+ 			} else if (!strncmp(str, "noats", 5)) {
+ 				pr_info("PCIe: ATS is disabled\n");
+ 				pcie_ats_disabled = true;
++			} else if (!strcmp(str, "nodpc")) {
++				pci_no_dpc();
+ 			} else if (!strcmp(str, "noaer")) {
+ 				pci_no_aer();
+ 			} else if (!strcmp(str, "noaer_recovery")) {
+diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
+index 551b6e8..bece8b9 100644
+--- a/drivers/pci/pci.h
++++ b/drivers/pci/pci.h
+@@ -535,6 +535,7 @@ struct rcec_ea {
+ #endif
+ 
+ #ifdef CONFIG_PCIE_DPC
++void pci_no_dpc(void);
+ void pci_save_dpc_state(struct pci_dev *dev);
+ void pci_restore_dpc_state(struct pci_dev *dev);
+ void pci_dpc_init(struct pci_dev *pdev);
+@@ -542,6 +543,7 @@ void dpc_process_error(struct pci_dev *pdev);
+ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
+ bool pci_dpc_recovered(struct pci_dev *pdev);
+ #else
++static inline void pci_no_dpc(void) { }
+ static inline void pci_save_dpc_state(struct pci_dev *dev) { }
+ static inline void pci_restore_dpc_state(struct pci_dev *dev) { }
+ static inline void pci_dpc_init(struct pci_dev *pdev) { }
+diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
+index cdc5431..8eb2a1f 100644
+--- a/drivers/pci/pcie/dpc.c
++++ b/drivers/pci/pcie/dpc.c
+@@ -43,12 +43,19 @@ static const char * const rp_pio_error_string[] = {
+ 	"Memory Request Completion Timeout",		 /* Bit Position 18 */
+ };
+ 
++static bool pcie_dpc_disable;
++
++void pci_no_dpc(void)
++{
++	pcie_dpc_disable = true;
++}
++
+ void pci_save_dpc_state(struct pci_dev *dev)
+ {
+ 	struct pci_cap_saved_state *save_state;
+ 	u16 *cap;
+ 
+-	if (!pci_is_pcie(dev))
++	if (pcie_dpc_disable || !pci_is_pcie(dev))
+ 		return;
+ 
+ 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
+@@ -64,7 +71,7 @@ void pci_restore_dpc_state(struct pci_dev *dev)
+ 	struct pci_cap_saved_state *save_state;
+ 	u16 *cap;
+ 
+-	if (!pci_is_pcie(dev))
++	if (pcie_dpc_disable || !pci_is_pcie(dev))
+ 		return;
+ 
+ 	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
+@@ -104,7 +111,7 @@ bool pci_dpc_recovered(struct pci_dev *pdev)
+ {
+ 	struct pci_host_bridge *host;
+ 
+-	if (!pdev->dpc_cap)
++	if (pcie_dpc_disable || !pdev->dpc_cap)
+ 		return false;
+ 
+ 	/*
+@@ -398,6 +405,9 @@ void pci_dpc_init(struct pci_dev *pdev)
+ {
+ 	u16 cap;
+ 
++	if (pcie_dpc_disable)
++		return;
++
+ 	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
+ 	if (!pdev->dpc_cap)
+ 		return;
diff --git a/patches-sonic/series b/patches-sonic/series
index b84d84667..8b133188b 100644
--- a/patches-sonic/series
+++ b/patches-sonic/series
@@ -11,6 +11,8 @@ driver-arista-pci-reassign-pref-mem.patch
 driver-arista-mmcblk-not-working-on-AMD-platforms.patch
 driver-arista-restrict-eMMC-drive-to-50Mhz-from-userland.patch
 driver-arista-i2c-designware-shutdown.patch
+driver-arista-pci-aer-disable-recovery.patch
+driver-arista-pci-dpc-disable.patch
 driver-support-sff-8436-eeprom.patch
 driver-support-sff-8436-eeprom-update.patch
 driver-sff-8436-use-nvmem-framework.patch