From f1d6655004760046089c0e2ca256fcc5027ba661 Mon Sep 17 00:00:00 2001 From: Volodymyr Samotiy Date: Sun, 28 Jan 2024 16:26:07 +0200 Subject: [PATCH] [Mellanox] Disable SSD NCQ on Mellanox platforms (#17567) - Why I did it Based on some research, some products might experience occasional IO failures in the communication between CPU and SSD because of NCQ. There seems to be a problem between some kernel versions and some SATA controllers. Syslog error message examples: Error "ata1: SError: { UnrecovData Handshk }" - "failed command: WRITE FPDMA QUEUED". Error "ata1: SError: { RecovComm HostInt PHYRdyChg CommWake 10B8B DevExch }" - "failed command: READ FPDMA QUEUED". Some vendors already disabled NCQ on their platforms in SONiC due to a similar issue: [Arista] Disable ATA NCQ for a few products #13739 [Arista] Disable ATA NCQ for a few products [Arista] Disable SSD NCQ on DCS-7050CX3-32S #13964 [Arista] Disable SSD NCQ on DCS-7050CX3-32S Also, there are other discussions on Debian/Ubuntu forums about similar issues, and it was suggested to disable NCQ: https://askubuntu.com/questions/133946/are-these-sata-errors-dangerous - How I did it Add a kernel parameter to tell libata to disable NCQ - How to verify it Use the FIO tool - fio --direct=1 --rw=randrw --bs=64k --ioengine=libaio --iodepth=64 --runtime=120 --numjobs=4 --- device/mellanox/x86_64-mlnx_msn2010-r0/installer.conf | 2 +- device/mellanox/x86_64-mlnx_msn2700-r0/installer.conf | 2 +- device/mellanox/x86_64-mlnx_msn2700a1-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn3420-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn3700-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn3700c-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn3800-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn4410-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn4600-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn4600c-r0/installer.conf | 1 + device/mellanox/x86_64-mlnx_msn4700-r0/installer.conf | 1 
+ device/mellanox/x86_64-nvidia_sn2201-r0/installer.conf | 2 +- device/mellanox/x86_64-nvidia_sn4800-r0/installer.conf | 1 + device/mellanox/x86_64-nvidia_sn5600-r0/installer.conf | 1 + 14 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 device/mellanox/x86_64-mlnx_msn2700a1-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn3420-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn3700-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn3700c-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn3800-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn4410-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn4600-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn4600c-r0/installer.conf create mode 100644 device/mellanox/x86_64-mlnx_msn4700-r0/installer.conf create mode 100644 device/mellanox/x86_64-nvidia_sn4800-r0/installer.conf create mode 100644 device/mellanox/x86_64-nvidia_sn5600-r0/installer.conf diff --git a/device/mellanox/x86_64-mlnx_msn2010-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn2010-r0/installer.conf index c9c9493a5404..1aba51906edf 100644 --- a/device/mellanox/x86_64-mlnx_msn2010-r0/installer.conf +++ b/device/mellanox/x86_64-mlnx_msn2010-r0/installer.conf @@ -1 +1 @@ -ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax acpi=noirq" +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax acpi=noirq libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn2700-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn2700-r0/installer.conf index c9c9493a5404..1aba51906edf 100644 --- a/device/mellanox/x86_64-mlnx_msn2700-r0/installer.conf +++ b/device/mellanox/x86_64-mlnx_msn2700-r0/installer.conf @@ -1 +1 @@ -ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax acpi=noirq" +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax acpi=noirq libata.force=noncq" diff --git 
a/device/mellanox/x86_64-mlnx_msn2700a1-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn2700a1-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn2700a1-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn3420-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn3420-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3420-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn3700-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn3700-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn3700c-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn3700c-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3700c-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn3800-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn3800-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn3800-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn4410-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn4410-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn4410-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn4600-r0/installer.conf 
b/device/mellanox/x86_64-mlnx_msn4600-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn4600-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn4600c-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn4600c-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn4600c-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-mlnx_msn4700-r0/installer.conf b/device/mellanox/x86_64-mlnx_msn4700-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-mlnx_msn4700-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-nvidia_sn2201-r0/installer.conf b/device/mellanox/x86_64-nvidia_sn2201-r0/installer.conf index 8fcb08aba3b8..c1376afcd13f 100644 --- a/device/mellanox/x86_64-nvidia_sn2201-r0/installer.conf +++ b/device/mellanox/x86_64-nvidia_sn2201-r0/installer.conf @@ -1 +1 @@ -ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax" +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="acpi_enforce_resources=lax libata.force=noncq" diff --git a/device/mellanox/x86_64-nvidia_sn4800-r0/installer.conf b/device/mellanox/x86_64-nvidia_sn4800-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-nvidia_sn4800-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq" diff --git a/device/mellanox/x86_64-nvidia_sn5600-r0/installer.conf b/device/mellanox/x86_64-nvidia_sn5600-r0/installer.conf new file mode 100644 index 000000000000..c46f0eb7a459 --- /dev/null +++ b/device/mellanox/x86_64-nvidia_sn5600-r0/installer.conf @@ -0,0 +1 @@ +ONIE_PLATFORM_EXTRA_CMDLINE_LINUX="libata.force=noncq"