From 01f578d05b5083c8192eb3aca87f8ab88a28ad3c Mon Sep 17 00:00:00 2001 From: Brandon Chuang Date: Fri, 12 Sep 2025 10:53:01 +0800 Subject: [PATCH] fan_config: minipack3n: update fan service config with incremental PID logic Description Update the fan service configuration for minipack3n to support incremental PID logic. This change improves fan speed control based on real-time thermal data, as verified by the thermal team. Motivation The existing configuration uses a fixed 60% PWM setting, regardless of temperature changes in optics, CPU, inlet, or ASIC components. This update introduces dynamic fan speed control based on temperature inputs to enhance thermal responsiveness and system reliability. Changes 1. Updated `platform_manager.json`: changed SMB CPLD address from 0x3e to 0x33 to enable SP4 power control. 2. Applied `OPTIC_AGGREGATION_TYPE_INCREMENTAL_PID` for optics temperature management. 3. Applied `SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID` for `CPU_UNCORE_TEMP`. 4. Applied `SENSOR_PWM_CALC_TYPE_FOUR_LINEAR_TABLE` for `SCM_INLET_U36_TEMP`. 5. Applied `SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID` for `asic_temp`. 6. Added `shutdownCondition` (an `overtempThreshold` of 110.0 on `asic_temp`) with associated `shutdownCmd` for SP4. 7. Replaced the fixed 60% PWM limits with a dynamic range (`pwmLowerThreshold` 60 -> 25, `pwmUpperThreshold` 60 -> 70, `pwmTransitionValue` 60 -> 45), changed `pwmBoostOnNumDeadFan` from 0 to 2, changed the `zone1` `slope` from 0 to 10, and added `CPU_UNCORE_TEMP`, `SCM_INLET_U36_TEMP`, `asic_temp`, and `qsfp_group_1` to the `zone1` `sensorNames`. Test Plan 1) Build and deploy the latest versions of fboss components including fan_service, sensor_service, and platform_manager to ensure the updated configuration is in effect. 2) Run platform_manager and confirm that the SMB CPLD address has been updated from 0x3e to 0x33 for SP4 power control. 3) Start sensor_service, qsfp_service, and fan_service to ensure proper initialization and inter-service communication with the new configuration. 4) Confirm with the thermal team that the new incremental PID logic adjusts fan speed dynamically based on temperature changes (optics, CPU, inlet, ASIC). 5) Sequentially unplug fans (fan-1 to fan-8) and verify that fan_service detects each failure and increases PWM to compensate.
6) Trigger the shutdown condition and verify that the shutdownCmd for SP4 is executed correctly. Test Log [mp3n_platform_manager_smbcpld_change_to_0x33.txt](https://github.com/user-attachments/files/22288809/mp3n_platform_manager_smbcpld_change_to_0x33.txt) [mp3n_sensor_service.txt](https://github.com/user-attachments/files/22288811/mp3n_sensor_service.txt) [mp3n_test_fan_service_fan1_to_fan8_fail_then_recover.txt](https://github.com/user-attachments/files/22288812/mp3n_test_fan_service_fan1_to_fan8_fail_then_recover.txt) [mp3n_test_fan_service_sp4_shutdown.txt](https://github.com/user-attachments/files/22288813/mp3n_test_fan_service_sp4_shutdown.txt) [mp3n_thermal_team_fan_service_35C.log](https://github.com/user-attachments/files/22288814/mp3n_thermal_team_fan_service_35C.log) [mp3n_thermal_team_fan_service_35C.xlsx](https://github.com/user-attachments/files/22288815/mp3n_thermal_team_fan_service_35C.xlsx) [mp3n_thermal_team_fan_service_35C_fan3_one_rotor_failed.log](https://github.com/user-attachments/files/22288816/mp3n_thermal_team_fan_service_35C_fan3_one_rotor_failed.log) [mp3n_thermal_team_fan_service_35C_fan3_one_rotor_failed.xlsx](https://github.com/user-attachments/files/22288819/mp3n_thermal_team_fan_service_35C_fan3_one_rotor_failed.xlsx) --- .../configs/minipack3n/fan_service.json | 137 +++++++++++++++++- .../configs/minipack3n/platform_manager.json | 2 +- 2 files changed, 130 insertions(+), 9 deletions(-) diff --git a/fboss/platform/configs/minipack3n/fan_service.json b/fboss/platform/configs/minipack3n/fan_service.json index 0265216b15e9b..55de503569e4f 100644 --- a/fboss/platform/configs/minipack3n/fan_service.json +++ b/fboss/platform/configs/minipack3n/fan_service.json @@ -1,11 +1,12 @@ { - "pwmBoostOnNumDeadFan": 0, + "pwmBoostOnNumDeadFan": 2, "pwmBoostOnNumDeadSensor": 0, "pwmBoostOnNoQsfpAfterInSec": 55, "pwmBoostValue": 60, - "pwmTransitionValue": 60, - "pwmLowerThreshold": 60, - "pwmUpperThreshold": 60, + "pwmTransitionValue": 45, + 
"pwmLowerThreshold": 25, + "pwmUpperThreshold": 70, + "shutdownCmd": "echo 0 > /run/devmap/cplds/SMB_CPLD/sp4_pwr_en", "watchdog": { "sysfsPath": "/run/devmap/watchdogs/FAN_WATCHDOG", "value": 0 @@ -14,8 +15,123 @@ "sensorReadInterval": 5, "pwmUpdateInterval": 5 }, - "optics": [], - "sensors": [], + "optics": [ + { + "opticName": "qsfp_group_1", + "access": { + "accessType": "ACCESS_TYPE_QSFP" + }, + "portList": [], + "aggregationType": "OPTIC_AGGREGATION_TYPE_INCREMENTAL_PID", + "pidSettings": { + "OPTIC_TYPE_800_GENERIC": { + "kp": 2, + "ki": 0.6, + "kd": 0, + "setPoint": 65.0, + "posHysteresis": 2.0, + "negHysteresis": 0.0 + }, + "OPTIC_TYPE_400_GENERIC": { + "kp": 2, + "ki": 0.6, + "kd": 0, + "setPoint": 65.0, + "posHysteresis": 2.0, + "negHysteresis": 0.0 + }, + "OPTIC_TYPE_200_GENERIC": { + "kp": 2, + "ki": 0.6, + "kd": 0, + "setPoint": 65.0, + "posHysteresis": 2.0, + "negHysteresis": 0.0 + }, + "OPTIC_TYPE_100_GENERIC": { + "kp": 2, + "ki": 0.6, + "kd": 0, + "setPoint": 65.0, + "posHysteresis": 2.0, + "negHysteresis": 0.0 + } + } + } + ], + "sensors": [ + { + "sensorName": "CPU_UNCORE_TEMP", + "access": { + "accessType": "ACCESS_TYPE_THRIFT" + }, + "pwmCalcType": "SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID", + "pidSetting": { + "kp": 2, + "ki": 0.6, + "kd": 0, + "setPoint": 94.0, + "posHysteresis": 3.0, + "negHysteresis": 3.0 + } + }, + { + "sensorName": "SCM_INLET_U36_TEMP", + "access": { + "accessType": "ACCESS_TYPE_THRIFT" + }, + "pwmCalcType": "SENSOR_PWM_CALC_TYPE_FOUR_LINEAR_TABLE", + "normalUpTable": { + "31": 25, + "32": 30, + "37": 35, + "42": 60 + }, + "normalDownTable": { + "29": 25, + "30": 30, + "35": 35, + "40": 60 + }, + "failUpTable": { + "31": 30, + "32": 35, + "37": 40, + "42": 65 + }, + "failDownTable": { + "29": 30, + "30": 35, + "35": 40, + "40": 65 + } + }, + { + "sensorName": "asic_temp", + "access": { + "accessType": "ACCESS_TYPE_THRIFT" + }, + "pwmCalcType": "SENSOR_PWM_CALC_TYPE_INCREMENTAL_PID", + "pidSetting": { + "kp": 2, + "ki": 0.6, 
+ "kd": 0, + "setPoint": 95.0, + "posHysteresis": 2.0, + "negHysteresis": 0.0 + } + } + ], + "shutdownCondition": { + "numOvertempSensorForShutdown": 1, + "conditions": [ + { + "sensorName": "asic_temp", + "overtempThreshold": 110.0, + "slidingWindowSize": 1 + } + ] + }, "fans": [ { "fanName": "FAN_1_F", @@ -246,7 +362,12 @@ { "zoneType": "ZONE_TYPE_MAX", "zoneName": "zone1", - "sensorNames": [], + "sensorNames": [ + "CPU_UNCORE_TEMP", + "SCM_INLET_U36_TEMP", + "asic_temp", + "qsfp_group_1" + ], "fanNames": [ "FAN_1_F", "FAN_1_R", @@ -265,7 +386,7 @@ "FAN_8_F", "FAN_8_R" ], - "slope": 0 + "slope": 10 } ] } diff --git a/fboss/platform/configs/minipack3n/platform_manager.json b/fboss/platform/configs/minipack3n/platform_manager.json index fedb6644082b2..39287452d800b 100644 --- a/fboss/platform/configs/minipack3n/platform_manager.json +++ b/fboss/platform/configs/minipack3n/platform_manager.json @@ -3016,7 +3016,7 @@ }, { "busName": "INCOMING@3", - "address": "0x3e", + "address": "0x33", "kernelDeviceName": "mp3n_smbcpld", "pmUnitScopedName": "SMB_CPLD" },