From ff290ac45dd9bb0e33c99c93b809136c23bcb5df Mon Sep 17 00:00:00 2001 From: jsg Date: Wed, 26 Jul 2023 06:17:53 +0000 Subject: [PATCH] drm/amdgpu: add the fan abnormal detection feature From lyndonli d7d53c669da90181827e42583582192646a4d933 in linux-6.1.y/6.1.40 ef5fca9f7294509ee5013af9e879edc5837c1d6c in mainline linux --- sys/dev/pci/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + .../pci/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 28 +++++++++++++++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 3 files changed, 30 insertions(+) diff --git a/sys/dev/pci/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/sys/dev/pci/drm/amd/pm/swsmu/inc/amdgpu_smu.h index adcf99148c7..1bb953c9796 100644 --- a/sys/dev/pci/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/sys/dev/pci/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -168,6 +168,7 @@ struct smu_temperature_range { int mem_crit_max; int mem_emergency_max; int software_shutdown_temp; + int software_shutdown_temp_offset; }; struct smu_state_validation_block { diff --git a/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0.c index d829f952560..a72fa30ea5e 100644 --- a/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1381,6 +1381,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, */ uint32_t ctxid = entry->src_data[0]; uint32_t data; + uint32_t high; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1437,6 +1438,33 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, schedule_work(&smu->throttling_logging_work); break; + case 0x8: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case 0x9: + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; } } } diff --git a/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 57579f5e00e..67cbd2efe2b 100644 --- a/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/sys/dev/pci/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1288,6 +1288,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } -- 2.20.1