From be2642e00fdd86eb17b9266962a74d1f89539bbd Mon Sep 17 00:00:00 2001 From: jsg Date: Fri, 13 Jan 2023 01:56:44 +0000 Subject: [PATCH] drm/amd/display: Add check for DET fetch latency hiding for dcn32 From Dillon Varone 4ac1437d64efdd2788f8c511276243f594e946fd in linux-6.1.y/6.1.5 6d4727c80947de0e6fad58b196a9d215e3b32608 in mainline linux --- .../dc/dml/dcn32/display_mode_vba_32.c | 39 +++++++++++ .../dc/dml/dcn32/display_mode_vba_util_32.c | 69 +++++++++++++++++++ .../dc/dml/dcn32/display_mode_vba_util_32.h | 18 +++++ .../drm/amd/display/dc/dml/display_mode_vba.h | 2 + 4 files changed, 128 insertions(+) diff --git a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9afd9ba23fb..820042f6aac 100644 --- a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -670,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; } + v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->UrgentLatency, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] && !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? 
@@ -1664,6 +1683,7 @@ static void mode_support_configuration(struct vba_vars_st *v, && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true && mode_lib->vba.NonsupportedDSCInputBPC == false + && mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false && !mode_lib->vba.ExceededMALLSize && ((mode_lib->vba.HostVMEnable == false && !mode_lib->vba.ImmediateFlipRequiredFinal) @@ -3158,6 +3178,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); + mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); diff --git a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index debe46b24a3..5af601cff1a 100644 --- a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6228,3 +6228,72 @@ void 
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
 }
+
+/*
+ * Check whether the DET can hide the time needed to fetch one swath for every
+ * surface.  For each non-phantom surface, the time covered by the lines its
+ * DET holds beyond one swath is compared against total swath bytes / ReturnBW
+ * plus UrgentLatency.  Returns true if any surface falls short.
+ */
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+		double ReturnBW,
+		double UrgentLatency,
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		unsigned int SwathWidthY[],
+		unsigned int SwathWidthC[],
+		double BytePerPixelInDETY[],
+		double BytePerPixelInDETC[],
+		unsigned int DETBufferSizeY[],
+		unsigned int DETBufferSizeC[],
+		unsigned int NumOfDPP[],
+		unsigned int HTotal[],
+		double PixelClock[],
+		double VRatioY[],
+		double VRatioC[],
+		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX])
+{
+	unsigned int k;
+	double SwathSizeAllSurfaces = 0;
+	double SwathSizeAllSurfacesInFetchTimeUs;
+
+	/* calculate sum of single swath size for all pipes in bytes */
+	for (k = 0; k < NumberOfActiveSurfaces; k++) {
+		/* plain assignment: each value describes this surface alone */
+		double SwathSizeY = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
+		double SwathSizeC = 0;
+
+		if (SwathHeightC[k] != 0)
+			SwathSizeC = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
+
+		SwathSizeAllSurfaces += SwathSizeY + SwathSizeC;
+	}
+
+	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
+
+	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
+	for (k = 0; k < NumberOfActiveSurfaces; k++) {
+		double LineTime = HTotal[k] / PixelClock[k];
+		double DETSwathLatencyHidingUs;
+
+		/* only care if surface is not phantom */
+		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
+			continue;
+
+		/* lines the luma DET holds beyond one swath, scaled by VRatio and line time */
+		DETSwathLatencyHidingUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
+
+		if (SwathHeightC[k] != 0) {
+			double DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
+
+			/* the smaller of the luma and chroma figures bounds the surface */
+			DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingUs, DETSwathLatencyHidingCUs);
+		}
+
+		/* DET must be able to hide time to fetch 1 swath for each surface */
+		if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs)
+			return true;
+	}
+
+	return false;
+}
diff --git a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 3989c2a28fa..779c6805f59 100644
--- a/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/sys/dev/pci/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -1141,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
 		double *FractionOfUrgentBandwidth,
 		bool *ImmediateFlipBandwidthSupport);
 
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+		double ReturnBW,
+		double UrgentLatency,
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		unsigned int SwathWidthY[],
+		unsigned int SwathWidthC[],
+		double BytePerPixelInDETY[],
+		double BytePerPixelInDETC[],
+		unsigned int DETBufferSizeY[],
+		unsigned int DETBufferSizeC[],
+		unsigned int NumOfDPP[],
+		unsigned int HTotal[],
+		double PixelClock[],
+		double VRatioY[],
+		double VRatioC[],
+		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]);
+
 #endif
diff --git a/sys/dev/pci/drm/amd/display/dc/dml/display_mode_vba.h b/sys/dev/pci/drm/amd/display/dc/dml/display_mode_vba.h
index a0207a8f875..2b34b02dbd4 100644
---
a/sys/dev/pci/drm/amd/display/dc/dml/display_mode_vba.h +++ b/sys/dev/pci/drm/amd/display/dc/dml/display_mode_vba.h @@ -1041,6 +1041,7 @@ struct vba_vars_st { double MinFullDETBufferingTime; double AverageReadBandwidthGBytePerSecond; bool FirstMainPlane; + bool NotEnoughDETSwathFillLatencyHiding; unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX]; unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; @@ -1224,6 +1225,7 @@ struct vba_vars_st { unsigned int BlockWidthC[DC__NUM_DPP__MAX]; unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX]; bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2]; + bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2]; struct dummy_vars dummy_vars; }; -- 2.20.1