drm/amd/display: add pixel rate based CRB allocation support
authorjsg <jsg@openbsd.org>
Fri, 4 Aug 2023 08:43:06 +0000 (08:43 +0000)
committerjsg <jsg@openbsd.org>
Fri, 4 Aug 2023 08:43:06 +0000 (08:43 +0000)
From Dmytro Laktyushkin
38fa05cad9df113fb31fabc0b88f5ca681235bd2 in linux-6.1.y/6.1.43
9ba90d760e9354c124fa9bbea08017d96699a82c in mainline linux

sys/dev/pci/drm/amd/display/dc/dcn31/dcn31_hubbub.c
sys/dev/pci/drm/amd/display/dc/dcn315/dcn315_resource.c
sys/dev/pci/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
sys/dev/pci/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
sys/dev/pci/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
sys/dev/pci/drm/amd/display/dc/dml/display_mode_vba.c

index 6360dc9..0f231e4 100644 (file)
@@ -103,6 +103,7 @@ static void dcn31_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigne
        default:
                break;
        }
+       DC_LOG_DEBUG("Set DET%d to %d segments\n", hubp_inst, det_size_segments);
        /* Should never be hit, if it is we have an erroneous hw config*/
        ASSERT(hubbub2->det0_size + hubbub2->det1_size + hubbub2->det2_size
                        + hubbub2->det3_size + hubbub2->compbuf_size_segments <= hubbub2->crb_size_segs);
index 19f2025..88c4a37 100644 (file)
 
 #define DCN3_15_MAX_DET_SIZE 384
 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64
+#define DCN3_15_MAX_DET_SEGS (DCN3_15_MAX_DET_SIZE / DCN3_15_CRB_SEGMENT_SIZE_KB)
+/* Minimum 2 extra segments need to be in compbuf and claimable to guarantee seamless mpo transitions */
+#define MIN_RESERVED_DET_SEGS 2
 
 enum dcn31_clk_src_array_id {
        DCN31_CLK_SRC_PLL0,
@@ -1636,21 +1639,57 @@ static bool is_dual_plane(enum surface_pixel_format format)
        return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
 }
 
+static int source_format_to_bpp (enum source_format_class SourcePixelFormat)
+{
+       if (SourcePixelFormat == dm_444_64)
+               return 8;
+       else if (SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_16)
+               return 2;
+       else if (SourcePixelFormat == dm_444_8)
+               return 1;
+       else if (SourcePixelFormat == dm_rgbe_alpha)
+               return 5;
+       else if (SourcePixelFormat == dm_420_8)
+               return 3;
+       else if (SourcePixelFormat == dm_420_12)
+               return 6;
+       else
+               return 4;
+}
+
+static bool allow_pixel_rate_crb(struct dc *dc, struct dc_state *context)
+{
+       int i;
+       struct resource_context *res_ctx = &context->res_ctx;
+
+       for (i = 0; i < dc->res_pool->pipe_count; i++) {
+               if (!res_ctx->pipe_ctx[i].stream)
+                       continue;
+
+               /*Don't apply if MPO to avoid transition issues*/
+               if (res_ctx->pipe_ctx[i].top_pipe && res_ctx->pipe_ctx[i].top_pipe->plane_state != res_ctx->pipe_ctx[i].plane_state)
+                       return false;
+       }
+       return true;
+}
+
 static int dcn315_populate_dml_pipes_from_context(
        struct dc *dc, struct dc_state *context,
        display_e2e_pipe_params_st *pipes,
        bool fast_validate)
 {
-       int i, pipe_cnt;
+       int i, pipe_cnt, crb_idx, crb_pipes;
        struct resource_context *res_ctx = &context->res_ctx;
        struct pipe_ctx *pipe;
        const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB;
+       int remaining_det_segs = max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB;
+       bool pixel_rate_crb = allow_pixel_rate_crb(dc, context);
 
        DC_FP_START();
        dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
        DC_FP_END();
 
-       for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
+       for (i = 0, pipe_cnt = 0, crb_pipes = 0; i < dc->res_pool->pipe_count; i++) {
                struct dc_crtc_timing *timing;
 
                if (!res_ctx->pipe_ctx[i].stream)
@@ -1672,6 +1711,23 @@ static int dcn315_populate_dml_pipes_from_context(
                pipes[pipe_cnt].dout.dsc_input_bpc = 0;
                DC_FP_START();
                dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+               if (pixel_rate_crb && !pipe->top_pipe && !pipe->prev_odm_pipe) {
+                       int bpp = source_format_to_bpp(pipes[pipe_cnt].pipe.src.source_format);
+                       /* Ceil to crb segment size */
+                       int approx_det_segs_required_for_pstate = dcn_get_approx_det_segs_required_for_pstate(
+                                       &context->bw_ctx.dml.soc, timing->pix_clk_100hz, bpp, DCN3_15_CRB_SEGMENT_SIZE_KB);
+                       if (approx_det_segs_required_for_pstate <= 2 * DCN3_15_MAX_DET_SEGS) {
+                               bool split_required = approx_det_segs_required_for_pstate > DCN3_15_MAX_DET_SEGS;
+                               split_required = split_required || timing->pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc);
+                               split_required = split_required || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
+                               if (split_required)
+                                       approx_det_segs_required_for_pstate += approx_det_segs_required_for_pstate % 2;
+                               pipes[pipe_cnt].pipe.src.det_size_override = approx_det_segs_required_for_pstate;
+                               remaining_det_segs -= approx_det_segs_required_for_pstate;
+                       } else
+                               remaining_det_segs = -1;
+                       crb_pipes++;
+               }
                DC_FP_END();
 
                if (pipes[pipe_cnt].dout.dsc_enable) {
@@ -1690,16 +1746,49 @@ static int dcn315_populate_dml_pipes_from_context(
                                break;
                        }
                }
-
                pipe_cnt++;
        }
 
+       /* Spread remaining unreserved crb evenly among all pipes, use default policy if not enough det or single pipe */
+       if (pixel_rate_crb) {
+               for (i = 0, pipe_cnt = 0, crb_idx = 0; i < dc->res_pool->pipe_count; i++) {
+                       pipe = &res_ctx->pipe_ctx[i];
+                       if (!pipe->stream)
+                               continue;
+
+                       if (!pipe->top_pipe && !pipe->prev_odm_pipe) {
+                               bool split_required = pipe->stream->timing.pix_clk_100hz >= dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)
+                                               || (pipe->plane_state && pipe->plane_state->src_rect.width > 5120);
+
+                               if (remaining_det_segs < 0 || crb_pipes == 1)
+                                       pipes[pipe_cnt].pipe.src.det_size_override = 0;
+                               if (remaining_det_segs > MIN_RESERVED_DET_SEGS)
+                                       pipes[pipe_cnt].pipe.src.det_size_override += (remaining_det_segs - MIN_RESERVED_DET_SEGS) / crb_pipes +
+                                                       (crb_idx < (remaining_det_segs - MIN_RESERVED_DET_SEGS) % crb_pipes ? 1 : 0);
+                               if (pipes[pipe_cnt].pipe.src.det_size_override > 2 * DCN3_15_MAX_DET_SEGS) {
+                                       /* Clamp to 2 pipe split max det segments */
+                                       remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override - 2 * (DCN3_15_MAX_DET_SEGS);
+                                       pipes[pipe_cnt].pipe.src.det_size_override = 2 * DCN3_15_MAX_DET_SEGS;
+                               }
+                               if (pipes[pipe_cnt].pipe.src.det_size_override > DCN3_15_MAX_DET_SEGS || split_required) {
+                                       /* If we are splitting we must have an even number of segments */
+                                       remaining_det_segs += pipes[pipe_cnt].pipe.src.det_size_override % 2;
+                                       pipes[pipe_cnt].pipe.src.det_size_override -= pipes[pipe_cnt].pipe.src.det_size_override % 2;
+                               }
+                               /* Convert segments into size for DML use */
+                               pipes[pipe_cnt].pipe.src.det_size_override *= DCN3_15_CRB_SEGMENT_SIZE_KB;
+                               crb_idx++;
+                       }
+                       pipe_cnt++;
+               }
+       }
+
        if (pipe_cnt)
                context->bw_ctx.dml.ip.det_buffer_size_kbytes =
                                (max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / pipe_cnt) * DCN3_15_CRB_SEGMENT_SIZE_KB;
        if (context->bw_ctx.dml.ip.det_buffer_size_kbytes > DCN3_15_MAX_DET_SIZE)
                context->bw_ctx.dml.ip.det_buffer_size_kbytes = DCN3_15_MAX_DET_SIZE;
-       ASSERT(context->bw_ctx.dml.ip.det_buffer_size_kbytes >= DCN3_15_DEFAULT_DET_SIZE);
+
        dc->config.enable_4to1MPC = false;
        if (pipe_cnt == 1 && pipe->plane_state && !dc->debug.disable_z9_mpc) {
                if (is_dual_plane(pipe->plane_state->format)
index e48923f..0c0feec 100644 (file)
@@ -483,7 +483,7 @@ void dcn31_calculate_wm_and_dlg_fp(
                int pipe_cnt,
                int vlevel)
 {
-       int i, pipe_idx, active_hubp_count = 0;
+       int i, pipe_idx, total_det = 0, active_hubp_count = 0;
        double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
 
        dc_assert_fp_enabled();
@@ -560,6 +560,18 @@ void dcn31_calculate_wm_and_dlg_fp(
                context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
                context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
        }
+       for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+               if (!context->res_ctx.pipe_ctx[i].stream)
+                       continue;
+
+               context->res_ctx.pipe_ctx[i].det_buffer_size_kb =
+                               get_det_buffer_size_kbytes(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+               if (context->res_ctx.pipe_ctx[i].det_buffer_size_kb > 384)
+                       context->res_ctx.pipe_ctx[i].det_buffer_size_kb /= 2;
+               total_det += context->res_ctx.pipe_ctx[i].det_buffer_size_kb;
+               pipe_idx++;
+       }
+       context->bw_ctx.bw.dcn.compbuf_size_kb = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - total_det;
 }
 
 void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
@@ -812,3 +824,14 @@ int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
 {
        return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
 }
+
+int dcn_get_approx_det_segs_required_for_pstate(
+               struct _vcs_dpi_soc_bounding_box_st *soc,
+               int pix_clk_100hz, int bpp, int seg_size_kb)
+{
+       /* Roughly calculate required crb to hide latency. In practice there is slightly
+        * more buffer available for latency hiding
+        */
+       return (int)(soc->dram_clock_change_latency_us * pix_clk_100hz * bpp
+                                       / 10240000 + seg_size_kb - 1) / seg_size_kb;
+}
index ab8c48b..99518f6 100644 (file)
@@ -47,5 +47,8 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc);
+int dcn_get_approx_det_segs_required_for_pstate(
+               struct _vcs_dpi_soc_bounding_box_st *soc,
+               int pix_clk_100hz, int bpp, int seg_size_kb);
 
 #endif /* __DCN31_FPU_H__*/
index cf8f3d6..ebc04b7 100644 (file)
@@ -533,7 +533,8 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               unsigned int DETBufferSizeInKByte,
+               bool DETSharedByAllDPP,
+               unsigned int DETBufferSizeInKByte[],
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -3116,7 +3117,7 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
                                v->SurfaceWidthC[k],
                                v->SurfaceHeightY[k],
                                v->SurfaceHeightC[k],
-                               v->DETBufferSizeInKByte[0] * 1024,
+                               v->DETBufferSizeInKByte[k] * 1024,
                                v->BlockHeight256BytesY[k],
                                v->BlockHeight256BytesC[k],
                                v->SurfaceTiling[k],
@@ -3311,7 +3312,8 @@ static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
        CalculateSwathAndDETConfiguration(
                        false,
                        v->NumberOfActivePlanes,
-                       v->DETBufferSizeInKByte[0],
+                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                       v->DETBufferSizeInKByte,
                        dummy1,
                        dummy2,
                        v->SourceScan,
@@ -3777,14 +3779,16 @@ static noinline void CalculatePrefetchSchedulePerPlane(
                &v->VReadyOffsetPix[k]);
 }
 
-static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int *DETBufferSizeInKByte)
+static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
 {
        int i, total_pipes = 0;
        for (i = 0; i < NumberOfActivePlanes; i++)
                total_pipes += NoOfDPPThisState[i];
-       *DETBufferSizeInKByte = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
-       if (*DETBufferSizeInKByte > DCN3_15_MAX_DET_SIZE)
-               *DETBufferSizeInKByte = DCN3_15_MAX_DET_SIZE;
+       DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
+       if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
+               DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
+       for (i = 1; i < NumberOfActivePlanes; i++)
+               DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
 }
 
 
@@ -4024,7 +4028,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
        CalculateSwathAndDETConfiguration(
                        true,
                        v->NumberOfActivePlanes,
-                       v->DETBufferSizeInKByte[0],
+                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                       v->DETBufferSizeInKByte,
                        v->MaximumSwathWidthLuma,
                        v->MaximumSwathWidthChroma,
                        v->SourceScan,
@@ -4164,6 +4169,10 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                                || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
                                        v->DISPCLK_DPPCLK_Support[i][j] = false;
                                }
+                               if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
+                                       v->MPCCombine[i][j][k] = true;
+                                       v->NoOfDPP[i][j][k] = 2;
+                               }
                        }
                        v->TotalNumberOfActiveDPP[i][j] = 0;
                        v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
@@ -4640,12 +4649,13 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
                                v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
                        }
 
-                       if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315)
-                               PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, &v->DETBufferSizeInKByte[0]);
+                       if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
+                               PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
                        CalculateSwathAndDETConfiguration(
                                        false,
                                        v->NumberOfActivePlanes,
-                                       v->DETBufferSizeInKByte[0],
+                                       mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
+                                       v->DETBufferSizeInKByte,
                                        v->MaximumSwathWidthLuma,
                                        v->MaximumSwathWidthChroma,
                                        v->SourceScan,
@@ -6557,7 +6567,8 @@ static void CalculateStutterEfficiency(
 static void CalculateSwathAndDETConfiguration(
                bool ForceSingleDPP,
                int NumberOfActivePlanes,
-               unsigned int DETBufferSizeInKByte,
+               bool DETSharedByAllDPP,
+               unsigned int DETBufferSizeInKByteA[],
                double MaximumSwathWidthLuma[],
                double MaximumSwathWidthChroma[],
                enum scan_direction_class SourceScan[],
@@ -6641,6 +6652,10 @@ static void CalculateSwathAndDETConfiguration(
 
        *ViewportSizeSupport = true;
        for (k = 0; k < NumberOfActivePlanes; ++k) {
+               unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
+
+               if (DETSharedByAllDPP && DPPPerPlane[k])
+                       DETBufferSizeInKByte /= DPPPerPlane[k];
                if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
                                || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
                        if (SurfaceTiling[k] == dm_sw_linear
index 8e6585d..1070cf8 100644 (file)
@@ -569,6 +569,10 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
                mode_lib->vba.OutputLinkDPRate[mode_lib->vba.NumberOfActivePlanes] = dout->dp_rate;
                mode_lib->vba.ODMUse[mode_lib->vba.NumberOfActivePlanes] = dst->odm_combine_policy;
                mode_lib->vba.DETSizeOverride[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+               if (src->det_size_override)
+                       mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src->det_size_override;
+               else
+                       mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = ip->det_buffer_size_kbytes;
                //TODO: Need to assign correct values to dp_multistream vars
                mode_lib->vba.OutputMultistreamEn[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_en;
                mode_lib->vba.OutputMultistreamId[mode_lib->vba.NumberOfActiveSurfaces] = dout->dp_multistream_id;
@@ -783,6 +787,8 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
                                        mode_lib->vba.pipe_plane[k] =
                                                        mode_lib->vba.NumberOfActivePlanes;
                                        mode_lib->vba.DPPPerPlane[mode_lib->vba.NumberOfActivePlanes]++;
+                                       if (src_k->det_size_override)
+                                               mode_lib->vba.DETBufferSizeInKByte[mode_lib->vba.NumberOfActivePlanes] = src_k->det_size_override;
                                        if (mode_lib->vba.SourceScan[mode_lib->vba.NumberOfActivePlanes]
                                                        == dm_horz) {
                                                mode_lib->vba.ViewportWidth[mode_lib->vba.NumberOfActivePlanes] +=