From 2ecbf4e042638745749bfc966c13d72e06fa4389 Mon Sep 17 00:00:00 2001 From: jsg Date: Mon, 19 Jul 2021 10:01:10 +0000 Subject: [PATCH] drm/scheduler: Fix hang when sched_entity released From Andrey Grodzovsky 5ed8c298b2e140c640af8813a490fb4d77165e97 in linux 5.10.y/5.10.51 c61cdbdbffc169dc7f1e6fe94dfffaf574fe672a in mainline linux --- sys/dev/pci/drm/scheduler/sched_entity.c | 3 ++- sys/dev/pci/drm/scheduler/sched_main.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sys/dev/pci/drm/scheduler/sched_entity.c b/sys/dev/pci/drm/scheduler/sched_entity.c index 23ef960b2eb..3127cba770d 100644 --- a/sys/dev/pci/drm/scheduler/sched_entity.c +++ b/sys/dev/pci/drm/scheduler/sched_entity.c @@ -113,7 +113,8 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity) rmb(); /* for list_empty to work without lock */ if (list_empty(&entity->list) || - spsc_queue_count(&entity->job_queue) == 0) + spsc_queue_count(&entity->job_queue) == 0 || + entity->stopped) return true; return false; diff --git a/sys/dev/pci/drm/scheduler/sched_main.c b/sys/dev/pci/drm/scheduler/sched_main.c index 682744c9c8b..8ebcab51606 100644 --- a/sys/dev/pci/drm/scheduler/sched_main.c +++ b/sys/dev/pci/drm/scheduler/sched_main.c @@ -895,9 +895,33 @@ EXPORT_SYMBOL(drm_sched_init); */ void drm_sched_fini(struct drm_gpu_scheduler *sched) { + struct drm_sched_entity *s_entity; + int i; + if (sched->thread) kthread_stop(sched->thread); + for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = &sched->sched_rq[i]; + + if (!rq) + continue; + + spin_lock(&rq->lock); + list_for_each_entry(s_entity, &rq->entities, list) + /* + * Prevents reinsertion and marks job_queue as idle, + * it will be removed from rq in drm_sched_entity_fini + * eventually + */ + s_entity->stopped = true; + spin_unlock(&rq->lock); + + } + + /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ +
wake_up_all(&sched->job_scheduled); + /* Confirm no work left behind accessing device structures */ cancel_delayed_work_sync(&sched->work_tdr); -- 2.20.1