drm/i915: Fix up locking around dumping requests lists
authorjsg <jsg@openbsd.org>
Fri, 10 Feb 2023 14:32:03 +0000 (14:32 +0000)
committerjsg <jsg@openbsd.org>
Fri, 10 Feb 2023 14:32:03 +0000 (14:32 +0000)
From John Harrison
04dcff26490cc8dedbfcf44cfb3e3e7a08622fd0 in linux-6.1.y/6.1.11
5bc4b43d5c6c9692ddc7b96116650cdf9406f3da in mainline linux

sys/dev/pci/drm/i915/gt/intel_engine.h
sys/dev/pci/drm/i915/gt/intel_engine_cs.c
sys/dev/pci/drm/i915/gt/intel_execlists_submission.c
sys/dev/pci/drm/i915/gt/intel_execlists_submission.h
sys/dev/pci/drm/i915/i915_gpu_error.c

index cbc8b85..7a4504e 100644 (file)
@@ -248,8 +248,8 @@ void intel_engine_dump_active_requests(struct list_head *requests,
 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
                                   ktime_t *now);
 
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+                                 struct intel_context **ce, struct i915_request **rq);
 
 u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
 struct intel_context *
index 5ca6a82..c8ae868 100644 (file)
@@ -2085,17 +2085,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
        }
 }
 
-static unsigned long list_count(struct list_head *list)
-{
-       struct list_head *pos;
-       unsigned long count = 0;
-
-       list_for_each(pos, list)
-               count++;
-
-       return count;
-}
-
 static unsigned long read_ul(void *p, size_t x)
 {
        return *(unsigned long *)(p + x);
@@ -2187,11 +2176,11 @@ void intel_engine_dump_active_requests(struct list_head *requests,
        }
 }
 
-static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
+static void engine_dump_active_requests(struct intel_engine_cs *engine,
+                                       struct drm_printer *m)
 {
+       struct intel_context *hung_ce = NULL;
        struct i915_request *hung_rq = NULL;
-       struct intel_context *ce;
-       bool guc;
 
        /*
         * No need for an engine->irq_seqno_barrier() before the seqno reads.
@@ -2200,29 +2189,20 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
         * But the intention here is just to report an instantaneous snapshot
         * so that's fine.
         */
-       lockdep_assert_held(&engine->sched_engine->lock);
+       intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
 
        drm_printf(m, "\tRequests:\n");
 
-       guc = intel_uc_uses_guc_submission(&engine->gt->uc);
-       if (guc) {
-               ce = intel_engine_get_hung_context(engine);
-               if (ce)
-                       hung_rq = intel_context_get_active_request(ce);
-       } else {
-               hung_rq = intel_engine_execlist_find_hung_request(engine);
-               if (hung_rq)
-                       hung_rq = i915_request_get_rcu(hung_rq);
-       }
-
        if (hung_rq)
                engine_dump_request(hung_rq, m, "\t\thung");
+       else if (hung_ce)
+               drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
 
-       if (guc)
+       if (intel_uc_uses_guc_submission(&engine->gt->uc))
                intel_guc_dump_active_requests(engine, hung_rq, m);
        else
-               intel_engine_dump_active_requests(&engine->sched_engine->requests,
-                                                 hung_rq, m);
+               intel_execlists_dump_active_requests(engine, hung_rq, m);
+
        if (hung_rq)
                i915_request_put(hung_rq);
 }
@@ -2234,7 +2214,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
        struct i915_gpu_error * const error = &engine->i915->gpu_error;
        struct i915_request *rq;
        intel_wakeref_t wakeref;
-       unsigned long flags;
        ktime_t dummy;
 
        if (header) {
@@ -2271,13 +2250,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
                   i915_reset_count(error));
        print_properties(engine, m);
 
-       spin_lock_irqsave(&engine->sched_engine->lock, flags);
        engine_dump_active_requests(engine, m);
 
-       drm_printf(m, "\tOn hold?: %lu\n",
-                  list_count(&engine->sched_engine->hold));
-       spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
-
        drm_printf(m, "\tMMIO base:  0x%08x\n", engine->mmio_base);
        wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
        if (wakeref) {
@@ -2323,8 +2297,7 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings,
        return siblings[0]->cops->create_virtual(siblings, count, flags);
 }
 
-struct i915_request *
-intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
+static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
 {
        struct i915_request *request, *active = NULL;
 
@@ -2376,6 +2349,33 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
        return active;
 }
 
+void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
+                                 struct intel_context **ce, struct i915_request **rq)
+{
+       unsigned long flags;
+
+       *ce = intel_engine_get_hung_context(engine);
+       if (*ce) {
+               intel_engine_clear_hung_context(engine);
+
+               *rq = intel_context_get_active_request(*ce);
+               return;
+       }
+
+       /*
+        * Getting here with GuC enabled means it is a forced error capture
+        * with no actual hang. So, no need to attempt the execlist search.
+        */
+       if (intel_uc_uses_guc_submission(&engine->gt->uc))
+               return;
+
+       spin_lock_irqsave(&engine->sched_engine->lock, flags);
+       *rq = engine_execlist_find_hung_request(engine);
+       if (*rq)
+               *rq = i915_request_get_rcu(*rq);
+       spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
 void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
 {
        /*
index 18116f1..682f9a6 100644 (file)
@@ -4156,6 +4156,33 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
        spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+static unsigned long list_count(struct list_head *list)
+{
+       struct list_head *pos;
+       unsigned long count = 0;
+
+       list_for_each(pos, list)
+               count++;
+
+       return count;
+}
+
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+                                         struct i915_request *hung_rq,
+                                         struct drm_printer *m)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&engine->sched_engine->lock, flags);
+
+       intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
+
+       drm_printf(m, "\tOn hold?: %lu\n",
+                  list_count(&engine->sched_engine->hold));
+
+       spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_execlists.c"
 #endif
index a1aa92c..d2c7d45 100644 (file)
@@ -32,6 +32,10 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
                                                        int indent),
                                   unsigned int max);
 
+void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
+                                         struct i915_request *hung_rq,
+                                         struct drm_printer *m);
+
 bool
 intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
 
index 0dbc91c..b11a4df 100644 (file)
@@ -1643,35 +1643,15 @@ capture_engine(struct intel_engine_cs *engine,
 {
        struct intel_engine_capture_vma *capture = NULL;
        struct intel_engine_coredump *ee;
-       struct intel_context *ce;
+       struct intel_context *ce = NULL;
        struct i915_request *rq = NULL;
-       unsigned long flags;
 
        ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
        if (!ee)
                return NULL;
 
-       ce = intel_engine_get_hung_context(engine);
-       if (ce) {
-               intel_engine_clear_hung_context(engine);
-               rq = intel_context_get_active_request(ce);
-               if (!rq || !i915_request_started(rq))
-                       goto no_request_capture;
-       } else {
-               /*
-                * Getting here with GuC enabled means it is a forced error capture
-                * with no actual hang. So, no need to attempt the execlist search.
-                */
-               if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
-                       spin_lock_irqsave(&engine->sched_engine->lock, flags);
-                       rq = intel_engine_execlist_find_hung_request(engine);
-                       if (rq)
-                               rq = i915_request_get_rcu(rq);
-                       spin_unlock_irqrestore(&engine->sched_engine->lock,
-                                              flags);
-               }
-       }
-       if (!rq)
+       intel_engine_get_hung_entity(engine, &ce, &rq);
+       if (!rq || !i915_request_started(rq))
                goto no_request_capture;
 
        capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);