drm/i915: Fix request ref counting during error capture & debugfs dump
author    jsg <jsg@openbsd.org>
Fri, 10 Feb 2023 14:29:34 +0000 (14:29 +0000)
committer jsg <jsg@openbsd.org>
Fri, 10 Feb 2023 14:29:34 +0000 (14:29 +0000)
From John Harrison
9467397f417dd7b5d0db91452f0474e79716a527 in linux-6.1.y/6.1.11
86d8ddc74124c3fdfc139f246ba6da15e45e86e3 in mainline linux

sys/dev/pci/drm/i915/gt/intel_context.c
sys/dev/pci/drm/i915/gt/intel_context.h
sys/dev/pci/drm/i915/gt/intel_engine_cs.c
sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c
sys/dev/pci/drm/i915/i915_gpu_error.c

index d9a9d54..0c1af43 100644 (file)
@@ -545,7 +545,7 @@ retry:
        return rq;
 }
 
-struct i915_request *intel_context_find_active_request(struct intel_context *ce)
+struct i915_request *intel_context_get_active_request(struct intel_context *ce)
 {
        struct intel_context *parent = intel_context_to_parent(ce);
        struct i915_request *rq, *active = NULL;
@@ -569,6 +569,8 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
 
                active = rq;
        }
+       if (active)
+               active = i915_request_get_rcu(active);
        spin_unlock_irqrestore(&parent->guc_state.lock, flags);
 
        return active;
index be09fb2..4ab6c8d 100644 (file)
@@ -268,8 +268,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
 
 struct i915_request *intel_context_create_request(struct intel_context *ce);
 
-struct i915_request *
-intel_context_find_active_request(struct intel_context *ce);
+struct i915_request *intel_context_get_active_request(struct intel_context *ce);
 
 static inline bool intel_context_is_barrier(const struct intel_context *ce)
 {
index 1323eec..5ca6a82 100644 (file)
@@ -2208,9 +2208,11 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
        if (guc) {
                ce = intel_engine_get_hung_context(engine);
                if (ce)
-                       hung_rq = intel_context_find_active_request(ce);
+                       hung_rq = intel_context_get_active_request(ce);
        } else {
                hung_rq = intel_engine_execlist_find_hung_request(engine);
+               if (hung_rq)
+                       hung_rq = i915_request_get_rcu(hung_rq);
        }
 
        if (hung_rq)
@@ -2221,6 +2223,8 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
        else
                intel_engine_dump_active_requests(&engine->sched_engine->requests,
                                                  hung_rq, m);
+       if (hung_rq)
+               i915_request_put(hung_rq);
 }
 
 void intel_engine_dump(struct intel_engine_cs *engine,
index 418fe6f..3a811a6 100644 (file)
@@ -1688,7 +1688,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
                        goto next_context;
 
                guilty = false;
-               rq = intel_context_find_active_request(ce);
+               rq = intel_context_get_active_request(ce);
                if (!rq) {
                        head = ce->ring->tail;
                        goto out_replay;
@@ -1701,6 +1701,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
                head = intel_ring_wrap(ce->ring, rq->head);
 
                __i915_request_reset(rq, guilty);
+               i915_request_put(rq);
 out_replay:
                guc_reset_state(ce, head, guilty);
 next_context:
index c2926af..0dbc91c 100644 (file)
@@ -1654,7 +1654,7 @@ capture_engine(struct intel_engine_cs *engine,
        ce = intel_engine_get_hung_context(engine);
        if (ce) {
                intel_engine_clear_hung_context(engine);
-               rq = intel_context_find_active_request(ce);
+               rq = intel_context_get_active_request(ce);
                if (!rq || !i915_request_started(rq))
                        goto no_request_capture;
        } else {
@@ -1665,21 +1665,18 @@ capture_engine(struct intel_engine_cs *engine,
                if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
                        spin_lock_irqsave(&engine->sched_engine->lock, flags);
                        rq = intel_engine_execlist_find_hung_request(engine);
+                       if (rq)
+                               rq = i915_request_get_rcu(rq);
                        spin_unlock_irqrestore(&engine->sched_engine->lock,
                                               flags);
                }
        }
-       if (rq)
-               rq = i915_request_get_rcu(rq);
-
        if (!rq)
                goto no_request_capture;
 
        capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
-       if (!capture) {
-               i915_request_put(rq);
+       if (!capture)
                goto no_request_capture;
-       }
        if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
                intel_guc_capture_get_matching_node(engine->gt, ee, ce);
 
@@ -1689,6 +1686,8 @@ capture_engine(struct intel_engine_cs *engine,
        return ee;
 
 no_request_capture:
+       if (rq)
+               i915_request_put(rq);
        kfree(ee);
        return NULL;
 }