From 7c23aa04de8933cadcbece4e0abb7818ff440522 Mon Sep 17 00:00:00 2001 From: jsg Date: Fri, 10 Feb 2023 14:29:34 +0000 Subject: [PATCH] drm/i915: Fix request ref counting during error capture & debugfs dump From John Harrison 9467397f417dd7b5d0db91452f0474e79716a527 in linux-6.1.y/6.1.11 86d8ddc74124c3fdfc139f246ba6da15e45e86e3 in mainline linux --- sys/dev/pci/drm/i915/gt/intel_context.c | 4 +++- sys/dev/pci/drm/i915/gt/intel_context.h | 3 +-- sys/dev/pci/drm/i915/gt/intel_engine_cs.c | 6 +++++- sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c | 3 ++- sys/dev/pci/drm/i915/i915_gpu_error.c | 13 ++++++------- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/sys/dev/pci/drm/i915/gt/intel_context.c b/sys/dev/pci/drm/i915/gt/intel_context.c index d9a9d548171..0c1af43070d 100644 --- a/sys/dev/pci/drm/i915/gt/intel_context.c +++ b/sys/dev/pci/drm/i915/gt/intel_context.c @@ -545,7 +545,7 @@ retry: return rq; } -struct i915_request *intel_context_find_active_request(struct intel_context *ce) +struct i915_request *intel_context_get_active_request(struct intel_context *ce) { struct intel_context *parent = intel_context_to_parent(ce); struct i915_request *rq, *active = NULL; @@ -569,6 +569,8 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce) active = rq; } + if (active) + active = i915_request_get_rcu(active); spin_unlock_irqrestore(&parent->guc_state.lock, flags); return active; diff --git a/sys/dev/pci/drm/i915/gt/intel_context.h b/sys/dev/pci/drm/i915/gt/intel_context.h index be09fb2e883..4ab6c8ddd6e 100644 --- a/sys/dev/pci/drm/i915/gt/intel_context.h +++ b/sys/dev/pci/drm/i915/gt/intel_context.h @@ -268,8 +268,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce, struct i915_request *intel_context_create_request(struct intel_context *ce); -struct i915_request * -intel_context_find_active_request(struct intel_context *ce); +struct i915_request *intel_context_get_active_request(struct intel_context *ce); static inline bool intel_context_is_barrier(const struct intel_context *ce) { diff --git a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c index 1323eec5713..5ca6a8214fd 100644 --- a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c +++ b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c @@ -2208,9 +2208,11 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d if (guc) { ce = intel_engine_get_hung_context(engine); if (ce) - hung_rq = intel_context_find_active_request(ce); + hung_rq = intel_context_get_active_request(ce); } else { hung_rq = intel_engine_execlist_find_hung_request(engine); + if (hung_rq) + hung_rq = i915_request_get_rcu(hung_rq); } if (hung_rq) @@ -2221,6 +2223,8 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d else intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m); + if (hung_rq) + i915_request_put(hung_rq); } void intel_engine_dump(struct intel_engine_cs *engine, diff --git a/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c b/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c index 418fe6f8f98..3a811a65340 100644 --- a/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c +++ b/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c @@ -1688,7 +1688,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st goto next_context; guilty = false; - rq = intel_context_find_active_request(ce); + rq = intel_context_get_active_request(ce); if (!rq) { head = ce->ring->tail; goto out_replay; @@ -1701,6 +1701,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st head = intel_ring_wrap(ce->ring, rq->head); __i915_request_reset(rq, guilty); + i915_request_put(rq); out_replay: guc_reset_state(ce, head, guilty); next_context: diff --git a/sys/dev/pci/drm/i915/i915_gpu_error.c b/sys/dev/pci/drm/i915/i915_gpu_error.c index c2926afa051..0dbc91c37ef 100644 --- a/sys/dev/pci/drm/i915/i915_gpu_error.c +++ b/sys/dev/pci/drm/i915/i915_gpu_error.c @@ -1654,7 +1654,7 @@ capture_engine(struct intel_engine_cs *engine, ce = intel_engine_get_hung_context(engine); if (ce) { intel_engine_clear_hung_context(engine); - rq = intel_context_find_active_request(ce); + rq = intel_context_get_active_request(ce); if (!rq || !i915_request_started(rq)) goto no_request_capture; } else { @@ -1665,21 +1665,18 @@ capture_engine(struct intel_engine_cs *engine, if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { spin_lock_irqsave(&engine->sched_engine->lock, flags); rq = intel_engine_execlist_find_hung_request(engine); + if (rq) + rq = i915_request_get_rcu(rq); spin_unlock_irqrestore(&engine->sched_engine->lock, flags); } } - if (rq) - rq = i915_request_get_rcu(rq); - if (!rq) goto no_request_capture; capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - if (!capture) { - i915_request_put(rq); + if (!capture) goto no_request_capture; - } if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) intel_guc_capture_get_matching_node(engine->gt, ee, ce); @@ -1689,6 +1686,8 @@ capture_engine(struct intel_engine_cs *engine, return ee; no_request_capture: + if (rq) + i915_request_put(rq); kfree(ee); return NULL; } -- 2.20.1