drm/i915: Flush TLBs before releasing backing store
author jsg <jsg@openbsd.org>
Sat, 29 Jan 2022 12:37:08 +0000 (12:37 +0000)
committer jsg <jsg@openbsd.org>
Sat, 29 Jan 2022 12:37:08 +0000 (12:37 +0000)
From Tvrtko Ursulin
8a17a077e7e9ecce25c95dbdb27843d2d6c2f0f7 in linux 5.15.y/5.15.18
7938d61591d33394a21bdd7797a245b65428f44c in mainline linux

sys/dev/pci/drm/i915/gem/i915_gem_object_types.h
sys/dev/pci/drm/i915/gem/i915_gem_pages.c
sys/dev/pci/drm/i915/gt/intel_gt.c
sys/dev/pci/drm/i915/gt/intel_gt.h
sys/dev/pci/drm/i915/gt/intel_gt_types.h
sys/dev/pci/drm/i915/i915_reg.h
sys/dev/pci/drm/i915/i915_vma.c
sys/dev/pci/drm/i915/intel_uncore.c
sys/dev/pci/drm/i915/intel_uncore.h

index 913b6db..182e4fa 100644 (file)
@@ -298,6 +298,7 @@ struct drm_i915_gem_object {
                             I915_BO_ALLOC_USER)
 #define I915_BO_READONLY         BIT(4)
 #define I915_TILING_QUIRK_BIT    5 /* unknown swizzling; do not release! */
+#define I915_BO_WAS_BOUND_BIT    6
 
        /**
         * @mem_flags - Mutable placement-related flags
index 1a10d9a..c3eda20 100644 (file)
@@ -10,6 +10,8 @@
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
+#include "gt/intel_gt.h"
+
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
                                 struct sg_table *pages,
                                 unsigned int sg_page_sizes)
@@ -218,6 +220,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
        __i915_gem_object_reset_page_iter(obj);
        obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
+       if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               intel_wakeref_t wakeref;
+
+               with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
+                       intel_gt_invalidate_tlbs(&i915->gt);
+       }
+
        return pages;
 }
 
index dd9e779..6411b01 100644 (file)
@@ -29,6 +29,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 
        mtx_init(&gt->irq_lock, IPL_TTY);
 
+       rw_init(&gt->tlb_invalidate_lock, "itlbinv");
+
        INIT_LIST_HEAD(&gt->closed_vma);
        mtx_init(&gt->closed_lock, IPL_TTY);
 
@@ -899,3 +901,103 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
        intel_sseu_dump(&info->sseu, p);
 }
+
+struct reg_and_bit { /* a register paired with a bit value for it; result type of get_reg_and_bit() */
+       i915_reg_t reg; /* register; left zeroed when get_reg_and_bit() rejects the engine class */
+       u32 bit; /* single-bit mask, BIT(n), as returned by get_reg_and_bit() */
+};
+
+static struct reg_and_bit /* look up the register and bit mask for @engine in the class-indexed table @regs */
+get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
+               const i915_reg_t *regs, const unsigned int num) /* @num: number of entries in @regs */
+{
+       const unsigned int class = engine->class;
+       struct reg_and_bit rb = { }; /* zeroed result, returned unchanged on the warning path below */
+
+       if (drm_WARN_ON_ONCE(&engine->i915->drm,
+                            class >= num || !regs[class].reg)) /* class is outside the table, or its entry is zero */
+               return rb;
+
+       rb.reg = regs[class];
+       if (gen8 && class == VIDEO_DECODE_CLASS)
+               rb.reg.reg += 4 * engine->instance; /* step 4 bytes per instance (instance 1 lands on GEN8_M2TCR); rb.bit stays 0 */
+       else
+               rb.bit = engine->instance; /* otherwise the instance number is used as the bit index */
+
+       rb.bit = BIT(rb.bit); /* convert the bit index into a mask */
+
+       return rb;
+}
+
+void intel_gt_invalidate_tlbs(struct intel_gt *gt) /* for each engine of @gt: write its bit to the per-class register, then wait on that register */
+{
+       static const i915_reg_t gen8_regs[] = { /* per-class registers used for graphics versions 8..11 */
+               [RENDER_CLASS]                  = GEN8_RTCR,
+               [VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
+               [COPY_ENGINE_CLASS]             = GEN8_BTCR,
+       };
+       static const i915_reg_t gen12_regs[] = { /* per-class registers used for graphics version 12 */
+               [RENDER_CLASS]                  = GEN12_GFX_TLB_INV_CR,
+               [VIDEO_DECODE_CLASS]            = GEN12_VD_TLB_INV_CR,
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN12_VE_TLB_INV_CR,
+               [COPY_ENGINE_CLASS]             = GEN12_BLT_TLB_INV_CR,
+       };
+       struct drm_i915_private *i915 = gt->i915;
+       struct intel_uncore *uncore = gt->uncore;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       const i915_reg_t *regs; /* only assigned together with a non-zero num */
+       unsigned int num = 0;
+
+       if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+               return;
+
+       if (GRAPHICS_VER(i915) == 12) {
+               regs = gen12_regs;
+               num = ARRAY_SIZE(gen12_regs);
+       } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
+               regs = gen8_regs;
+               num = ARRAY_SIZE(gen8_regs);
+       } else if (GRAPHICS_VER(i915) < 8) {
+               return; /* versions below 8: return without touching any register */
+       }
+
+       if (drm_WARN_ONCE(&i915->drm, !num, /* num is still 0 only for versions above 12 */
+                         "Platform does not implement TLB invalidation!"))
+               return;
+
+       GEM_TRACE("\n");
+
+       assert_rpm_wakelock_held(&i915->runtime_pm);
+
+       mutex_lock(&gt->tlb_invalidate_lock); /* held across the whole engine loop, released at the end */
+       intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); /* the _fw register accessors below run between this get and the put */
+
+       for_each_engine(engine, gt, id) {
+               /*
+                * HW architecture suggest typical invalidation time at 40us,
+                * with pessimistic cases up to 100us and a recommendation to
+                * cap at 1ms. We go a bit higher just in case.
+                */
+               const unsigned int timeout_us = 100;
+               const unsigned int timeout_ms = 4;
+               struct reg_and_bit rb;
+
+               rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); /* gen8 flag is true when the gen8 table was selected */
+               if (!i915_mmio_reg_offset(rb.reg)) /* skip engines for which get_reg_and_bit() returned a zero register */
+                       continue;
+
+               intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               if (__intel_wait_for_register_fw(uncore,
+                                                rb.reg, rb.bit, 0,
+                                                timeout_us, timeout_ms,
+                                                NULL)) /* a non-zero return is reported as an incomplete invalidation */
+                       drm_err_ratelimited(&gt->i915->drm,
+                                           "%s TLB invalidation did not complete in %ums!\n",
+                                           engine->name, timeout_ms);
+       }
+
+       intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); /* pairs with the forcewake_get above, using the delayed variant */
+       mutex_unlock(&gt->tlb_invalidate_lock);
+}
index 74e7718..c0169d6 100644 (file)
@@ -90,4 +90,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,
 
 void intel_gt_watchdog_work(struct work_struct *work);
 
+void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+
 #endif /* __INTEL_GT_H__ */
index a81e21b..02c241b 100644 (file)
@@ -72,6 +72,8 @@ struct intel_gt {
 
        struct intel_uc uc;
 
+       struct rwlock tlb_invalidate_lock;
+
        struct intel_gt_timelines {
                spinlock_t lock; /* protects active_list */
                struct list_head active_list;
index 9023d4e..c65473f 100644 (file)
@@ -2669,6 +2669,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING     (1 << 28)
 #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT       (1 << 24)
 
+#define GEN8_RTCR      _MMIO(0x4260)
+#define GEN8_M1TCR     _MMIO(0x4264)
+#define GEN8_M2TCR     _MMIO(0x4268)
+#define GEN8_BTCR      _MMIO(0x426c)
+#define GEN8_VTCR      _MMIO(0x4270)
+
 #if 0
 #define PRB0_TAIL      _MMIO(0x2030)
 #define PRB0_HEAD      _MMIO(0x2034)
@@ -2763,6 +2769,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define   FAULT_VA_HIGH_BITS           (0xf << 0)
 #define   FAULT_GTT_SEL                        (1 << 4)
 
+#define GEN12_GFX_TLB_INV_CR   _MMIO(0xced8)
+#define GEN12_VD_TLB_INV_CR    _MMIO(0xcedc)
+#define GEN12_VE_TLB_INV_CR    _MMIO(0xcee0)
+#define GEN12_BLT_TLB_INV_CR   _MMIO(0xcee4)
+
 #define GEN12_AUX_ERR_DBG              _MMIO(0x43f4)
 
 #define FPGA_DBG               _MMIO(0x42300)
index 0d457a6..8a20291 100644 (file)
@@ -446,6 +446,9 @@ int i915_vma_bind(struct i915_vma *vma,
                vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
        }
 
+       if (vma->obj)
+               set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
        atomic_or(bind_flags, &vma->flags);
        return 0;
 }
index 4e83bd9..f3690ab 100644 (file)
@@ -751,7 +751,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore,
 }
 
 static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
-                                        enum forcewake_domains fw_domains)
+                                        enum forcewake_domains fw_domains,
+                                        bool delayed)
 {
        struct intel_uncore_forcewake_domain *domain;
        unsigned int tmp;
@@ -766,7 +767,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore,
                        continue;
                }
 
-               uncore->funcs.force_wake_put(uncore, domain->mask);
+               if (delayed &&
+                   !(domain->uncore->fw_domains_timer & domain->mask))
+                       fw_domain_arm_timer(domain);
+               else
+                       uncore->funcs.force_wake_put(uncore, domain->mask);
        }
 }
 
@@ -787,7 +792,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
                return;
 
        spin_lock_irqsave(&uncore->lock, irqflags);
-       __intel_uncore_forcewake_put(uncore, fw_domains);
+       __intel_uncore_forcewake_put(uncore, fw_domains, false);
+       spin_unlock_irqrestore(&uncore->lock, irqflags);
+}
+
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+                                       enum forcewake_domains fw_domains) /* same shape as intel_uncore_forcewake_put(), but passes delayed=true to the common helper */
+{
+       unsigned long irqflags;
+
+       if (!uncore->funcs.force_wake_put) /* no force_wake_put hook installed: nothing to do */
+               return;
+
+       spin_lock_irqsave(&uncore->lock, irqflags); /* the helper is called with uncore->lock held */
+       __intel_uncore_forcewake_put(uncore, fw_domains, true); /* delayed: a domain whose bit is clear in fw_domains_timer gets fw_domain_arm_timer() instead of force_wake_put */
        spin_unlock_irqrestore(&uncore->lock, irqflags);
 }
 
@@ -829,7 +847,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore,
        if (!uncore->funcs.force_wake_put)
                return;
 
-       __intel_uncore_forcewake_put(uncore, fw_domains);
+       __intel_uncore_forcewake_put(uncore, fw_domains, false);
 }
 
 void assert_forcewakes_inactive(struct intel_uncore *uncore)
index f4a18ac..9e46fff 100644 (file)
@@ -229,6 +229,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
                                enum forcewake_domains domains);
 void intel_uncore_forcewake_put(struct intel_uncore *uncore,
                                enum forcewake_domains domains);
+void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore,
+                                       enum forcewake_domains domains);
 void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
                                  enum forcewake_domains fw_domains);