drm/i915: improve the catch-all evict to handle lock contention
author: jsg <jsg@openbsd.org>
Mon, 9 Jan 2023 04:22:46 +0000 (04:22 +0000)
committer: jsg <jsg@openbsd.org>
Mon, 9 Jan 2023 04:22:46 +0000 (04:22 +0000)
From Matthew Auld
ea62bd769994d6f18bd265cc156e6182a288c880 in linux-6.1.y/6.1.4
3f882f2d4f689627c1566c2c92087bc3ff734953 in mainline linux

sys/dev/pci/drm/i915/gem/i915_gem_execbuffer.c
sys/dev/pci/drm/i915/gem/i915_gem_mman.c
sys/dev/pci/drm/i915/i915_gem_evict.c
sys/dev/pci/drm/i915/i915_gem_evict.h
sys/dev/pci/drm/i915/i915_vma.c
sys/dev/pci/drm/i915/selftests/i915_gem_evict.c

index 0598d53..f897a9e 100644 (file)
@@ -736,32 +736,69 @@ static int eb_reserve(struct i915_execbuffer *eb)
        bool unpinned;
 
        /*
-        * Attempt to pin all of the buffers into the GTT.
-        * This is done in 2 phases:
+        * We have one more buffers that we couldn't bind, which could be due to
+        * various reasons. To resolve this we have 4 passes, with every next
+        * level turning the screws tighter:
         *
-        * 1. Unbind all objects that do not match the GTT constraints for
-        *    the execbuffer (fenceable, mappable, alignment etc).
-        * 2. Bind new objects.
+        * 0. Unbind all objects that do not match the GTT constraints for the
+        * execbuffer (fenceable, mappable, alignment etc). Bind all new
+        * objects.  This avoids unnecessary unbinding of later objects in order
+        * to make room for the earlier objects *unless* we need to defragment.
         *
-        * This avoid unnecessary unbinding of later objects in order to make
-        * room for the earlier objects *unless* we need to defragment.
+        * 1. Reorder the buffers, where objects with the most restrictive
+        * placement requirements go first (ignoring fixed location buffers for
+        * now).  For example, objects needing the mappable aperture (the first
+        * 256M of GTT), should go first vs objects that can be placed just
+        * about anywhere. Repeat the previous pass.
         *
-        * Defragmenting is skipped if all objects are pinned at a fixed location.
+        * 2. Consider buffers that are pinned at a fixed location. Also try to
+        * evict the entire VM this time, leaving only objects that we were
+        * unable to lock. Try again to bind the buffers. (still using the new
+        * buffer order).
+        *
+        * 3. We likely have object lock contention for one or more stubborn
+        * objects in the VM, for which we need to evict to make forward
+        * progress (perhaps we are fighting the shrinker?). When evicting the
+        * VM this time around, anything that we can't lock we now track using
+        * the busy_bo, using the full lock (after dropping the vm->mutex to
+        * prevent deadlocks), instead of trylock. We then continue to evict the
+        * VM, this time with the stubborn object locked, which we can now
+        * hopefully unbind (if still bound in the VM). Repeat until the VM is
+        * evicted. Finally we should be able bind everything.
         */
-       for (pass = 0; pass <= 2; pass++) {
+       for (pass = 0; pass <= 3; pass++) {
                int pin_flags = PIN_USER | PIN_VALIDATE;
 
                if (pass == 0)
                        pin_flags |= PIN_NONBLOCK;
 
                if (pass >= 1)
-                       unpinned = eb_unbind(eb, pass == 2);
+                       unpinned = eb_unbind(eb, pass >= 2);
 
                if (pass == 2) {
                        err = mutex_lock_interruptible(&eb->context->vm->mutex);
                        if (!err) {
-                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
+                               mutex_unlock(&eb->context->vm->mutex);
+                       }
+                       if (err)
+                               return err;
+               }
+
+               if (pass == 3) {
+retry:
+                       err = mutex_lock_interruptible(&eb->context->vm->mutex);
+                       if (!err) {
+                               struct drm_i915_gem_object *busy_bo = NULL;
+
+                               err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
                                mutex_unlock(&eb->context->vm->mutex);
+                               if (err && busy_bo) {
+                                       err = i915_gem_object_lock(busy_bo, &eb->ww);
+                                       i915_gem_object_put(busy_bo);
+                                       if (!err)
+                                               goto retry;
+                               }
                        }
                        if (err)
                                return err;
index fe8d0d3..3a46257 100644 (file)
@@ -404,7 +404,7 @@ retry:
                if (vma == ERR_PTR(-ENOSPC)) {
                        ret = mutex_lock_interruptible(&ggtt->vm.mutex);
                        if (!ret) {
-                               ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+                               ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
                                mutex_unlock(&ggtt->vm.mutex);
                        }
                        if (ret)
@@ -725,7 +725,7 @@ retry:
                if (vma == ERR_PTR(-ENOSPC)) {
                        ret = mutex_lock_interruptible(&ggtt->vm.mutex);
                        if (!ret) {
-                               ret = i915_gem_evict_vm(&ggtt->vm, &ww);
+                               ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
                                mutex_unlock(&ggtt->vm.mutex);
                        }
                        if (ret)
index eb8b428..1b852fe 100644 (file)
@@ -416,6 +416,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  * @vm: Address space to cleanse
  * @ww: An optional struct i915_gem_ww_ctx. If not NULL, i915_gem_evict_vm
  * will be able to evict vma's locked by the ww as well.
+ * @busy_bo: Optional pointer to struct drm_i915_gem_object. If not NULL, then
+ * in the event i915_gem_evict_vm() is unable to trylock an object for eviction,
+ * then @busy_bo will point to it. -EBUSY is also returned. The caller must drop
+ * the vm->mutex, before trying again to acquire the contended lock. The caller
+ * also owns a reference to the object.
  *
  * This function evicts all vmas from a vm.
  *
@@ -425,7 +430,8 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
  * To clarify: This is for freeing up virtual address space, not for freeing
  * memory in e.g. the shrinker.
  */
-int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
+int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww,
+                     struct drm_i915_gem_object **busy_bo)
 {
        int ret = 0;
 
@@ -457,15 +463,22 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
                         * the resv is shared among multiple objects, we still
                         * need the object ref.
                         */
-                       if (dying_vma(vma) ||
+                       if (!i915_gem_object_get_rcu(vma->obj) ||
                            (ww && (dma_resv_locking_ctx(vma->obj->base.resv) == &ww->ctx))) {
                                __i915_vma_pin(vma);
                                list_add(&vma->evict_link, &locked_eviction_list);
                                continue;
                        }
 
-                       if (!i915_gem_object_trylock(vma->obj, ww))
+                       if (!i915_gem_object_trylock(vma->obj, ww)) {
+                               if (busy_bo) {
+                                       *busy_bo = vma->obj; /* holds ref */
+                                       ret = -EBUSY;
+                                       break;
+                               }
+                               i915_gem_object_put(vma->obj);
                                continue;
+                       }
 
                        __i915_vma_pin(vma);
                        list_add(&vma->evict_link, &eviction_list);
@@ -473,25 +486,29 @@ int i915_gem_evict_vm(struct i915_address_space *vm, struct i915_gem_ww_ctx *ww)
                if (list_empty(&eviction_list) && list_empty(&locked_eviction_list))
                        break;
 
-               ret = 0;
                /* Unbind locked objects first, before unlocking the eviction_list */
                list_for_each_entry_safe(vma, vn, &locked_eviction_list, evict_link) {
                        __i915_vma_unpin(vma);
 
-                       if (ret == 0)
+                       if (ret == 0) {
                                ret = __i915_vma_unbind(vma);
-                       if (ret != -EINTR) /* "Get me out of here!" */
-                               ret = 0;
+                               if (ret != -EINTR) /* "Get me out of here!" */
+                                       ret = 0;
+                       }
+                       if (!dying_vma(vma))
+                               i915_gem_object_put(vma->obj);
                }
 
                list_for_each_entry_safe(vma, vn, &eviction_list, evict_link) {
                        __i915_vma_unpin(vma);
-                       if (ret == 0)
+                       if (ret == 0) {
                                ret = __i915_vma_unbind(vma);
-                       if (ret != -EINTR) /* "Get me out of here!" */
-                               ret = 0;
+                               if (ret != -EINTR) /* "Get me out of here!" */
+                                       ret = 0;
+                       }
 
                        i915_gem_object_unlock(vma->obj);
+                       i915_gem_object_put(vma->obj);
                }
        } while (ret == 0);
 
index e593c53..bf0ee0e 100644 (file)
@@ -11,6 +11,7 @@
 struct drm_mm_node;
 struct i915_address_space;
 struct i915_gem_ww_ctx;
+struct drm_i915_gem_object;
 
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
                                          struct i915_gem_ww_ctx *ww,
@@ -23,6 +24,7 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm,
                                         struct drm_mm_node *node,
                                         unsigned int flags);
 int i915_gem_evict_vm(struct i915_address_space *vm,
-                     struct i915_gem_ww_ctx *ww);
+                     struct i915_gem_ww_ctx *ww,
+                     struct drm_i915_gem_object **busy_bo);
 
 #endif /* __I915_GEM_EVICT_H__ */
index df54689..45c72f2 100644 (file)
@@ -1596,7 +1596,7 @@ static int __i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
                         * locked objects when called from execbuf when pinning
                         * is removed. This would probably regress badly.
                         */
-                       i915_gem_evict_vm(vm, NULL);
+                       i915_gem_evict_vm(vm, NULL, NULL);
                        mutex_unlock(&vm->mutex);
                }
        } while (1);
index 7236bb2..c364d78 100644 (file)
@@ -344,7 +344,7 @@ static int igt_evict_vm(void *arg)
 
        /* Everything is pinned, nothing should happen */
        mutex_lock(&ggtt->vm.mutex);
-       err = i915_gem_evict_vm(&ggtt->vm, NULL);
+       err = i915_gem_evict_vm(&ggtt->vm, NULL, NULL);
        mutex_unlock(&ggtt->vm.mutex);
        if (err) {
                pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n",
@@ -356,7 +356,7 @@ static int igt_evict_vm(void *arg)
 
        for_i915_gem_ww(&ww, err, false) {
                mutex_lock(&ggtt->vm.mutex);
-               err = i915_gem_evict_vm(&ggtt->vm, &ww);
+               err = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
                mutex_unlock(&ggtt->vm.mutex);
        }