From b5b36f0ff309f84c94d496dda0350e7c0ae91391 Mon Sep 17 00:00:00 2001 From: mpi Date: Fri, 24 Feb 2023 15:17:48 +0000 Subject: [PATCH] Do not hold the vm_map lock while flushing pages in msync(2) and madvise(2). Mark the VM map as busy instead to prevent any sibling thread from requesting an exclusive version of the vm_map. This is necessary to let any PG_BUSY page, found in the UVM vnode object, be released by a sibling in the middle of a page fault. Note: the page-fault handler releases & re-grabs a shared version of the vm_map lock and expects it to be available to make progress. Prevent a 3-thread deadlock between msync(2), page-fault and mmap(2). The deadlock reported on bugs@ by many occurred as follows: ..ThreadA faults & grabs the shared `vmmaplk' then releases it before calling ..uvn_get() which might sleep to allocate pages and mark them as PG_BUSY. ..Once the lock is released, threadB calls uvn_flush(). It sees at least a ..PG_BUSY page and sleeps on the `vmmaplk' waiting for threadA to un-busy ..the page. ..At the same time threadC asked for an exclusive version of the lock and ..sleeps until all readers are done with it. This prevents threadA from ..acquiring a shared version of the lock and finishing the page fault. This issue is similar to NetBSD's PR #56952 and the fix is from Chuck Silvers. Tested by many on bugs@, thanks! 
ok kettenis@ --- sys/uvm/uvm_map.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 71fb9feaeed..cde8d16cee6 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uvm_map.c,v 1.312 2023/02/13 14:52:55 deraadt Exp $ */ +/* $OpenBSD: uvm_map.c,v 1.313 2023/02/24 15:17:48 mpi Exp $ */ /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */ /* @@ -4569,8 +4569,7 @@ fail: * => never a need to flush amap layer since the anonymous memory has * no permanent home, but may deactivate pages there * => called from sys_msync() and sys_madvise() - * => caller must not write-lock map (read OK). - * => we may sleep while cleaning if SYNCIO [with map read-locked] + * => caller must not have map locked */ int @@ -4592,25 +4591,27 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags) if (start > end || start < map->min_offset || end > map->max_offset) return EINVAL; - vm_map_lock_read(map); + vm_map_lock(map); first = uvm_map_entrybyaddr(&map->addr, start); /* Make a first pass to check for holes. */ for (entry = first; entry != NULL && entry->start < end; entry = RBT_NEXT(uvm_map_addr, entry)) { if (UVM_ET_ISSUBMAP(entry)) { - vm_map_unlock_read(map); + vm_map_unlock(map); return EINVAL; } if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISHOLE(entry) || (entry->end < end && VMMAP_FREE_END(entry) != entry->end)) { - vm_map_unlock_read(map); + vm_map_unlock(map); return EFAULT; } } + vm_map_busy(map); + vm_map_unlock(map); error = 0; for (entry = first; entry != NULL && entry->start < end; entry = RBT_NEXT(uvm_map_addr, entry)) { @@ -4722,7 +4723,7 @@ flush_object: } } - vm_map_unlock_read(map); + vm_map_unbusy(map); return error; } -- 2.20.1