From fc6d48fd3b921041134581a7d748516fe2a9e57f Mon Sep 17 00:00:00 2001 From: krw Date: Tue, 4 Jun 2024 20:31:35 +0000 Subject: [PATCH] Enable hibernate/resume to nvme(4) disks with 4096 byte sectors. testing by florian@ mglocker@ mlarkin@ ok deraadt@ mglocker@ mlarkin@ --- sys/arch/amd64/stand/efiboot/diskprobe.c | 13 +-- sys/arch/amd64/stand/libsa/diskprobe.c | 14 +-- sys/arch/i386/stand/libsa/diskprobe.c | 13 +-- sys/dev/ic/nvme.c | 15 +++- sys/kern/subr_hibernate.c | 105 ++++++++++++----------- sys/sys/hibernate.h | 7 +- 6 files changed, 93 insertions(+), 74 deletions(-) diff --git a/sys/arch/amd64/stand/efiboot/diskprobe.c b/sys/arch/amd64/stand/efiboot/diskprobe.c index 48f089c296f..5522d10cdbd 100644 --- a/sys/arch/amd64/stand/efiboot/diskprobe.c +++ b/sys/arch/amd64/stand/efiboot/diskprobe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: diskprobe.c,v 1.2 2020/12/09 18:10:18 krw Exp $ */ +/* $OpenBSD: diskprobe.c,v 1.3 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 1997 Tobias Weingartner @@ -282,9 +282,10 @@ bootdev_has_hibernate(void) void check_hibernate(struct diskinfo *dip) { + uint8_t buf[DEV_BSIZE]; daddr_t sec; int error; - union hibernate_info hib; + union hibernate_info *hib = (union hibernate_info *)&buf; /* read hibernate */ if (dip->disklabel.d_partitions[1].p_fstype != FS_SWAP || @@ -292,10 +293,10 @@ check_hibernate(struct diskinfo *dip) return; sec = DL_GETPOFFSET(&dip->disklabel.d_partitions[1]) + - DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - - (sizeof(union hibernate_info) / DEV_BSIZE); + DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - 1; - error = dip->strategy(dip, F_READ, sec, sizeof hib, &hib, NULL); - if (error == 0 && hib.magic == HIBERNATE_MAGIC) + error = dip->strategy(dip, F_READ, DL_SECTOBLK(&dip->disklabel, sec), + sizeof buf, &buf, NULL); + if (error == 0 && hib->magic == HIBERNATE_MAGIC) dip->bios_info.flags |= BDI_HIBVALID; /* Hibernate present */ } diff --git a/sys/arch/amd64/stand/libsa/diskprobe.c b/sys/arch/amd64/stand/libsa/diskprobe.c index 5b15988bf54..66e068b3196 100644 --- a/sys/arch/amd64/stand/libsa/diskprobe.c +++ b/sys/arch/amd64/stand/libsa/diskprobe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: diskprobe.c,v 1.27 2022/09/01 13:45:26 krw Exp $ */ +/* $OpenBSD: diskprobe.c,v 1.28 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 1997 Tobias Weingartner @@ -390,9 +390,10 @@ bootdev_has_hibernate(void) void check_hibernate(struct diskinfo *dip) { + uint8_t buf[DEV_BSIZE]; daddr_t sec; int error; - union hibernate_info hib; + union hibernate_info *hib = (union hibernate_info *)&buf; /* read hibernate */ if (dip->disklabel.d_partitions[1].p_fstype != FS_SWAP || @@ -400,11 +401,10 @@ check_hibernate(struct diskinfo *dip) return; sec = DL_GETPOFFSET(&dip->disklabel.d_partitions[1]) + - DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - - (sizeof(union hibernate_info) / DEV_BSIZE); + DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - 1; - error = dip->strategy(dip, F_READ, sec, sizeof hib, &hib, - NULL); - if (error == 0 && hib.magic == HIBERNATE_MAGIC) + error = dip->strategy(dip, F_READ, DL_SECTOBLK(&dip->disklabel, sec), + sizeof buf, &buf, NULL); + if (error == 0 && hib->magic == HIBERNATE_MAGIC) dip->bios_info.flags |= BDI_HIBVALID; /* Hibernate present */ } diff --git a/sys/arch/i386/stand/libsa/diskprobe.c b/sys/arch/i386/stand/libsa/diskprobe.c index d2876cb8d9a..b309a5c0ab4 100644 --- a/sys/arch/i386/stand/libsa/diskprobe.c +++ b/sys/arch/i386/stand/libsa/diskprobe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: diskprobe.c,v 1.48 2022/09/01 13:45:26 krw Exp $ */ +/* $OpenBSD: diskprobe.c,v 1.49 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 1997 Tobias Weingartner @@ -467,9 +467,10 @@ bootdev_has_hibernate(void) void check_hibernate(struct diskinfo *dip) { + uint8_t buf[DEV_BSIZE]; daddr_t sec; int error; - union hibernate_info hib; + union hibernate_info *hib = (union hibernate_info *)&buf; /* read hibernate */ if (dip->disklabel.d_partitions[1].p_fstype != FS_SWAP || @@ -477,10 +478,10 @@ check_hibernate(struct diskinfo *dip) return; sec = DL_GETPOFFSET(&dip->disklabel.d_partitions[1]) + - DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - - (sizeof(union hibernate_info) / DEV_BSIZE); + DL_GETPSIZE(&dip->disklabel.d_partitions[1]) - 1; - error = dip->strategy(dip, F_READ, sec, sizeof hib, &hib, NULL); - if (error == 0 && hib.magic == HIBERNATE_MAGIC) + error = dip->strategy(dip, F_READ, DL_SECTOBLK(&dip->disklabel, sec), + sizeof buf, &buf, NULL); + if (error == 0 && hib->magic == HIBERNATE_MAGIC) dip->bios_info.flags |= BDI_HIBVALID; /* Hibernate present */ } diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c index 4f0f2a3362c..fe8764b0323 100644 --- a/sys/dev/ic/nvme.c +++ b/sys/dev/ic/nvme.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nvme.c,v 1.116 2024/06/03 12:01:57 mglocker Exp $ */ +/* $OpenBSD: nvme.c,v 1.117 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 2014 David Gwynne @@ -1695,6 +1695,7 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, daddr_t poffset; size_t psize; + u_int32_t secsize; } *my = page; struct nvme_sqe_io *isqe; struct nvme_cqe *icqe; @@ -1707,6 +1708,8 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, if (op == HIB_INIT) { struct device *disk; struct device *scsibus; + struct nvm_identify_namespace *ns; + struct nvm_namespace_format *f; extern struct cfdriver sd_cd; struct scsi_link *link; struct scsibus_softc *bus_sc; @@ -1729,9 +1732,12 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, } if (my->nsid == 0) return (EIO); + ns = my->sc->sc_namespaces[my->nsid].ident; + f = &ns->lbaf[NVME_ID_NS_FLBAS(ns->flbas)]; my->poffset = blkno; my->psize = size; + my->secsize = 1 << f->lbads; memset(NVME_DMA_KVA(my->sc->sc_hib_q->q_cq_dmamem), 0, my->sc->sc_hib_q->q_entries * sizeof(struct nvme_cqe)); @@ -1771,6 +1777,9 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, if (op != HIB_W) return (0); + if (blkno + (size / DEV_BSIZE) > my->psize) + return E2BIG; + isqe = NVME_DMA_KVA(my->sc->sc_hib_q->q_sq_dmamem); isqe += my->sq_tail; if (++my->sq_tail == my->sc->sc_hib_q->q_entries) @@ -1796,8 +1805,8 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size, } } - isqe->slba = blkno + my->poffset; - isqe->nlb = (size / DEV_BSIZE) - 1; + isqe->slba = (blkno + my->poffset) / (my->secsize / DEV_BSIZE); + isqe->nlb = (size / my->secsize) - 1; isqe->cid = blkno % 0xffff; nvme_write4(my->sc, NVME_SQTDBL(NVME_HIB_Q, my->sc->sc_dstrd), diff --git a/sys/kern/subr_hibernate.c b/sys/kern/subr_hibernate.c index 2fe5eb2cbfc..08f641dad82 100644 --- a/sys/kern/subr_hibernate.c +++ b/sys/kern/subr_hibernate.c @@ -1,4 +1,4 @@ -/* $OpenBSD: subr_hibernate.c,v 1.139 2024/04/30 17:12:19 krw Exp $ */ +/* $OpenBSD: subr_hibernate.c,v 1.140 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 2011 Ariane van der Steldt @@ -36,7 +36,7 @@ #include /* Make sure the signature can fit in one block */ -CTASSERT(sizeof(union hibernate_info) <= DEV_BSIZE); +CTASSERT((offsetof(union hibernate_info, sec_size) + sizeof(u_int32_t)) <= DEV_BSIZE); /* * Hibernate piglet layout information @@ -98,6 +98,8 @@ int hib_debug = 99; #define DNPRINTF(n,x...) #endif +#define ROUNDUP(_x, _y) ((((_x)+(_y)-1)/(_y))*(_y)) + #ifndef NO_PROPOLICE extern long __guard_local; #endif /* ! NO_PROPOLICE */ @@ -592,6 +594,7 @@ get_hibernate_info(union hibernate_info *hib, int suspend) /* Make sure we have a swap partition. */ part = DISKPART(hib->dev); if (dl.d_npartitions <= part || + dl.d_secsize > sizeof(union hibernate_info) || dl.d_partitions[part].p_fstype != FS_SWAP || DL_GETPSIZE(&dl.d_partitions[part]) == 0) return (1); @@ -600,8 +603,9 @@ get_hibernate_info(union hibernate_info *hib, int suspend) hib->magic = HIBERNATE_MAGIC; /* Calculate signature block location */ - hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - - sizeof(union hibernate_info)/DEV_BSIZE; + hib->sec_size = dl.d_secsize; + hib->sig_offset = DL_GETPSIZE(&dl.d_partitions[part]) - 1; + hib->sig_offset = DL_SECTOBLK(&dl, hib->sig_offset); SHA256Init(&ctx); SHA256Update(&ctx, version, strlen(version)); @@ -629,8 +633,10 @@ get_hibernate_info(union hibernate_info *hib, int suspend) * a matching HIB_DONE call performed after the write is * completed. */ - if (hib->io_func(hib->dev, DL_GETPOFFSET(&dl.d_partitions[part]), - (vaddr_t)NULL, DL_GETPSIZE(&dl.d_partitions[part]), + if (hib->io_func(hib->dev, + DL_SECTOBLK(&dl, DL_GETPOFFSET(&dl.d_partitions[part])), + (vaddr_t)NULL, + DL_SECTOBLK(&dl, DL_GETPSIZE(&dl.d_partitions[part])), HIB_INIT, hib->io_page)) goto fail; @@ -877,9 +883,12 @@ hibernate_deflate(union hibernate_info *hib, paddr_t src, int hibernate_write_signature(union hibernate_info *hib) { + memset(&disk_hib, 0, hib->sec_size); + memcpy(&disk_hib, hib, DEV_BSIZE); + /* Write hibernate info to disk */ return (hib->io_func(hib->dev, hib->sig_offset, - (vaddr_t)hib, DEV_BSIZE, HIB_W, + (vaddr_t)&disk_hib, hib->sec_size, HIB_W, hib->io_page)); } @@ -921,19 +930,21 @@ hibernate_write_chunktable(union hibernate_info *hib) int hibernate_clear_signature(union hibernate_info *hib) { - union hibernate_info blank_hiber_info; + uint8_t buf[DEV_BSIZE]; /* Zero out a blank hiber_info */ - memset(&blank_hiber_info, 0, sizeof(union hibernate_info)); + memcpy(&buf, &disk_hib, sizeof(buf)); + memset(&disk_hib, 0, hib->sec_size); /* Write (zeroed) hibernate info to disk */ DPRINTF("clearing hibernate signature block location: %lld\n", hib->sig_offset); if (hibernate_block_io(hib, hib->sig_offset, - DEV_BSIZE, (vaddr_t)&blank_hiber_info, 1)) + hib->sec_size, (vaddr_t)&disk_hib, 1)) printf("Warning: could not clear hibernate signature\n"); + memcpy(&disk_hib, buf, sizeof(buf)); return (0); } @@ -1110,7 +1121,8 @@ hibernate_reprotect_ssp(vaddr_t va) void hibernate_resume(void) { - union hibernate_info hib; + uint8_t buf[DEV_BSIZE]; + union hibernate_info *hib = (union hibernate_info *)&buf; int s; #ifndef NO_PROPOLICE vsize_t off = (vaddr_t)&__guard_local - @@ -1119,8 +1131,8 @@ hibernate_resume(void) #endif /* Get current running machine's hibernate info */ - memset(&hib, 0, sizeof(hib)); - if (get_hibernate_info(&hib, 0)) { + memset(buf, 0, sizeof(buf)); + if (get_hibernate_info(hib, 0)) { DPRINTF("couldn't retrieve machine's hibernate info\n"); return; } @@ -1129,11 +1141,11 @@ hibernate_resume(void) s = splbio(); DPRINTF("reading hibernate signature block location: %lld\n", - hib.sig_offset); + hib->sig_offset); - if (hibernate_block_io(&hib, - hib.sig_offset, - DEV_BSIZE, (vaddr_t)&disk_hib, 0)) { + if (hibernate_block_io(hib, + hib->sig_offset, + hib->sec_size, (vaddr_t)&disk_hib, 0)) { DPRINTF("error in hibernate read\n"); splx(s); return; @@ -1151,7 +1163,7 @@ hibernate_resume(void) * We (possibly) found a hibernate signature. Clear signature first, * to prevent accidental resume or endless resume cycles later. */ - if (hibernate_clear_signature(&hib)) { + if (hibernate_clear_signature(hib)) { DPRINTF("error clearing hibernate signature block\n"); splx(s); return; @@ -1161,12 +1173,12 @@ hibernate_resume(void) * If on-disk and in-memory hibernate signatures match, * this means we should do a resume from hibernate. */ - if (hibernate_compare_signature(&hib, &disk_hib)) { + if (hibernate_compare_signature(hib, &disk_hib)) { DPRINTF("mismatched hibernate signature block\n"); splx(s); return; } - disk_hib.dev = hib.dev; + disk_hib.dev = hib->dev; #ifdef MULTIPROCESSOR /* XXX - if we fail later, we may need to rehatch APs on some archs */ @@ -1237,8 +1249,9 @@ fail: void hibernate_unpack_image(union hibernate_info *hib) { + uint8_t buf[DEV_BSIZE]; struct hibernate_disk_chunk *chunks; - union hibernate_info local_hib; + union hibernate_info *local_hib = (union hibernate_info *)&buf; paddr_t image_cur = global_pig_start; short i, *fchunks; char *pva; @@ -1251,11 +1264,11 @@ hibernate_unpack_image(union hibernate_info *hib) chunks = (struct hibernate_disk_chunk *)(pva + HIBERNATE_CHUNK_SIZE); /* Can't use hiber_info that's passed in after this point */ - bcopy(hib, &local_hib, sizeof(union hibernate_info)); - local_hib.retguard_ofs = 0; + memcpy(buf, hib, sizeof(buf)); + local_hib->retguard_ofs = 0; /* VA == PA */ - local_hib.piglet_va = local_hib.piglet_pa; + local_hib->piglet_va = local_hib->piglet_pa; /* * Point of no return. Once we pass this point, only kernel code can @@ -1271,12 +1284,12 @@ hibernate_unpack_image(union hibernate_info *hib) DPRINTF("hibernate: activating alt. pagetable and starting unpack\n"); hibernate_activate_resume_pt_machdep(); - for (i = 0; i < local_hib.chunk_ctr; i++) { + for (i = 0; i < local_hib->chunk_ctr; i++) { /* Reset zlib for inflate */ - if (hibernate_zlib_reset(&local_hib, 0) != Z_OK) + if (hibernate_zlib_reset(local_hib, 0) != Z_OK) panic("hibernate failed to reset zlib for inflate"); - hibernate_process_chunk(&local_hib, &chunks[fchunks[i]], + hibernate_process_chunk(local_hib, &chunks[fchunks[i]], image_cur); image_cur += chunks[fchunks[i]].compressed_size; @@ -1446,7 +1459,7 @@ int hibernate_write_chunks(union hibernate_info *hib) { paddr_t range_base, range_end, inaddr, temp_inaddr; - size_t nblocks, out_remaining, used; + size_t out_remaining, used; struct hibernate_disk_chunk *chunks; vaddr_t hibernate_io_page = hib->piglet_va + PAGE_SIZE; daddr_t blkctr = 0; @@ -1553,8 +1566,6 @@ hibernate_write_chunks(union hibernate_info *hib) if (out_remaining == 0) { /* Filled up the page */ - nblocks = PAGE_SIZE / DEV_BSIZE; - if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, (vaddr_t)hibernate_io_page, @@ -1563,8 +1574,7 @@ hibernate_write_chunks(union hibernate_info *hib) err); return (err); } - - blkctr += nblocks; + blkctr += PAGE_SIZE / DEV_BSIZE; } } } @@ -1600,22 +1610,18 @@ hibernate_write_chunks(union hibernate_info *hib) out_remaining = hibernate_state->hib_stream.avail_out; - used = 2 * PAGE_SIZE - out_remaining; - nblocks = used / DEV_BSIZE; - - /* Round up to next block if needed */ - if (used % DEV_BSIZE != 0) - nblocks ++; + /* Round up to next sector if needed */ + used = ROUNDUP(2 * PAGE_SIZE - out_remaining, hib->sec_size); /* Write final block(s) for this chunk */ if ((err = hib->io_func(hib->dev, blkctr + hib->image_offset, - (vaddr_t)hibernate_io_page, nblocks*DEV_BSIZE, + (vaddr_t)hibernate_io_page, used, HIB_W, hib->io_page))) { DPRINTF("hib final write error %d\n", err); return (err); } - blkctr += nblocks; + blkctr += used / DEV_BSIZE; chunks[i].compressed_size = (blkctr + hib->image_offset - chunks[i].offset) * DEV_BSIZE; @@ -1905,7 +1911,8 @@ hibernate_read_chunks(union hibernate_info *hib, paddr_t pig_start, int hibernate_suspend(void) { - union hibernate_info hib; + uint8_t buf[DEV_BSIZE]; + union hibernate_info *hib = (union hibernate_info *)&buf; u_long start, end; /* @@ -1913,13 +1920,13 @@ hibernate_suspend(void) * This also allocates a piglet whose physaddr is stored in * hib->piglet_pa and vaddr stored in hib->piglet_va */ - if (get_hibernate_info(&hib, 1)) { + if (get_hibernate_info(hib, 1)) { DPRINTF("failed to obtain hibernate info\n"); return (1); } /* Find a page-addressed region in swap [start,end] */ - if (uvm_hibswap(hib.dev, &start, &end)) { + if (uvm_hibswap(hib->dev, &start, &end)) { printf("hibernate: cannot find any swap\n"); return (1); } @@ -1936,26 +1943,26 @@ hibernate_suspend(void) &retguard_end_phys); /* Calculate block offsets in swap */ - hib.image_offset = ctod(start); + hib->image_offset = ctod(start); DPRINTF("hibernate @ block %lld max-length %lu blocks\n", - hib.image_offset, ctod(end) - ctod(start) + 1); + hib->image_offset, ctod(end) - ctod(start) + 1); pmap_activate(curproc); DPRINTF("hibernate: writing chunks\n"); - if (hibernate_write_chunks(&hib)) { + if (hibernate_write_chunks(hib)) { DPRINTF("hibernate_write_chunks failed\n"); return (1); } DPRINTF("hibernate: writing chunktable\n"); - if (hibernate_write_chunktable(&hib)) { + if (hibernate_write_chunktable(hib)) { DPRINTF("hibernate_write_chunktable failed\n"); return (1); } DPRINTF("hibernate: writing signature\n"); - if (hibernate_write_signature(&hib)) { + if (hibernate_write_signature(hib)) { DPRINTF("hibernate_write_signature failed\n"); return (1); } @@ -1967,7 +1974,7 @@ hibernate_suspend(void) * Give the device-specific I/O function a notification that we're * done, and that it can clean up or shutdown as needed. */ - hib.io_func(hib.dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib.io_page); + hib->io_func(hib->dev, 0, (vaddr_t)NULL, 0, HIB_DONE, hib->io_page); return (0); } diff --git a/sys/sys/hibernate.h b/sys/sys/hibernate.h index 75feaebac73..d6c37b2777c 100644 --- a/sys/sys/hibernate.h +++ b/sys/sys/hibernate.h @@ -1,4 +1,4 @@ -/* $OpenBSD: hibernate.h,v 1.45 2022/01/17 02:54:28 mlarkin Exp $ */ +/* $OpenBSD: hibernate.h,v 1.46 2024/06/04 20:31:35 krw Exp $ */ /* * Copyright (c) 2011 Ariane van der Steldt @@ -106,10 +106,11 @@ union hibernate_info { long guard; #endif /* ! NO_PROPOLICE */ u_int32_t retguard_ofs; + u_int32_t sec_size; }; - /* XXX - remove restriction to have this union fit in a single block */ - char pad[512]; /* Pad to 512 bytes */ + /* XXX - remove restriction to have the struct fit in a single block */ + char pad[4096]; /* Pad to largest allowable disk sector size in bytes */ }; void *hib_alloc(struct hiballoc_arena*, size_t); -- 2.20.1