From: krw Date: Mon, 13 May 2024 11:41:52 +0000 (+0000) Subject: Add bio(4) support to nvme(4). Provide information on controller and configured X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=7f4636ce12f84d7d396cff7bc4c91dd86e860113;p=openbsd Add bio(4) support to nvme(4). Provide information on controller and configured namespaces via bioctl(8)'s BIOCINQ, BIOCVOL, BIOCDISK requests. Based on work with dlg@ at h2k23 and many subsequent improvements and tests by jmatthew@. ok jmatthew@ --- diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c index a741a8532bd..997c5e40d44 100644 --- a/sys/dev/ic/nvme.c +++ b/sys/dev/ic/nvme.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nvme.c,v 1.110 2024/05/10 21:23:32 krw Exp $ */ +/* $OpenBSD: nvme.c,v 1.111 2024/05/13 11:41:52 krw Exp $ */ /* * Copyright (c) 2014 David Gwynne @@ -16,7 +16,10 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "bio.h" + #include +#include #include #include #include @@ -25,6 +28,7 @@ #include #include #include +#include #include @@ -33,7 +37,9 @@ #include #include #include +#include +#include #include #include @@ -83,17 +89,27 @@ void nvme_scsi_cmd(struct scsi_xfer *); void nvme_minphys(struct buf *, struct scsi_link *); int nvme_scsi_probe(struct scsi_link *); void nvme_scsi_free(struct scsi_link *); -uint64_t nvme_scsi_size(struct nvm_identify_namespace *); +uint64_t nvme_scsi_size(const struct nvm_identify_namespace *); #ifdef HIBERNATE #include #include -#include #include int nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *); #endif +#if NBIO > 0 +void nvme_bio_status(struct bio_status *, const char *, ...); + +const char *nvme_bioctl_sdname(const struct nvme_softc *, int); + +int nvme_bioctl(struct device *, u_long, caddr_t); +int nvme_bioctl_inq(struct nvme_softc *, struct bioc_inq *); +int nvme_bioctl_vol(struct nvme_softc *, struct bioc_vol *); +int nvme_bioctl_disk(struct nvme_softc *, struct bioc_disk *); +#endif /* NBIO > 0 */ + const struct 
scsi_adapter nvme_switch = {
 	nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free, NULL
 };
@@ -283,6 +299,7 @@ nvme_attach(struct nvme_softc *sc)
 	u_int nccbs = 0;
 
 	mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
+	rw_init(&sc->sc_lock, "nvme_lock");
 	SIMPLEQ_INIT(&sc->sc_ccb_list);
 	scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
 	if (sc->sc_ops == NULL)
@@ -384,7 +401,12 @@ nvme_attach(struct nvme_softc *sc)
 	saa.saa_quirks = saa.saa_flags = 0;
 	saa.saa_wwpn = saa.saa_wwnn = 0;
 
-	config_found(&sc->sc_dev, &saa, scsiprint);
+	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
+	    &saa, scsiprint);
+#if NBIO > 0
+	if (bio_register(&sc->sc_dev, nvme_bioctl) != 0)
+		printf("%s: unable to register bioctl\n", DEVNAME(sc));
+#endif /* NBIO > 0 */
 
 	return (0);
 
@@ -889,7 +911,7 @@ nvme_scsi_free(struct scsi_link *link)
 }
 
 uint64_t
-nvme_scsi_size(struct nvm_identify_namespace *ns)
+nvme_scsi_size(const struct nvm_identify_namespace *ns)
 {
 	uint64_t ncap, nsze;
 
@@ -1716,3 +1738,307 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
 }
 
 #endif
+
+#if NBIO > 0
+/*
+ * bio(4) ioctl handler registered through bio_register() in nvme_attach().
+ * Dispatches BIOCINQ, BIOCVOL and BIOCDISK to their helpers while holding
+ * sc_lock for writing; any other command is logged and fails with ENOTTY.
+ */
+int
+nvme_bioctl(struct device *self, u_long cmd, caddr_t data)
+{
+	struct nvme_softc *sc = (struct nvme_softc *)self;
+	int error = 0;
+
+	rw_enter_write(&sc->sc_lock);
+
+	switch (cmd) {
+	case BIOCINQ:
+		error = nvme_bioctl_inq(sc, (struct bioc_inq *)data);
+		break;
+	case BIOCVOL:
+		error = nvme_bioctl_vol(sc, (struct bioc_vol *)data);
+		break;
+	case BIOCDISK:
+		error = nvme_bioctl_disk(sc, (struct bioc_disk *)data);
+		break;
+	default:
+		printf("nvme_bioctl() Unknown command (%lu)\n", cmd);
+		error = ENOTTY;
+	}
+
+	rw_exit_write(&sc->sc_lock);
+
+	return error;
+}
+
+/*
+ * Add a printf-style BIO_MSG_INFO message to bs by passing the format and
+ * a pointer to its argument list on to bio_status().
+ */
+void
+nvme_bio_status(struct bio_status *bs, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	bio_status(bs, 0, BIO_MSG_INFO, fmt, &ap);
+	va_end(ap);
+}
+
+/*
+ * Return the device name of the sd softc attached at the given target on
+ * this controller's scsibus. Returns NULL when there is no link, the link
+ * or disk is dying or has no softc, or NVME_VS reads back as 0xffffffff.
+ */
+const char *
+nvme_bioctl_sdname(const struct nvme_softc *sc, int target)
+{
+	const struct scsi_link *link;
+	const struct sd_softc *sd;
+
+	link = scsi_get_link(sc->sc_scsibus, target, 0);
+	/* No link means no sd softc; bail out before sd could be used unset. */
+	if (link == NULL)
+		return NULL;
+	sd = (struct sd_softc *)(link->device_softc);
+	if (ISSET(link->state, SDEV_S_DYING) || sd == NULL ||
+	    ISSET(sd->flags, SDF_DYING))
+		return NULL;
+
+	if (nvme_read4(sc, NVME_VS) == 0xffffffff)
+		return NULL;
+
+	return DEVNAME(sd);
+}
+
+/*
+ * BIOCINQ: report the volume/disk count (the highest namespace id that has
+ * identify data) and add messages describing the controller's identity,
+ * optional capabilities and current CC/CSTS/VS register state.
+ */
+int
+nvme_bioctl_inq(struct nvme_softc *sc, struct bioc_inq *bi)
+{
+	char sn[41], mn[81], fr[17];
+	struct nvm_identify_controller *idctrl = &sc->sc_identify;
+	struct bio_status *bs;
+	unsigned int nn;
+	uint32_t cc, csts, vs;
+
+	/* Don't tell bioctl about namespaces > last configured namespace. */
+	for (nn = sc->sc_nn; nn > 0; nn--) {
+		if (sc->sc_namespaces[nn].ident)
+			break;
+	}
+	bi->bi_novol = bi->bi_nodisk = nn;
+	strlcpy(bi->bi_dev, DEVNAME(sc), sizeof(bi->bi_dev));
+
+	bs = &bi->bi_bio.bio_status;
+	bio_status_init(bs, &sc->sc_dev);
+	bs->bs_status = BIO_STATUS_SUCCESS;
+
+	scsi_strvis(sn, idctrl->sn, sizeof(idctrl->sn));
+	scsi_strvis(mn, idctrl->mn, sizeof(idctrl->mn));
+	scsi_strvis(fr, idctrl->fr, sizeof(idctrl->fr));
+
+	nvme_bio_status(bs, "%s, %s, %s", mn, fr, sn);
+	nvme_bio_status(bs, "Max i/o %zu bytes%s%s%s, Sanitize 0x%b",
+	    sc->sc_mdts,
+	    ISSET(idctrl->lpa, NVM_ID_CTRL_LPA_PE) ?
+	    ", Persistent Event Log" : "",
+	    ISSET(idctrl->fna, NVM_ID_CTRL_FNA_CRYPTOFORMAT) ?
+	    ", CryptoFormat" : "",
+	    ISSET(idctrl->vwc, NVM_ID_CTRL_VWC_PRESENT) ?
+	    ", Volatile Write Cache" : "",
+	    lemtoh32(&idctrl->sanicap), NVM_ID_CTRL_SANICAP_FMT
+	);
+
+	if (idctrl->ctratt != 0)
+		nvme_bio_status(bs, "Features 0x%b", lemtoh32(&idctrl->ctratt),
+		    NVM_ID_CTRL_CTRATT_FMT);
+
+	if (idctrl->oacs || idctrl->oncs) {
+		nvme_bio_status(bs, "Admin commands 0x%b, NVM commands 0x%b",
+		    lemtoh16(&idctrl->oacs), NVM_ID_CTRL_OACS_FMT,
+		    lemtoh16(&idctrl->oncs), NVM_ID_CTRL_ONCS_FMT);
+	}
+
+	cc = nvme_read4(sc, NVME_CC);
+	csts = nvme_read4(sc, NVME_CSTS);
+	vs = nvme_read4(sc, NVME_VS);
+
+	if (vs == 0xffffffff) {
+		nvme_bio_status(bs, "Invalid PCIe register mapping");
+		return 0;
+	}
+
+	nvme_bio_status(bs, "NVMe %u.%u%s%s%sabled, %sReady%s%s%s%s",
+	    NVME_VS_MJR(vs), NVME_VS_MNR(vs),
+	    (NVME_CC_CSS_R(cc) == NVME_CC_CSS_NVM) ? ", NVM I/O command set" : "",
+	    (NVME_CC_CSS_R(cc) == 0x7) ? ", Admin command set only" : "",
+	    ISSET(cc, NVME_CC_EN) ? ", En" : "Dis",
+	    ISSET(csts, NVME_CSTS_RDY) ? "" : "Not ",
+	    ISSET(csts, NVME_CSTS_CFS) ? ", Fatal Error, " : "",
+	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_NORMAL) ? ", Normal shutdown" : "",
+	    (NVME_CC_SHN_R(cc) == NVME_CC_SHN_ABRUPT) ? ", Abrupt shutdown" : "",
+	    ISSET(csts, NVME_CSTS_SHST_DONE) ? " complete" : "");
+
+	return 0;
+}
+
+/*
+ * BIOCVOL: describe namespace bv_volid + 1 as a volume. The status is
+ * BIOC_SVINVALID when that id is beyond sc_nn or has no identify data;
+ * otherwise the size is set and the sd name is copied if one is available.
+ */
+int
+nvme_bioctl_vol(struct nvme_softc *sc, struct bioc_vol *bv)
+{
+	const struct nvm_identify_namespace *idns;
+	const char *sd;
+	int target;
+	unsigned int lbaf;
+
+	target = bv->bv_volid + 1;
+	if (target > sc->sc_nn) {
+		bv->bv_status = BIOC_SVINVALID;
+		return 0;
+	}
+
+	bv->bv_level = 'c';
+	bv->bv_nodisk = 1;
+
+	idns = sc->sc_namespaces[target].ident;
+	if (idns == NULL) {
+		bv->bv_status = BIOC_SVINVALID;
+		return 0;
+	}
+
+	lbaf = NVME_ID_NS_FLBAS(idns->flbas);
+	/* Only flbas bits 6:5 extend the index; 0x10 is the metadata flag. */
+	if (idns->nlbaf > nitems(idns->lbaf))
+		lbaf |= (idns->flbas >> 1) & 0x30;
+	bv->bv_size = nvme_scsi_size(idns) << idns->lbaf[lbaf].lbads;
+
+	sd = nvme_bioctl_sdname(sc, target);
+	if (sd) {
+		strlcpy(bv->bv_dev, sd, sizeof(bv->bv_dev));
+		bv->bv_status = BIOC_SVONLINE;
+	} else
+		bv->bv_status = BIOC_SVOFFLINE;
+
+	return 0;
+}
+
+/*
+ * BIOCDISK: describe namespace bd_volid + 1 as a disk: location, size,
+ * serial (NGUID, else EUI64, when non-zero), LBA formats, features and
+ * data protection settings. Returns EINVAL when the id is beyond sc_nn.
+ */
+int
+nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd)
+{
+	const char *rpdesc[4] = {
+		" (Best)",
+		" (Better)",
+		" (Good)",
+		" (Degraded)"
+	};
+	const char *protection[4] = {
+		"not enabled",
+		"Type 1",
+		"Type 2",
+		"Type 3",
+	};
+	char buf[32], msg[BIO_MSG_LEN];
+	struct nvm_identify_namespace *idns;
+	struct bio_status *bs;
+	uint64_t id1, id2;
+	unsigned int i, lbaf, target;
+	uint16_t ms;
+	uint8_t dps;
+
+	target = bd->bd_volid + 1;
+	if (target > sc->sc_nn)
+		return EINVAL;
+	bd->bd_channel = sc->sc_scsibus->sc_dev.dv_unit;
+	bd->bd_target = target;
+	bd->bd_lun = 0;
+	snprintf(bd->bd_procdev, sizeof(bd->bd_procdev), "Namespace %u", target);
+
+	bs = &bd->bd_bio.bio_status;
+	bs->bs_status = BIO_STATUS_SUCCESS;
+	snprintf(bs->bs_controller, sizeof(bs->bs_controller), "%11u",
+	    bd->bd_diskid);
+
+	idns = sc->sc_namespaces[target].ident;
+	if (idns == NULL) {
+		bd->bd_status = BIOC_SDUNUSED;
+		return 0;
+	}
+
+	lbaf = NVME_ID_NS_FLBAS(idns->flbas);
+	/* Only flbas bits 6:5 extend the index; 0x10 is the metadata flag. */
+	if (idns->nlbaf > nitems(idns->lbaf))
+		lbaf |= (idns->flbas >> 1) & 0x30;
+	bd->bd_size = lemtoh64(&idns->nsze) << idns->lbaf[lbaf].lbads;
+
+	if (memcmp(idns->nguid, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
+		memcpy(&id1, idns->nguid, sizeof(uint64_t));
+		memcpy(&id2, idns->nguid + sizeof(uint64_t), sizeof(uint64_t));
+		snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx%08llx",
+		    id1, id2);
+	} else if (memcmp(idns->eui64, "\0\0\0\0\0\0\0\0", 8)) {
+		memcpy(&id1, idns->eui64, sizeof(uint64_t));
+		snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%08llx", id1);
+	}
+
+	msg[0] = '\0';
+	for (i = 0; i <= idns->nlbaf; i++) {
+		if (idns->lbaf[i].lbads == 0)
+			continue;
+		snprintf(buf, sizeof(buf), "%s%s%u",
+		    strlen(msg) ? ", " : "", (i == lbaf) ? "*" : "",
+		    1 << idns->lbaf[i].lbads);
+		strlcat(msg, buf, sizeof(msg));
+		ms = lemtoh16(&idns->lbaf[i].ms);
+		if (ms) {
+			snprintf(buf, sizeof(buf), "+%u", ms);
+			strlcat(msg, buf, sizeof(msg));
+		}
+		/* rpdesc has 4 entries; mask so rp cannot index past it. */
+		strlcat(msg, rpdesc[idns->lbaf[i].rp & 0x3], sizeof(msg));
+	}
+	nvme_bio_status(bs, "Formats %s", msg);
+
+	if (idns->nsfeat)
+		nvme_bio_status(bs, "Features 0x%b", idns->nsfeat,
+		    NVME_ID_NS_NSFEAT_FMT);
+
+	if (idns->dps) {
+		dps = idns->dps;
+		snprintf(msg, sizeof(msg), "Data Protection (0x%02x) "
+		    "Protection Data in ", dps);
+		if (ISSET(dps, NVME_ID_NS_DPS_PIP))
+			strlcat(msg, "first", sizeof(msg));
+		else
+			strlcat(msg, "last", sizeof(msg));
+		strlcat(msg, "bytes of metadata, Protection ", sizeof(msg));
+		if (NVME_ID_NS_DPS_TYPE(dps) >= nitems(protection))
+			strlcat(msg, "Type unknown", sizeof(msg));
+		else
+			strlcat(msg, protection[NVME_ID_NS_DPS_TYPE(dps)],
+			    sizeof(msg));
+		nvme_bio_status(bs, "%s", msg);
+	}
+
+	if (nvme_bioctl_sdname(sc, target) == NULL)
+		bd->bd_status = BIOC_SDOFFLINE;
+	else
+		bd->bd_status = BIOC_SDONLINE;
+
+	return 0;
+}
+#endif /* NBIO > 0 */
diff --git a/sys/dev/ic/nvmereg.h b/sys/dev/ic/nvmereg.h
index 59300bed27c..41886c858fb 100644
--- a/sys/dev/ic/nvmereg.h
+++ b/sys/dev/ic/nvmereg.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nvmereg.h,v 1.13 2023/12/20 13:37:25 krw Exp $ */
+/* $OpenBSD: nvmereg.h,v 1.14 2024/05/13 11:41:52 krw Exp $ */
 
 /*
  * Copyright (c) 2014 
David Gwynne @@ -290,16 +290,30 @@ struct nvm_identify_controller { u_int8_t mdts; /* Maximum Data Transfer Size */ u_int16_t cntlid; /* Controller ID */ - u_int8_t _reserved1[176]; + u_int8_t _reserved1[16]; + u_int32_t ctratt; +#define NVM_ID_CTRL_CTRATT_FMT "\020" \ + "\016DELEG" "\017DEVNVM" "\020ELBAS" "\005ENDURGRPS" \ + "\014FIXCAPMGMT" "\001HOSTID" "\013MDS" "\002NOPSPM" \ + "\010NSGRAN" "\003NVMSETS" "\006PREDLATENCY" "\004READRCVRY" \ + "\011SQASSOC" "\007TBKAS" "\012UUIDLIST" "\015VARCAPMGMT" + + u_int8_t _reserved9[156]; /* Admin Command Set Attributes & Optional Controller Capabilities */ u_int16_t oacs; /* Optional Admin Command Support */ +#define NVM_ID_CTRL_OACS_FMT "\020" \ + "\013CAFL" "\011DBBC" "\006DIREC" "\005DST" "\012GLBAS" \ + "\002FORMAT" "\003FWCD" "\007MISR" "\004NSMGMT" "\001SECSR" \ + "\010VM" + u_int8_t acl; /* Abort Command Limit */ u_int8_t aerl; /* Asynchronous Event Request Limit */ u_int8_t frmw; /* Firmware Updates */ u_int8_t lpa; /* Log Page Attributes */ +#define NVM_ID_CTRL_LPA_PE (1 << 4) u_int8_t elpe; /* Error Log Page Entries */ u_int8_t npss; /* Number of Power States Support */ @@ -308,7 +322,11 @@ struct nvm_identify_controller { u_int8_t apsta; /* Autonomous Power State Transition Attributes */ - u_int8_t _reserved2[246]; + u_int8_t _reserved2[62]; + u_int32_t sanicap; +#define NVM_ID_CTRL_SANICAP_FMT "\020" \ + "\002BlockErase" "\001CryptoErase" "\003Overwrite" + u_int8_t _reserved10[180]; /* NVM Command Set Attributes */ @@ -319,10 +337,16 @@ struct nvm_identify_controller { u_int32_t nn; /* Number of Namespaces */ u_int16_t oncs; /* Optional NVM Command Support */ +#define NVM_ID_CTRL_ONCS_FMT "\020" \ + "\006RSV" "\001SCMP" "\011SCPY" "\003SDMGMT" "\005SF" \ + "\010SV" "\002SWU" "\004SWZ" "\007TS" + u_int16_t fuses; /* Fused Operation Support */ u_int8_t fna; /* Format NVM Attributes */ +#define NVM_ID_CTRL_FNA_CRYPTOFORMAT (1 << 2) u_int8_t vwc; /* Volatile Write Cache */ +#define NVM_ID_CTRL_VWC_PRESENT (1 << 
0) u_int16_t awun; /* Atomic Write Unit Normal */ u_int16_t awupf; /* Atomic Write Unit Power Fail */ @@ -364,16 +388,24 @@ struct nvm_identify_namespace { u_int8_t nsfeat; /* Namespace Features */ #define NVME_ID_NS_NSFEAT_THIN_PROV (1 << 0) +#define NVME_ID_NS_NSFEAT_FMT "\020" \ + "\002NSABP" "\005OPTPERF" "\001THIN_PROV" "\004UIDREUSE" "\003DAE" + u_int8_t nlbaf; /* Number of LBA Formats */ u_int8_t flbas; /* Formatted LBA Size */ -#define NVME_ID_NS_FLBAS(_f) ((_f) & 0x0f) -#define NVME_ID_NS_FLBAS_MD 0x10 +#define NVME_ID_NS_FLBAS(_f) ((_f) & 0x0f) +#define NVME_ID_NS_FLBAS_MD 0x10 u_int8_t mc; /* Metadata Capabilities */ + u_int8_t dpc; /* End-to-end Data Protection Capabilities */ u_int8_t dps; /* End-to-end Data Protection Type Settings */ +#define NVME_ID_NS_DPS_PIP (1 << 3) +#define NVME_ID_NS_DPS_TYPE(_f) ((_f) & 0x7) - u_int8_t _reserved1[98]; + u_int8_t _reserved1[74]; + uint8_t nguid[16]; + uint8_t eui64[8]; /* BIG-endian */ struct nvm_namespace_format lbaf[16]; /* LBA Format Support */ diff --git a/sys/dev/ic/nvmevar.h b/sys/dev/ic/nvmevar.h index 2a7b59a1d24..4a7c1094e62 100644 --- a/sys/dev/ic/nvmevar.h +++ b/sys/dev/ic/nvmevar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nvmevar.h,v 1.28 2021/08/29 12:02:52 kettenis Exp $ */ +/* $OpenBSD: nvmevar.h,v 1.29 2024/05/13 11:41:52 krw Exp $ */ /* * Copyright (c) 2014 David Gwynne @@ -125,6 +125,8 @@ struct nvme_softc { struct nvme_ccb_list sc_ccb_list; struct nvme_dmamem *sc_ccb_prpls; struct scsi_iopool sc_iopool; + struct rwlock sc_lock; + struct scsibus_softc *sc_scsibus; }; #define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)