Add bio(4) support to nvme(4). Provide information on controller and configured
author krw <krw@openbsd.org>
Mon, 13 May 2024 11:41:52 +0000 (11:41 +0000)
committer krw <krw@openbsd.org>
Mon, 13 May 2024 11:41:52 +0000 (11:41 +0000)
namespaces via bioctl(8)'s BIOCINQ, BIOCVOL, BIOCDISK requests.

Based on work with dlg@ at h2k23 and many subsequent improvements and tests by
jmatthew@.

ok jmatthew@

sys/dev/ic/nvme.c
sys/dev/ic/nvmereg.h
sys/dev/ic/nvmevar.h

index a741a85..997c5e4 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: nvme.c,v 1.110 2024/05/10 21:23:32 krw Exp $ */
+/*     $OpenBSD: nvme.c,v 1.111 2024/05/13 11:41:52 krw Exp $ */
 
 /*
  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include "bio.h"
+
 #include <sys/param.h>
+#include <sys/ioctl.h>
 #include <sys/systm.h>
 #include <sys/buf.h>
 #include <sys/kernel.h>
@@ -25,6 +28,7 @@
 #include <sys/queue.h>
 #include <sys/mutex.h>
 #include <sys/pool.h>
+#include <sys/disk.h>
 
 #include <sys/atomic.h>
 
@@ -33,7 +37,9 @@
 #include <scsi/scsi_all.h>
 #include <scsi/scsi_disk.h>
 #include <scsi/scsiconf.h>
+#include <scsi/sdvar.h>
 
+#include <dev/biovar.h>
 #include <dev/ic/nvmereg.h>
 #include <dev/ic/nvmevar.h>
 
@@ -83,17 +89,27 @@ void        nvme_scsi_cmd(struct scsi_xfer *);
 void   nvme_minphys(struct buf *, struct scsi_link *);
 int    nvme_scsi_probe(struct scsi_link *);
 void   nvme_scsi_free(struct scsi_link *);
-uint64_t nvme_scsi_size(struct nvm_identify_namespace *);
+uint64_t nvme_scsi_size(const struct nvm_identify_namespace *);
 
 #ifdef HIBERNATE
 #include <uvm/uvm_extern.h>
 #include <sys/hibernate.h>
-#include <sys/disk.h>
 #include <sys/disklabel.h>
 
 int    nvme_hibernate_io(dev_t, daddr_t, vaddr_t, size_t, int, void *);
 #endif
 
+#if NBIO > 0
+void   nvme_bio_status(struct bio_status *, const char *, ...);
+
+const char *nvme_bioctl_sdname(const struct nvme_softc *, int);
+
+int    nvme_bioctl(struct device *, u_long, caddr_t);
+int    nvme_bioctl_inq(struct nvme_softc *, struct bioc_inq *);
+int    nvme_bioctl_vol(struct nvme_softc *, struct bioc_vol *);
+int    nvme_bioctl_disk(struct nvme_softc *, struct bioc_disk *);
+#endif /* NBIO > 0 */
+
 const struct scsi_adapter nvme_switch = {
        nvme_scsi_cmd, nvme_minphys, nvme_scsi_probe, nvme_scsi_free, NULL
 };
@@ -283,6 +299,7 @@ nvme_attach(struct nvme_softc *sc)
        u_int nccbs = 0;
 
        mtx_init(&sc->sc_ccb_mtx, IPL_BIO);
+       rw_init(&sc->sc_lock, "nvme_lock");
        SIMPLEQ_INIT(&sc->sc_ccb_list);
        scsi_iopool_init(&sc->sc_iopool, sc, nvme_ccb_get, nvme_ccb_put);
        if (sc->sc_ops == NULL)
@@ -384,7 +401,12 @@ nvme_attach(struct nvme_softc *sc)
        saa.saa_quirks = saa.saa_flags = 0;
        saa.saa_wwpn = saa.saa_wwnn = 0;
 
-       config_found(&sc->sc_dev, &saa, scsiprint);
+       sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
+           &saa, scsiprint);
+#if NBIO > 0
+       if (bio_register(&sc->sc_dev, nvme_bioctl) != 0)
+               printf("%s: unable to register bioctl\n", DEVNAME(sc));
+#endif /* NBIO > 0 */
 
        return (0);
 
@@ -889,7 +911,7 @@ nvme_scsi_free(struct scsi_link *link)
 }
 
 uint64_t
-nvme_scsi_size(struct nvm_identify_namespace *ns)
+nvme_scsi_size(const struct nvm_identify_namespace *ns)
 {
        uint64_t                ncap, nsze;
 
@@ -1716,3 +1738,274 @@ nvme_hibernate_io(dev_t dev, daddr_t blkno, vaddr_t addr, size_t size,
 }
 
 #endif
+
+#if NBIO > 0
+/*
+ * bio(4) ioctl entry point, registered with bio_register() at attach time.
+ *
+ * Routes BIOCINQ, BIOCVOL and BIOCDISK to their handlers and returns the
+ * handler's result.  Any other command is logged and rejected with ENOTTY.
+ * sc_lock is held for writing around the whole dispatch.
+ */
+int
+nvme_bioctl(struct device *self, u_long cmd, caddr_t data)
+{
+       struct nvme_softc       *sc = (struct nvme_softc *)self;
+       int                      rv;
+
+       rw_enter_write(&sc->sc_lock);
+
+       if (cmd == BIOCINQ) {
+               rv = nvme_bioctl_inq(sc, (struct bioc_inq *)data);
+       } else if (cmd == BIOCVOL) {
+               rv = nvme_bioctl_vol(sc, (struct bioc_vol *)data);
+       } else if (cmd == BIOCDISK) {
+               rv = nvme_bioctl_disk(sc, (struct bioc_disk *)data);
+       } else {
+               printf("nvme_bioctl() Unknown command (%lu)\n", cmd);
+               rv = ENOTTY;
+       }
+
+       rw_exit_write(&sc->sc_lock);
+
+       return rv;
+}
+
+/*
+ * printf-style convenience wrapper around bio_status(): forwards fmt and
+ * its variable arguments with BIO_MSG_INFO and a second argument of 0.
+ */
+void
+nvme_bio_status(struct bio_status *bs, const char *fmt, ...)
+{
+       va_list                 args;
+
+       va_start(args, fmt);
+       bio_status(bs, 0, BIO_MSG_INFO, fmt, &args);
+       va_end(args);
+}
+
+/*
+ * Return the device name of the disk attached at `target', LUN 0, on this
+ * controller's scsibus, or NULL when no usable disk is there.
+ *
+ * NULL is returned when:
+ *  - no scsibus was recorded at attach time,
+ *  - no link exists for the target,
+ *  - the link or its softc is missing or marked dying,
+ *  - the controller version register reads back as all ones.
+ *
+ * NOTE(review): link->device_softc is assumed to be a struct sd_softc,
+ * i.e. the attached device is sd(4) -- confirm against the attach code.
+ */
+const char *
+nvme_bioctl_sdname(const struct nvme_softc *sc, int target)
+{
+       const struct scsi_link          *link;
+       const struct sd_softc           *sd;
+
+       /* sc_scsibus holds config_found()'s result, which may be NULL. */
+       if (sc->sc_scsibus == NULL)
+               return NULL;
+
+       /*
+        * Bail out before touching `sd': with no link there is no softc to
+        * name, and falling through would read an uninitialized pointer.
+        */
+       link = scsi_get_link(sc->sc_scsibus, target, 0);
+       if (link == NULL)
+               return NULL;
+
+       sd = (const struct sd_softc *)(link->device_softc);
+       if (ISSET(link->state, SDEV_S_DYING) || sd == NULL ||
+           ISSET(sd->flags, SDF_DYING))
+               return NULL;
+
+       /* An all-ones version register is treated as "controller gone". */
+       if (nvme_read4(sc, NVME_VS) == 0xffffffff)
+               return NULL;
+
+       return DEVNAME(sd);
+}
+
+/*
+ * BIOCINQ handler: describe the controller.
+ *
+ * Fills in the volume/disk counts and the controller name, then appends
+ * informational status messages built from the cached identify data
+ * (model, firmware, serial, capabilities) and from the live CC, CSTS and
+ * VS registers.  Always returns 0.
+ */
+int
+nvme_bioctl_inq(struct nvme_softc *sc, struct bioc_inq *bi)
+{
+       char                             sn[41], mn[81], fr[17];
+       struct nvm_identify_controller  *idctrl = &sc->sc_identify;
+       struct bio_status               *bs;
+       unsigned int                     nn;
+       uint32_t                         cc, csts, vs;
+
+       /* Don't tell bioctl about namespaces > last configured namespace. */
+       for (nn = sc->sc_nn; nn > 0; nn--) {
+               if (sc->sc_namespaces[nn].ident)
+                       break;
+       }
+       bi->bi_novol = bi->bi_nodisk = nn;
+       strlcpy(bi->bi_dev, DEVNAME(sc), sizeof(bi->bi_dev));
+
+       bs = &bi->bi_bio.bio_status;
+       bio_status_init(bs, &sc->sc_dev);
+       bs->bs_status = BIO_STATUS_SUCCESS;
+
+       /* Produce printable copies of the fixed-width identify strings. */
+       scsi_strvis(sn, idctrl->sn, sizeof(idctrl->sn));
+       scsi_strvis(mn, idctrl->mn, sizeof(idctrl->mn));
+       scsi_strvis(fr, idctrl->fr, sizeof(idctrl->fr));
+
+       nvme_bio_status(bs, "%s, %s, %s", mn, fr, sn);
+       nvme_bio_status(bs, "Max i/o %zu bytes%s%s%s, Sanitize 0x%b",
+           sc->sc_mdts,
+           ISSET(idctrl->lpa, NVM_ID_CTRL_LPA_PE) ?
+           ", Persistent Event Log" : "",
+           ISSET(idctrl->fna, NVM_ID_CTRL_FNA_CRYPTOFORMAT) ?
+           ", CryptoFormat" : "",
+           ISSET(idctrl->vwc, NVM_ID_CTRL_VWC_PRESENT) ?
+           ", Volatile Write Cache" : "",
+           lemtoh32(&idctrl->sanicap), NVM_ID_CTRL_SANICAP_FMT
+       );
+
+       if (idctrl->ctratt != 0)
+               nvme_bio_status(bs, "Features 0x%b", lemtoh32(&idctrl->ctratt),
+                   NVM_ID_CTRL_CTRATT_FMT);
+
+       if (idctrl->oacs || idctrl->oncs) {
+               nvme_bio_status(bs, "Admin commands 0x%b, NVM commands 0x%b",
+                   lemtoh16(&idctrl->oacs), NVM_ID_CTRL_OACS_FMT,
+                   lemtoh16(&idctrl->oncs), NVM_ID_CTRL_ONCS_FMT);
+       }
+
+       cc = nvme_read4(sc, NVME_CC);
+       csts = nvme_read4(sc, NVME_CSTS);
+       vs = nvme_read4(sc, NVME_VS);
+
+       /* All ones means the register reads are not reaching the device. */
+       if (vs == 0xffffffff) {
+               nvme_bio_status(bs, "Invalid PCIe register mapping");
+               return 0;
+       }
+
+       /*
+        * Every optional fragment carries its own leading ", " so that the
+        * assembled line never has a missing or doubled separator.
+        */
+       nvme_bio_status(bs, "NVMe %u.%u%s%s%sabled, %sReady%s%s%s%s",
+           NVME_VS_MJR(vs), NVME_VS_MNR(vs),
+           (NVME_CC_CSS_R(cc) == NVME_CC_CSS_NVM) ? ", NVM I/O command set" : "",
+           (NVME_CC_CSS_R(cc) == 0x7) ? ", Admin command set only" : "",
+           ISSET(cc, NVME_CC_EN) ? ", En" : ", Dis",
+           ISSET(csts, NVME_CSTS_RDY) ? "" : "Not ",
+           ISSET(csts, NVME_CSTS_CFS) ? ", Fatal Error" : "",
+           (NVME_CC_SHN_R(cc) == NVME_CC_SHN_NORMAL) ? ", Normal shutdown" : "",
+           (NVME_CC_SHN_R(cc) == NVME_CC_SHN_ABRUPT) ? ", Abrupt shutdown" : "",
+           ISSET(csts, NVME_CSTS_SHST_DONE) ? " complete" : "");
+
+       return 0;
+}
+
+/*
+ * BIOCVOL handler: describe one namespace as a bio(4) volume.
+ *
+ * bv_volid is zero-based and maps to namespace id bv_volid + 1.  A volume
+ * id outside 1..sc_nn, or one with no identify data, is reported as
+ * BIOC_SVINVALID.  Otherwise the size is filled in and the status is
+ * BIOC_SVONLINE when a disk name is available, BIOC_SVOFFLINE when not.
+ * Always returns 0.
+ */
+int
+nvme_bioctl_vol(struct nvme_softc *sc, struct bioc_vol *bv)
+{
+       const struct nvm_identify_namespace     *idns;
+       const char                              *sd;
+       int                                      target;
+       unsigned int                             lbaf;
+
+       /* bv_volid is caller-supplied; reject negative ids explicitly. */
+       target = bv->bv_volid + 1;
+       if (target < 1 || target > sc->sc_nn) {
+               bv->bv_status = BIOC_SVINVALID;
+               return 0;
+       }
+
+       bv->bv_level = 'c';
+       bv->bv_nodisk = 1;
+
+       idns = sc->sc_namespaces[target].ident;
+       if (idns == NULL) {
+               bv->bv_status = BIOC_SVINVALID;
+               return 0;
+       }
+
+       /*
+        * The format index is FLBAS bits 3:0, extended by bits 6:5 as index
+        * bits 5:4 when more formats exist than lbaf[] holds.  Bit 4 of
+        * FLBAS is NVME_ID_NS_FLBAS_MD and must not leak into the index,
+        * hence the 0x30 mask after the shift.
+        */
+       lbaf = NVME_ID_NS_FLBAS(idns->flbas);
+       if (idns->nlbaf > nitems(idns->lbaf))
+               lbaf |= (idns->flbas >> 1) & 0x30;
+
+       /* Never index past lbaf[]; report size 0 for an unknown format. */
+       if (lbaf < nitems(idns->lbaf))
+               bv->bv_size = nvme_scsi_size(idns) << idns->lbaf[lbaf].lbads;
+       else
+               bv->bv_size = 0;
+
+       sd = nvme_bioctl_sdname(sc, target);
+       if (sd) {
+               strlcpy(bv->bv_dev, sd, sizeof(bv->bv_dev));
+               bv->bv_status = BIOC_SVONLINE;
+       } else
+               bv->bv_status = BIOC_SVOFFLINE;
+
+       return 0;
+}
+
+/*
+ * BIOCDISK handler: describe one namespace as a bio(4) disk.
+ *
+ * bd_volid is zero-based and maps to namespace id bd_volid + 1.  Returns
+ * EINVAL when that id exceeds sc_nn, otherwise 0.  A namespace without
+ * identify data is reported as BIOC_SDUNUSED.  For a configured namespace
+ * the size and serial (NGUID, else EUI64) are filled in, status messages
+ * list the LBA formats ('*' marks the one in use), features and data
+ * protection settings, and the status is BIOC_SDONLINE or BIOC_SDOFFLINE
+ * depending on whether a disk name is available.
+ */
+int
+nvme_bioctl_disk(struct nvme_softc *sc, struct bioc_disk *bd)
+{
+       const char                      *rpdesc[4] = {
+               " (Best)",
+               " (Better)",
+               " (Good)",
+               " (Degraded)"
+       };
+       const char                      *protection[4] = {
+               "not enabled",
+               "Type 1",
+               "Type 2",
+               "Type 3",
+       };
+       char                             buf[32], msg[BIO_MSG_LEN];
+       struct nvm_identify_namespace   *idns;
+       struct bio_status               *bs;
+       uint64_t                         id1, id2;
+       unsigned int                     i, lbaf, target;
+       uint16_t                         ms;
+       uint8_t                          dps, lbads;
+
+       target = bd->bd_volid + 1;
+       if (target > sc->sc_nn)
+               return EINVAL;
+       bd->bd_channel = sc->sc_scsibus->sc_dev.dv_unit;
+       bd->bd_target = target;
+       bd->bd_lun = 0;
+       snprintf(bd->bd_procdev, sizeof(bd->bd_procdev), "Namespace %u", target);
+
+       bs = &bd->bd_bio.bio_status;
+       bs->bs_status = BIO_STATUS_SUCCESS;
+       snprintf(bs->bs_controller, sizeof(bs->bs_controller), "%11u",
+           bd->bd_diskid);
+
+       idns = sc->sc_namespaces[target].ident;
+       if (idns == NULL) {
+               bd->bd_status = BIOC_SDUNUSED;
+               return 0;
+       }
+
+       /*
+        * The format index is FLBAS bits 3:0, extended by bits 6:5 as index
+        * bits 5:4 when more formats exist than lbaf[] holds.  Bit 4 of
+        * FLBAS is NVME_ID_NS_FLBAS_MD and must not leak into the index,
+        * hence the 0x30 mask after the shift.
+        */
+       lbaf = NVME_ID_NS_FLBAS(idns->flbas);
+       if (idns->nlbaf > nitems(idns->lbaf))
+               lbaf |= (idns->flbas >> 1) & 0x30;
+
+       /* Never index past lbaf[]; report size 0 for an unknown format. */
+       if (lbaf < nitems(idns->lbaf))
+               bd->bd_size = lemtoh64(&idns->nsze) <<
+                   idns->lbaf[lbaf].lbads;
+       else
+               bd->bd_size = 0;
+
+       /*
+        * NGUID and EUI64 are big-endian byte strings.  Convert to host
+        * order and zero-pad each 64-bit half to 16 digits so the output
+        * reads in byte order; snprintf() truncates if bd_serial is short.
+        */
+       if (memcmp(idns->nguid, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", 16)) {
+               memcpy(&id1, idns->nguid, sizeof(uint64_t));
+               memcpy(&id2, idns->nguid + sizeof(uint64_t), sizeof(uint64_t));
+               snprintf(bd->bd_serial, sizeof(bd->bd_serial),
+                   "%016llx%016llx",
+                   (unsigned long long)betoh64(id1),
+                   (unsigned long long)betoh64(id2));
+       } else if (memcmp(idns->eui64, "\0\0\0\0\0\0\0\0", 8)) {
+               memcpy(&id1, idns->eui64, sizeof(uint64_t));
+               snprintf(bd->bd_serial, sizeof(bd->bd_serial), "%016llx",
+                   (unsigned long long)betoh64(id1));
+       }
+
+       /*
+        * nlbaf is the zero-based count reported by the device and can
+        * exceed the number of entries lbaf[] has, so bound the walk by
+        * the array size as well.
+        */
+       msg[0] = '\0';
+       for (i = 0; i <= idns->nlbaf && i < nitems(idns->lbaf); i++) {
+               lbads = idns->lbaf[i].lbads;
+               /* 0 is an unused slot; > 31 cannot be shifted into 32 bits. */
+               if (lbads == 0 || lbads > 31)
+                       continue;
+               snprintf(buf, sizeof(buf), "%s%s%u",
+                   (msg[0] != '\0') ? ", " : "", (i == lbaf) ? "*" : "",
+                   1U << lbads);
+               strlcat(msg, buf, sizeof(msg));
+               ms = lemtoh16(&idns->lbaf[i].ms);
+               if (ms) {
+                       snprintf(buf, sizeof(buf), "+%u", ms);
+                       strlcat(msg, buf, sizeof(msg));
+               }
+               /* Only the low two bits of rp select a description. */
+               strlcat(msg, rpdesc[idns->lbaf[i].rp & 0x3], sizeof(msg));
+       }
+       nvme_bio_status(bs, "Formats %s", msg);
+
+       if (idns->nsfeat)
+               nvme_bio_status(bs, "Features 0x%b", idns->nsfeat,
+                   NVME_ID_NS_NSFEAT_FMT);
+
+       if (idns->dps) {
+               dps = idns->dps;
+               snprintf(msg, sizeof(msg), "Data Protection (0x%02x) "
+                   "Protection Data in ", dps);
+               if (ISSET(dps, NVME_ID_NS_DPS_PIP))
+                       strlcat(msg, "first", sizeof(msg));
+               else
+                       strlcat(msg, "last", sizeof(msg));
+               strlcat(msg, " bytes of metadata, Protection ", sizeof(msg));
+               if (NVME_ID_NS_DPS_TYPE(dps) >= nitems(protection))
+                       strlcat(msg, "Type unknown", sizeof(msg));
+               else
+                       strlcat(msg, protection[NVME_ID_NS_DPS_TYPE(dps)],
+                           sizeof(msg));
+               nvme_bio_status(bs, "%s", msg);
+       }
+
+       if (nvme_bioctl_sdname(sc, target) == NULL)
+               bd->bd_status = BIOC_SDOFFLINE;
+       else
+               bd->bd_status = BIOC_SDONLINE;
+
+       return 0;
+}
+#endif /* NBIO > 0 */
index 59300be..41886c8 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: nvmereg.h,v 1.13 2023/12/20 13:37:25 krw Exp $ */
+/*     $OpenBSD: nvmereg.h,v 1.14 2024/05/13 11:41:52 krw Exp $ */
 
 /*
  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
@@ -290,16 +290,30 @@ struct nvm_identify_controller {
        u_int8_t        mdts;           /* Maximum Data Transfer Size */
        u_int16_t       cntlid;         /* Controller ID */
 
-       u_int8_t        _reserved1[176];
+       u_int8_t        _reserved1[16];
+       u_int32_t       ctratt;
+#define NVM_ID_CTRL_CTRATT_FMT                 "\020" \
+       "\016DELEG" "\017DEVNVM" "\020ELBAS" "\005ENDURGRPS" \
+       "\014FIXCAPMGMT" "\001HOSTID" "\013MDS" "\002NOPSPM" \
+       "\010NSGRAN" "\003NVMSETS" "\006PREDLATENCY" "\004READRCVRY" \
+       "\011SQASSOC" "\007TBKAS" "\012UUIDLIST" "\015VARCAPMGMT"
+
+       u_int8_t        _reserved9[156];
 
        /* Admin Command Set Attributes & Optional Controller Capabilities */
 
        u_int16_t       oacs;           /* Optional Admin Command Support */
+#define NVM_ID_CTRL_OACS_FMT                   "\020" \
+       "\013CAFL" "\011DBBC" "\006DIREC" "\005DST" "\012GLBAS" \
+       "\002FORMAT" "\003FWCD" "\007MISR" "\004NSMGMT" "\001SECSR" \
+       "\010VM"
+
        u_int8_t        acl;            /* Abort Command Limit */
        u_int8_t        aerl;           /* Asynchronous Event Request Limit */
 
        u_int8_t        frmw;           /* Firmware Updates */
        u_int8_t        lpa;            /* Log Page Attributes */
+#define NVM_ID_CTRL_LPA_PE             (1 << 4)
        u_int8_t        elpe;           /* Error Log Page Entries */
        u_int8_t        npss;           /* Number of Power States Support */
 
@@ -308,7 +322,11 @@ struct nvm_identify_controller {
        u_int8_t        apsta;          /* Autonomous Power State Transition
                                           Attributes */
 
-       u_int8_t        _reserved2[246];
+       u_int8_t        _reserved2[62];
+       u_int32_t       sanicap;
+#define NVM_ID_CTRL_SANICAP_FMT                        "\020" \
+       "\002BlockErase" "\001CryptoErase" "\003Overwrite"
+       u_int8_t        _reserved10[180];
 
        /* NVM Command Set Attributes */
 
@@ -319,10 +337,16 @@ struct nvm_identify_controller {
        u_int32_t       nn;             /* Number of Namespaces */
 
        u_int16_t       oncs;           /* Optional NVM Command Support */
+#define NVM_ID_CTRL_ONCS_FMT                   "\020" \
+       "\006RSV" "\001SCMP" "\011SCPY" "\003SDMGMT" "\005SF" \
+       "\010SV" "\002SWU" "\004SWZ" "\007TS"
+
        u_int16_t       fuses;          /* Fused Operation Support */
 
        u_int8_t        fna;            /* Format NVM Attributes */
+#define NVM_ID_CTRL_FNA_CRYPTOFORMAT           (1 << 2)
        u_int8_t        vwc;            /* Volatile Write Cache */
+#define NVM_ID_CTRL_VWC_PRESENT                        (1 << 0)
        u_int16_t       awun;           /* Atomic Write Unit Normal */
 
        u_int16_t       awupf;          /* Atomic Write Unit Power Fail */
@@ -364,16 +388,24 @@ struct nvm_identify_namespace {
 
        u_int8_t        nsfeat;         /* Namespace Features */
 #define        NVME_ID_NS_NSFEAT_THIN_PROV     (1 << 0)
+#define NVME_ID_NS_NSFEAT_FMT          "\020" \
+       "\002NSABP" "\005OPTPERF" "\001THIN_PROV" "\004UIDREUSE" "\003DAE"
+
        u_int8_t        nlbaf;          /* Number of LBA Formats */
        u_int8_t        flbas;          /* Formatted LBA Size */
-#define NVME_ID_NS_FLBAS(_f)                   ((_f) & 0x0f)
-#define NVME_ID_NS_FLBAS_MD                    0x10
+#define NVME_ID_NS_FLBAS(_f)           ((_f) & 0x0f)
+#define NVME_ID_NS_FLBAS_MD            0x10
        u_int8_t        mc;             /* Metadata Capabilities */
+
        u_int8_t        dpc;            /* End-to-end Data Protection
                                           Capabilities */
        u_int8_t        dps;            /* End-to-end Data Protection Type Settings */
+#define NVME_ID_NS_DPS_PIP             (1 << 3)
+#define NVME_ID_NS_DPS_TYPE(_f)                ((_f) & 0x7)
 
-       u_int8_t        _reserved1[98];
+       u_int8_t        _reserved1[74];
+       uint8_t         nguid[16];
+       uint8_t         eui64[8];       /* BIG-endian */
 
        struct nvm_namespace_format
                        lbaf[16];       /* LBA Format Support */
index 2a7b59a..4a7c109 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: nvmevar.h,v 1.28 2021/08/29 12:02:52 kettenis Exp $ */
+/*     $OpenBSD: nvmevar.h,v 1.29 2024/05/13 11:41:52 krw Exp $ */
 
 /*
  * Copyright (c) 2014 David Gwynne <dlg@openbsd.org>
@@ -125,6 +125,8 @@ struct nvme_softc {
        struct nvme_ccb_list    sc_ccb_list;
        struct nvme_dmamem      *sc_ccb_prpls;
        struct scsi_iopool      sc_iopool;
+       struct rwlock           sc_lock;
+       struct scsibus_softc    *sc_scsibus;
 };
 
 #define DEVNAME(_sc) ((_sc)->sc_dev.dv_xname)