Full rewrite of metadata handling. This fixes power failures and crashes
author marco <marco@openbsd.org>
Sat, 19 Jul 2008 22:41:58 +0000 (22:41 +0000)
committer marco <marco@openbsd.org>
Sat, 19 Jul 2008 22:41:58 +0000 (22:41 +0000)
that caused illegal checksums.  The new metadata code is more or less ready
to deal with other vendors' metadata formats.

While here clean up the name space.

Fix thib's pool mess by adding removing bad flags in interrupt context.

tested on macppc, amd64, i386, sparc64 & hppa

sparc64 has issues with crypto however those do not seem to be softraid
specific.

help from okan@ ckuethe@ Will Backman and others

sys/dev/softraid.c
sys/dev/softraid_crypto.c
sys/dev/softraid_raid0.c
sys/dev/softraid_raid1.c
sys/dev/softraidvar.h

index a60ee5c..b727b10 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid.c,v 1.116 2008/06/25 17:43:09 thib Exp $ */
+/* $OpenBSD: softraid.c,v 1.117 2008/07/19 22:41:58 marco Exp $ */
 /*
  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
@@ -64,75 +64,1069 @@ uint32_t  sr_debug = 0
                ;
 #endif
 
-void           sr_init(void);
+/* autoconf entry points; all four are referenced by softraid_ca below */
+int            sr_match(struct device *, void *, void *);
+void           sr_attach(struct device *, struct device *, void *);
+int            sr_detach(struct device *, int);
+int            sr_activate(struct device *, enum devact);
+
+/* attachment descriptor: softc size followed by the entry points above */
+struct cfattach softraid_ca = {
+       sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
+       sr_activate
+};
+
+/* driver descriptor carrying the driver name "softraid" */
+struct cfdriver softraid_cd = {
+       NULL, "softraid", DV_DULL
+};
+
+/* scsi & discipline */
+int                    sr_scsi_cmd(struct scsi_xfer *);
+void                   sr_minphys(struct buf *bp);
+void                   sr_copy_internal_data(struct scsi_xfer *,
+                           void *, size_t);
+int                    sr_scsi_ioctl(struct scsi_link *, u_long,
+                           caddr_t, int, struct proc *);
+int                    sr_ioctl(struct device *, u_long, caddr_t);
+int                    sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
+int                    sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
+int                    sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
+int                    sr_ioctl_setstate(struct sr_softc *,
+                           struct bioc_setstate *);
+int                    sr_ioctl_createraid(struct sr_softc *,
+                           struct bioc_createraid *, int);
+int                    sr_ioctl_deleteraid(struct sr_softc *,
+                           struct bioc_deleteraid *);
+void                   sr_chunks_unwind(struct sr_softc *,
+                           struct sr_chunk_head *);
+void                   sr_discipline_free(struct sr_discipline *);
+void                   sr_discipline_shutdown(struct sr_discipline *);
+
+/* utility functions */
+void                   sr_shutdown(void *);
+void                   sr_uuid_get(struct sr_uuid *);
+void                   sr_uuid_print(struct sr_uuid *, int);
+void                   sr_checksum_print(u_int8_t *);
+void                   sr_checksum(struct sr_softc *, void *, void *,
+                           u_int32_t);
+int                    sr_boot_assembly(struct sr_softc *);
+int                    sr_already_assembled(struct sr_discipline *);
+
+/* don't include these on RAMDISK */
+#ifndef SMALL_KERNEL
+void                   sr_sensors_refresh(void *);
+int                    sr_sensors_create(struct sr_discipline *);
+void                   sr_sensors_delete(struct sr_discipline *);
+#endif
+
+/* metadata */
+int                    sr_meta_probe(struct sr_discipline *, dev_t *, int);
+int                    sr_meta_attach(struct sr_discipline *, int);
+void                   sr_meta_getdevname(struct sr_softc *, dev_t, char *,
+                           int);
+int                    sr_meta_rw(struct sr_discipline *, dev_t, void *,
+                           size_t, daddr64_t, long);
+int                    sr_meta_clear(struct sr_discipline *);
+int                    sr_meta_read(struct sr_discipline *);
+int                    sr_meta_save(struct sr_discipline *, u_int32_t);
+int                    sr_meta_validate(struct sr_discipline *, dev_t,
+                           struct sr_metadata *, void *);
+void                   sr_meta_chunks_create(struct sr_softc *,
+                           struct sr_chunk_head *);
+void                   sr_meta_init(struct sr_discipline *,
+                           struct sr_chunk_head *);
+
+/* native metadata format */
+int                    sr_meta_native_bootprobe(struct sr_softc *,
+                           struct device *, struct sr_metadata_list_head *);
+/* return values of sr_meta_native_bootprobe() */
+#define SR_META_NOTCLAIMED     (0)
+#define SR_META_CLAIMED                (1)
+int                    sr_meta_native_probe(struct sr_softc *,
+                          struct sr_chunk *);
+int                    sr_meta_native_attach(struct sr_discipline *, int);
+int                    sr_meta_native_read(struct sr_discipline *, dev_t,
+                           struct sr_metadata *, void *);
+int                    sr_meta_native_write(struct sr_discipline *, dev_t,
+                           struct sr_metadata *,void *);
+
+/* sr_meta_print() expands to nothing unless SR_DEBUG is defined */
+#ifdef SR_DEBUG
+void                   sr_meta_print(struct sr_metadata *);
+#else
+#define                        sr_meta_print(m)
+#endif
+
+/* the metadata driver should remain stateless */
+/*
+ * Table of metadata format drivers, indexed by sd_meta_type; the native
+ * format is slot 0 (SR_META_F_NATIVE).  The table ends with an entry whose
+ * smd_probe is NULL, which is the condition sr_meta_probe() stops on.
+ * Individual hooks may be NULL: the native entry provides no smd_detach and
+ * no smd_validate.
+ */
+struct sr_meta_driver {
+       daddr64_t               smd_offset;     /* metadata location */
+       u_int32_t               smd_size;       /* size of metadata */
+
+       int                     (*smd_probe)(struct sr_softc *,
+                                  struct sr_chunk *);
+       int                     (*smd_attach)(struct sr_discipline *, int);
+       int                     (*smd_detach)(struct sr_discipline *);
+       int                     (*smd_read)(struct sr_discipline *, dev_t,
+                                   struct sr_metadata *, void *);
+       int                     (*smd_write)(struct sr_discipline *, dev_t,
+                                   struct sr_metadata *, void *);
+       int                     (*smd_validate)(struct sr_discipline *,
+                                   struct sr_metadata *, void *);
+} smd[] = {
+       /* native format: metadata at SR_META_OFFSET, SR_META_SIZE * 512 bytes */
+       { SR_META_OFFSET, SR_META_SIZE * 512,
+         sr_meta_native_probe, sr_meta_native_attach, NULL,
+         sr_meta_native_read , sr_meta_native_write, NULL },
+#define SR_META_F_NATIVE       0
+       /* terminator; members without an initializer are zero as well */
+       { 0, 0, NULL, NULL, NULL, NULL }
+#define SR_META_F_INVALID      -1
+};
+
+/*
+ * Allocate the in-memory metadata copies for a discipline and hand off to
+ * the format specific attach hook.
+ *
+ * sd:    discipline being attached; sd_meta_type selects the smd[] entry.
+ * force: passed through unchanged to the format's smd_attach hook.
+ *
+ * On success the chunk list is flattened into sd_vol.sv_chunks[] in list
+ * order.  Returns 0 on success and 1 on failure.  Nothing allocated here is
+ * released on failure; as the comment below says, unwind frees sd_meta.
+ */
+int
+sr_meta_attach(struct sr_discipline *sd, int force)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_chunk_head    *cl;
+       struct sr_chunk         *ch_entry;
+       int                     rv = 1, i;
+
+       /* the format has two conversions; force supplies the %d */
+       DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force);
+
+       /* in memory copy of metadata */
+       sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
+       if (!sd->sd_meta) {
+               printf("%s: could not allocate memory for metadata\n",
+                   DEVNAME(sc));
+               goto bad;
+       }
+
+       if (sd->sd_meta_type != SR_META_F_NATIVE) {
+               /* in memory copy of foreign metadata */
+               sd->sd_meta_foreign =  malloc(smd[sd->sd_meta_type].smd_size ,
+                   M_DEVBUF, M_ZERO);
+               if (!sd->sd_meta_foreign) {
+                       /* unwind frees sd_meta */
+                       printf("%s: could not allocate memory for foreign "
+                           "metadata\n", DEVNAME(sc));
+                       goto bad;
+               }
+       }
+
+       /* format specific attach; non-zero means failure */
+       if (smd[sd->sd_meta_type].smd_attach(sd, force))
+               goto bad;
+
+       /* fill out chunk array */
+       cl = &sd->sd_vol.sv_chunk_list;
+       i = 0;
+       SLIST_FOREACH(ch_entry, cl, src_link)
+               sd->sd_vol.sv_chunks[i++] = ch_entry;
+
+       rv = 0;
+bad:
+       return (rv);
+}
+
+/*
+ * Open every device in dt[], append a chunk for it to the discipline's
+ * chunk list in the order given, and ask the metadata drivers in smd[]
+ * whether they recognize the chunk.
+ *
+ * Returns the metadata type shared by all chunks.  SR_META_F_INVALID is
+ * returned when no_chunk is 0, a device can not be opened, no driver
+ * recognizes a chunk, or the chunks do not all report the same type.
+ * Devices opened and chunks allocated here are not released on failure.
+ */
+int
+sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_chunk_head    *chunks = &sd->sd_vol.sv_chunk_list;
+       struct sr_chunk         *chunk, *tail = NULL;
+       struct bdevsw           *sw;
+       char                    name[32];
+       int                     n, drv, type, match;
+       int                     common = SR_META_F_INVALID;
+       dev_t                   dev;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk);
+
+       if (no_chunk == 0)
+               return (SR_META_F_INVALID);
+
+       for (n = 0; n < no_chunk; n++) {
+               dev = dt[n];
+               sr_meta_getdevname(sc, dev, name, sizeof(name));
+               sw = bdevsw_lookup(dev);
+
+               /*
+                * XXX leaving dev open for now; move this to attach and figure
+                * out the open/close dance for unwind.
+                */
+               if (sw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc)) {
+                       DNPRINTF(SR_D_META,"%s: sr_meta_probe can't open %s\n",
+                           DEVNAME(sc), name);
+                       /* XXX device isn't open but will be closed anyway */
+                       return (SR_META_F_INVALID);
+               }
+
+               chunk = malloc(sizeof(struct sr_chunk), M_DEVBUF,
+                   M_WAITOK | M_ZERO);
+
+               /* append, so disks stay in user supplied order */
+               if (tail == NULL)
+                       SLIST_INSERT_HEAD(chunks, chunk, src_link);
+               else
+                       SLIST_INSERT_AFTER(tail, chunk, src_link);
+               tail = chunk;
+
+               strlcpy(chunk->src_devname, name,
+                   sizeof(chunk->src_devname));
+               chunk->src_dev_mm = dev;
+
+               /* first driver that claims the chunk decides its type */
+               match = SR_META_F_INVALID;
+               for (drv = 0; smd[drv].smd_probe; drv++) {
+                       type = smd[drv].smd_probe(sc, chunk);
+                       if (type != SR_META_F_INVALID) {
+                               match = type;
+                               break;
+                       }
+               }
+               if (match == SR_META_F_INVALID)
+                       return (SR_META_F_INVALID);
+
+               /* every chunk has to agree with the first one */
+               if (common == SR_META_F_INVALID)
+                       common = match;
+               if (common != match) {
+                       DNPRINTF(SR_D_META, "%s: prevf != found\n",
+                           DEVNAME(sc));
+                       return (SR_META_F_INVALID);
+               }
+       }
+
+       return (common);
+}
+
+/*
+ * Format the disk name for dev ("<driver><unit><partition letter>") into
+ * buf, which holds size bytes.
+ *
+ * Nothing is written when buf is NULL or size is not positive.  Otherwise
+ * buf is always left NUL terminated: it is the empty string when the block
+ * major has no known driver name, so callers that copy or print it never
+ * read uninitialized memory.
+ */
+void
+sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size)
+{
+       int                     maj, unit, part;
+       char                    *name;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n",
+           DEVNAME(sc), buf, size);
+
+       /* a non-positive size must never reach snprintf as a huge size_t */
+       if (!buf || size <= 0)
+               return;
+
+       /* define the result up front for the lookup failure path */
+       buf[0] = '\0';
+
+       maj = major(dev);
+       part = DISKPART(dev);
+       unit = DISKUNIT(dev);
+
+       name = findblkname(maj);
+       if (name == NULL)
+               return;
+
+       snprintf(buf, size, "%s%d%c", name, unit, part + 'a');
+}
+
+/*
+ * Issue a single metadata transfer of sz bytes between md and block ofs of
+ * dev through the block device strategy routine, using a buf on the stack,
+ * and wait for it with biowait().
+ *
+ * flags is stored in b_flags (presumably B_READ or B_WRITE; confirm
+ * against the callers).  Returns 0 on success and 1 when md is NULL or the
+ * buf completes with B_ERROR set.
+ */
+int
+sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz,
+    daddr64_t ofs, long flags)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct buf              b;
+       int                     rv = 1;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n",
+           DEVNAME(sc), dev, md, sz, ofs, flags);
+
+       if (md == NULL) {
+               /* %s needs the device name, not the softc pointer */
+               printf("%s: read invalid metadata pointer\n", DEVNAME(sc));
+               goto done;
+       }
+
+       bzero(&b, sizeof(b));
+       b.b_flags = flags;
+       b.b_blkno = ofs;
+       b.b_bcount = sz;
+       b.b_bufsize = sz;
+       b.b_resid = sz;
+       b.b_data = md;
+       b.b_error = 0;
+       b.b_proc = curproc;
+       b.b_dev = dev;
+       b.b_vp = NULL;
+       b.b_iodone = NULL;
+       LIST_INIT(&b.b_dep);
+       bdevsw_lookup(b.b_dev)->d_strategy(&b);
+       biowait(&b);
+
+       if (b.b_flags & B_ERROR) {
+               /* %s needs the device name, not the softc pointer */
+               printf("%s: 0x%x i/o error on block %lld while reading "
+                   "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error);
+               goto done;
+       }
+       rv = 0;
+done:
+       return (rv);
+}
+
+/*
+ * Overwrite the native metadata area of every chunk with zeroes and clear
+ * the in-memory copies (per chunk src_meta/src_opt and sd_meta).
+ *
+ * Foreign metadata is refused: the function prints a message and returns 1.
+ * Otherwise it returns 0, including when some chunk writes failed; those
+ * chunks keep their in-memory src_meta/src_opt.
+ */
+int
+sr_meta_clear(struct sr_discipline *sd)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
+       struct sr_chunk         *ch_entry;
+       void                    *m;
+       int                     rv = 1;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc));
+
+       if (sd->sd_meta_type != SR_META_F_NATIVE) {
+               printf("%s: sr_meta_clear can not clear foreign metadata\n",
+                   DEVNAME(sc));
+               goto done;
+       }
+
+       /* zeroed buffer that is written over each chunk's metadata */
+       m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO);
+       SLIST_FOREACH(ch_entry, cl, src_link) {
+               if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) {
+                       /* XXX mark disk offline */
+                       /* two %s conversions: softraid name, then chunk */
+                       DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to "
+                           "clear %s\n", DEVNAME(sc), ch_entry->src_devname);
+                       /*
+                        * NOTE(review): this count is overwritten by the
+                        * rv = 0 below, so failures are not reported to the
+                        * caller; confirm whether that is intended.
+                        */
+                       rv++;
+                       continue;
+               }
+               bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta));
+               bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt));
+       }
+
+       bzero(sd->sd_meta, SR_META_SIZE * 512);
+
+       free(m, M_DEVBUF);
+       rv = 0;
+done:
+       return (rv);
+}
+
+/*
+ * Fill in the per chunk metadata for a freshly created volume: size,
+ * sequential chunk id, online status, device name and one uuid shared by
+ * all chunks.  Every chunk's coerced size is set to the smallest chunk
+ * size, and a warning is printed when the chunk sizes differ.
+ */
+void
+sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl)
+{
+       struct sr_chunk         *chunk;
+       struct sr_meta_chunk    *meta;
+       struct sr_uuid          uuid;
+       int                     next_id = 0;
+       u_int64_t               largest = 0, smallest;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc));
+
+       sr_uuid_get(&uuid);
+
+       /* first pass: populate each chunk and track the largest size */
+       SLIST_FOREACH(chunk, cl, src_link) {
+               meta = &chunk->src_meta;
+
+               meta->scmi.scm_size = chunk->src_size;
+               meta->scmi.scm_chunk_id = next_id++;
+               meta->scm_status = BIOC_SDONLINE;
+               strlcpy(meta->scmi.scm_devname, chunk->src_devname,
+                   sizeof(meta->scmi.scm_devname));
+               bcopy(&uuid, &meta->scmi.scm_uuid,
+                   sizeof(meta->scmi.scm_uuid));
+
+               if (meta->scmi.scm_size > largest)
+                       largest = meta->scmi.scm_size;
+       }
+
+       /* second pass: find the smallest size, starting from the largest */
+       smallest = largest;
+       SLIST_FOREACH(chunk, cl, src_link) {
+               if (chunk->src_meta.scmi.scm_size < smallest)
+                       smallest = chunk->src_meta.scmi.scm_size;
+       }
+
+       /* third pass: every chunk is coerced down to the smallest size */
+       SLIST_FOREACH(chunk, cl, src_link) {
+               chunk->src_meta.scmi.scm_coerced_size = smallest;
+       }
+
+       /* nothing to report when all chunks have the same size */
+       if (smallest == largest)
+               return;
+
+       printf("%s: chunk sizes are not equal; up to %llu blocks "
+           "wasted per chunk\n",
+           DEVNAME(sc), largest - smallest);
+}
+
+/*
+ * Seed the in-memory volume metadata of a discipline and complete the
+ * per chunk metadata: magic, version, flags and the uuid of chunk 0 go
+ * into sd_meta, every chunk gets the volume id and a checksum, and for
+ * crypto disciplines the crypto metadata is copied into each chunk's
+ * optional metadata.  Does nothing when sd_meta is NULL.  cl is unused.
+ */
+void
+sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_metadata      *meta = sd->sd_meta;
+       struct sr_chunk         *chunk;
+       struct sr_meta_chunk    *cmeta;
+       struct sr_meta_opt      *opt;
+       int                     n, total;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc));
+
+       if (meta == NULL)
+               return;
+
+       /* initial metadata */
+       meta->ssdi.ssd_magic = SR_MAGIC;
+       meta->ssdi.ssd_version = SR_META_VERSION;
+       meta->ssd_ondisk = 0;
+       meta->ssdi.ssd_flags = sd->sd_meta_flags;
+
+       /* the volume uuid is taken from chunk 0 */
+       bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid,
+           &meta->ssdi.ssd_uuid,
+           sizeof(struct sr_uuid));
+
+       /* volume is filled in createraid */
+
+       /* add missing chunk bits */
+       total = meta->ssdi.ssd_chunk_no;
+       for (n = 0; n < total; n++) {
+               chunk = sd->sd_vol.sv_chunks[n];
+
+               cmeta = &chunk->src_meta;
+               cmeta->scmi.scm_volid = meta->ssdi.ssd_volid;
+               sr_checksum(sc, cmeta, &cmeta->scm_checksum,
+                   sizeof(struct sr_meta_chunk_invariant));
+
+               /* carry optional meta also in chunk area */
+               opt = &chunk->src_opt;
+               bzero(opt, sizeof(*opt));
+
+               /* only crypto volumes have optional metadata to add */
+               if (sd->sd_type != SR_MD_CRYPTO)
+                       continue;
+
+               meta->ssdi.ssd_opt_no = 1;
+               opt->somi.som_type = SR_OPT_CRYPTO;
+
+               /*
+                * copy encrypted key / passphrase into optional
+                * metadata area
+                */
+               bcopy(&sd->mds.mdd_crypto.scr_meta,
+                   &opt->somi.som_meta.smm_crypto,
+                   sizeof(opt->somi.som_meta.smm_crypto));
+
+               sr_checksum(sc, opt, opt->som_checksum,
+                   sizeof(struct sr_meta_opt_invariant));
+       }
+}
+
+/*
+ * Callback wrapper around sr_meta_save(): arg1 is the discipline, arg2 is
+ * unused.  Saves the metadata with SR_META_DIRTY at splbio and clears
+ * sd_must_flush afterwards, whether or not the save succeeded.
+ */
+void
+sr_meta_save_callback(void *arg1, void *arg2)
+{
+       struct sr_discipline    *sd = arg1;
+       int                     failed, spl;
+
+       spl = splbio();
+
+       failed = sr_meta_save(sd, SR_META_DIRTY);
+       if (failed)
+               printf("%s: save metadata failed\n",
+                   DEVNAME(sd->sd_sc));
+
+       sd->sd_must_flush = 0;
+
+       splx(spl);
+}
+
+/*
+ * Write the in-memory metadata of a discipline to every chunk that is not
+ * offline.
+ *
+ * A scratch copy is assembled from sd_meta, the per chunk metadata and at
+ * most one optional metadata member.  For each chunk the chunk id and the
+ * checksum are updated in the scratch copy before it goes to the format's
+ * smd_write hook.  A chunk that fails to write is marked offline and the
+ * whole save restarts, which bumps ssd_ondisk again.  After the writes a
+ * fake work unit is passed to the discipline's sd_scsi_sync hook.
+ *
+ * flags is stored in ssd_meta_flags.  Returns 0 on success, 1 when there
+ * is no in-memory metadata or the scratch area can not be allocated.
+ */
+int
+sr_meta_save(struct sr_discipline *sd, u_int32_t flags)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_metadata      *sm = sd->sd_meta, *m;
+       struct sr_meta_driver   *s;
+       struct sr_chunk         *src;
+       struct sr_meta_chunk    *cm;
+       struct sr_workunit      wu;
+       struct sr_meta_opt      *om;
+       int                     i;
+
+       if (!sm) {
+               printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
+               goto bad;
+       }
+
+       /* only print ssd_devname once sm is known to be valid */
+       DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n",
+           DEVNAME(sc), sm->ssd_devname);
+
+       /* meta scratchpad */
+       s = &smd[sd->sd_meta_type];
+       m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
+       if (!m) {
+               printf("%s: could not allocate metadata scratch area\n",
+                   DEVNAME(sc));
+               goto bad;
+       }
+
+       if (sm->ssdi.ssd_opt_no > 1)
+               panic("not yet save > 1 optional metadata members");
+
+       /* from here on out metadata is updated */
+restart:
+       sm->ssd_ondisk++;
+       sm->ssd_meta_flags = flags;
+       bcopy(sm, m, sizeof(*m));
+
+       /*
+        * chunk metadata directly follows the volume metadata; set cm before
+        * the loop so it is defined for the om calculation below even when
+        * there are no chunks
+        */
+       cm = (struct sr_meta_chunk *)(m + 1);
+       for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
+               src = sd->sd_vol.sv_chunks[i];
+               bcopy(&src->src_meta, cm + i, sizeof(*cm));
+       }
+
+       /*
+        * optional metadata follows the chunk array; src still refers to
+        * the last chunk of the loop above
+        */
+       om = (struct sr_meta_opt *)(cm + i);
+       for (i = 0; i < sm->ssdi.ssd_opt_no; i++) {
+               bcopy(&src->src_opt, om + i, sizeof(*om));
+               sr_checksum(sc, &om[i], &om[i].som_checksum,
+                   sizeof(struct sr_meta_opt_invariant));
+       }
+
+       for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) {
+               src = sd->sd_vol.sv_chunks[i];
+
+               /* skip disks that are offline */
+               if (src->src_meta.scm_status == BIOC_SDOFFLINE)
+                       continue;
+
+               /* calculate metdata checksum for correct chunk */
+               m->ssdi.ssd_chunk_id = i;
+               sr_checksum(sc, m, &m->ssd_checksum,
+                   sizeof(struct sr_meta_invariant));
+
+#ifdef SR_DEBUG
+               DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d "
+                   "chunkid: %d checksum: ",
+                   DEVNAME(sc), src->src_meta.scmi.scm_devname,
+                   m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id);
+
+               /* test the debug mask; "&=" would overwrite sr_debug */
+               if (sr_debug & SR_D_META)
+                       sr_checksum_print((u_int8_t *)&m->ssd_checksum);
+               DNPRINTF(SR_D_META, "\n");
+               sr_meta_print(m);
+#endif
+
+               /* translate and write to disk */
+               if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) {
+                       printf("%s: could not write metadata to %s\n",
+                           DEVNAME(sc), src->src_devname);
+                       /* restart the meta write */
+                       src->src_meta.scm_status = BIOC_SDOFFLINE;
+                       /* XXX recalculate volume status */
+                       goto restart;
+               }
+       }
+
+       /* fake work unit handed to the discipline's sync hook */
+       bzero(&wu, sizeof(wu));
+       wu.swu_fake = 1;
+       wu.swu_dis = sd;
+       sd->sd_scsi_sync(&wu);
+
+       free(m, M_DEVBUF);
+       return (0);
+bad:
+       return (1);
+}
+
+/*
+ * Read the metadata of every chunk on the discipline's chunk list through
+ * the format's smd_read hook and populate the in-memory copies.
+ *
+ * Chunks that can not be read or that lack SR_MAGIC are skipped.  The
+ * first chunk that validates supplies sd_meta; each validated chunk gets
+ * its src_meta and, when present, its single optional metadata member
+ * (crypto metadata is also copied into the discipline).
+ *
+ * Returns the number of chunks with usable metadata, or -1 as soon as one
+ * chunk fails validation.  The scratch buffers are released on every path.
+ */
+int
+sr_meta_read(struct sr_discipline *sd)
+{
+#ifdef SR_DEBUG
+       struct sr_softc         *sc = sd->sd_sc;
+#endif
+       struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
+       struct sr_metadata      *sm;
+       struct sr_chunk         *ch_entry;
+       struct sr_meta_chunk    *cp;
+       struct sr_meta_driver   *s;
+       struct sr_meta_opt      *om;
+       void                    *fm = NULL;
+       int                     no_disk = 0;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc));
+
+       sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
+       s = &smd[sd->sd_meta_type];
+       if (sd->sd_meta_type != SR_META_F_NATIVE)
+               fm = malloc(s->smd_size , M_DEVBUF, M_WAITOK | M_ZERO);
+
+       /* chunk metadata directly follows the volume metadata */
+       cp = (struct sr_meta_chunk *)(sm + 1);
+       SLIST_FOREACH(ch_entry, cl, src_link) {
+               /* read and translate */
+               if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) {
+                       /* XXX mark disk offline */
+                       DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n",
+                           DEVNAME(sc));
+                       continue;
+               }
+
+               if (sm->ssdi.ssd_magic != SR_MAGIC) {
+                       DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n",
+                           DEVNAME(sc));
+                       continue;
+               }
+
+               /* validate metadata */
+               if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) {
+                       DNPRINTF(SR_D_META, "%s: invalid metadata\n",
+                           DEVNAME(sc));
+                       no_disk = -1;
+                       goto done;
+               }
+
+               /* assume chunk 0 contains metadata */
+               if (no_disk == 0)
+                       bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta));
+
+               bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta));
+
+               if (sm->ssdi.ssd_opt_no > 1)
+                       panic("not yet read > 1 optional metadata members");
+
+               if (sm->ssdi.ssd_opt_no) {
+                       /* optional metadata follows the chunk array */
+                       om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) +
+                           sizeof(struct sr_meta_chunk) *
+                           sm->ssdi.ssd_chunk_no);
+                       bcopy(om, &ch_entry->src_opt, sizeof(ch_entry->src_opt));
+
+                       if (om->somi.som_type == SR_OPT_CRYPTO) {
+                               bcopy(&ch_entry->src_opt.somi.som_meta.smm_crypto,
+                                   &sd->mds.mdd_crypto.scr_meta,
+                                   sizeof(sd->mds.mdd_crypto.scr_meta));
+                       }
+
+               }
+
+               cp++;
+               no_disk++;
+       }
+
+done:
+       /* the label sits above the frees so the error path releases too */
+       free(sm, M_DEVBUF);
+       if (fm)
+               free(fm, M_DEVBUF);
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", DEVNAME(sc),
+           no_disk);
+       return (no_disk);
+}
+
+/*
+ * Check metadata that was read from dev.
+ *
+ * Foreign formats are first passed to the format's smd_validate hook; after
+ * that the metadata is treated as native.  The version must match
+ * SR_META_VERSION and the stored checksum must match a freshly computed
+ * one.  A device name differing from the recorded one only produces a
+ * "roaming device" message.
+ *
+ * Returns 0 when the metadata is valid and 1 otherwise.
+ */
+int
+sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm,
+    void *fm)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_meta_chunk    *self;
+       char                    name[32];
+       u_int8_t                digest[MD5_DIGEST_LENGTH];
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm);
+
+       sr_meta_getdevname(sc, dev, name, sizeof(name));
+
+       if (sd->sd_meta_type != SR_META_F_NATIVE &&
+           smd[sd->sd_meta_type].smd_validate(sd, sm, fm)) {
+               printf("%s: invalid foreign metadata\n", DEVNAME(sc));
+               return (1);
+       }
+
+       /*
+        * at this point all foreign metadata has been translated to the native
+        * format and will be treated just like the native format
+        */
+
+       if (sm->ssdi.ssd_version != SR_META_VERSION) {
+               printf("%s: %s can not read metadata version %d, expected %d\n",
+                   DEVNAME(sc), name, sm->ssdi.ssd_version,
+                   SR_META_VERSION);
+               return (1);
+       }
+
+       sr_checksum(sc, sm, &digest, sizeof(struct sr_meta_invariant));
+       if (bcmp(&digest, &sm->ssd_checksum, sizeof(digest)) != 0) {
+               printf("%s: invalid metadata checksum\n", DEVNAME(sc));
+               return (1);
+       }
+
+       /* XXX do other checksums */
+
+       /* warn if disk changed order */
+       self = (struct sr_meta_chunk *)(sm + 1) + sm->ssdi.ssd_chunk_id;
+       if (strncmp(self->scmi.scm_devname, name,
+           sizeof(self->scmi.scm_devname)) != 0)
+               printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
+                   self->scmi.scm_devname, name);
+
+       /* we have meta data on disk */
+       DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n",
+           DEVNAME(sc), name);
+
+       return (0);
+}
+
+/*
+ * Boot time probe of one disk for native softraid metadata.
+ *
+ * The disklabel is read through the raw partition.  Every partition of
+ * type FS_RAID is then opened, its metadata read and validated, and,
+ * unless BIOC_SCNOAUTOASSEMBLE is set, a copy of the metadata is queued on
+ * mlh together with the partition's dev_t.  Each partition opened here is
+ * closed again before moving on, including when its metadata can not be
+ * read.
+ *
+ * Returns SR_META_CLAIMED when at least one partition was queued and
+ * SR_META_NOTCLAIMED otherwise.
+ */
+int
+sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv,
+    struct sr_metadata_list_head *mlh)
+{
+       struct bdevsw           *bdsw;
+       struct disklabel        label;
+       struct sr_metadata      *md;
+       struct sr_discipline    *fake_sd;
+       struct sr_metadata_list *mle;
+       char                    devname[32];
+       dev_t                   dev, devr;
+       int                     error, i, majdev;
+       int                     rv = SR_META_NOTCLAIMED;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc));
+
+       majdev = findblkmajor(dv);
+       if (majdev == -1)
+               goto done;
+       dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
+       bdsw = &bdevsw[majdev];
+
+       /*
+        * The devices are being opened with S_IFCHR instead of
+        * S_IFBLK so that the SCSI mid-layer does not whine when
+        * media is not inserted in certain devices like zip drives
+        * and such.
+        */
+
+       /* open device */
+       error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc);
+       if (error) {
+               DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
+                   "failed\n" , DEVNAME(sc));
+               goto done;
+       }
+
+       /* get disklabel */
+       error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label,
+           FREAD, curproc);
+       if (error) {
+               DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl "
+                   "failed\n", DEVNAME(sc));
+               error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
+               goto done;
+       }
+
+       /* we are done, close device */
+       error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
+       if (error) {
+               DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close "
+                   "failed\n", DEVNAME(sc));
+               goto done;
+       }
+
+       md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO);
+       if (md == NULL) {
+               printf("%s: not enough memory for metadata buffer\n",
+                   DEVNAME(sc));
+               goto done;
+       }
+
+       /* create fake sd to use utility functions */
+       fake_sd = malloc(sizeof(struct sr_discipline) , M_DEVBUF, M_ZERO);
+       if (fake_sd == NULL) {
+               printf("%s: not enough memory for fake discipline\n",
+                   DEVNAME(sc));
+               goto nosd;
+       }
+       fake_sd->sd_sc = sc;
+       fake_sd->sd_meta_type = SR_META_F_NATIVE;
+
+       for (i = 0; i < MAXPARTITIONS; i++) {
+               if (label.d_partitions[i].p_fstype != FS_RAID)
+                       continue;
+
+               /* open partition */
+               devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
+               error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc);
+               if (error) {
+                       DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
+                           "open failed, partition %d\n",
+                           DEVNAME(sc), i);
+                       continue;
+               }
+
+               if (sr_meta_native_read(fake_sd, devr, md, NULL)) {
+                       printf("%s: native bootprobe could not read native "
+                           "metadata\n", DEVNAME(sc));
+                       /* the partition was opened above; do not leak it */
+                       error = (*bdsw->d_close)(devr, FREAD, S_IFCHR,
+                           curproc);
+                       if (error) {
+                               DNPRINTF(SR_D_META, "%s: "
+                                   "sr_meta_native_bootprobe "
+                                   "close failed\n", DEVNAME(sc));
+                       }
+                       continue;
+               }
+
+               /* are we a softraid partition? */
+               sr_meta_getdevname(sc, devr, devname, sizeof(devname));
+               if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) {
+                       if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) {
+                               DNPRINTF(SR_D_META, "%s: don't save %s\n",
+                                   DEVNAME(sc), devname);
+                       } else {
+                               /* XXX fix M_WAITOK, this is boot time */
+                               mle = malloc(sizeof(*mle), M_DEVBUF,
+                                   M_WAITOK | M_ZERO);
+                               bcopy(md, &mle->sml_metadata,
+                                   SR_META_SIZE * 512);
+                               mle->sml_mm = devr;
+                               SLIST_INSERT_HEAD(mlh, mle, sml_link);
+                               rv = SR_META_CLAIMED;
+                       }
+               }
+
+               /* we are done, close partition */
+               error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, curproc);
+               if (error) {
+                       DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
+                           "close failed\n", DEVNAME(sc));
+                       continue;
+               }
+       }
+
+       free(fake_sd, M_DEVBUF);
+nosd:
+       free(md, M_DEVBUF);
+done:
+       return (rv);
+}
+
+/*
+ * Assemble softraid volumes from metadata found on disk.
+ *
+ * Every DV_DISK device on alldevs, except those whose name starts with
+ * "fd", "cd" or "rx", is handed to sr_meta_native_bootprobe(), which adds
+ * the metadata it accepts to a local list.  List entries that share a
+ * volume id and uuid are then grouped into a dev_t array indexed by chunk
+ * id, and each complete group is passed to sr_ioctl_createraid().
+ *
+ * Returns the number of volumes handed to sr_ioctl_createraid(); the
+ * result of that call is not checked.
+ */
+int
+sr_boot_assembly(struct sr_softc *sc)
+{
+       struct device           *dv;
+       struct sr_metadata_list_head mlh;
+       struct sr_metadata_list *mle, *mle2;
+       struct sr_metadata      *m1, *m2;
+       struct bioc_createraid  bc;
+       int                     rv = 0, no_dev;
+       dev_t                   *dt = NULL;
+
+       DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
+
+       SLIST_INIT(&mlh);
+
+       TAILQ_FOREACH(dv, &alldevs, dv_list) {
+               if (dv->dv_class != DV_DISK)
+                       continue;
+
+               /* XXX is there a better way of excluding some devices? */
+               if (!strncmp(dv->dv_xname, "fd", 2) ||
+                   !strncmp(dv->dv_xname, "cd", 2) ||
+                   !strncmp(dv->dv_xname, "rx", 2))
+                       continue;
+
+               /* native softraid uses partitions */
+               if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED)
+                       continue;
+
+               /* probe non-native disks */
+       }
+
+       /*
+        * XXX poor man's hack that doesn't keep disks in order and does not
+        * roam disks correctly.  replace this with something smarter that
+        * orders disks by volid, chunkid and uuid.
+        */
+       dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK);
+       SLIST_FOREACH(mle, &mlh, sml_link) {
+               /* chunk used already? */
+               if (mle->sml_used)
+                       continue;
+
+               /* mle is the reference chunk for this pass */
+               m1 = (struct sr_metadata *)&mle->sml_metadata;
+
+               no_dev = 0;
+               bzero(dt, BIOC_CRMAXLEN);
+               SLIST_FOREACH(mle2, &mlh, sml_link) {
+                       /* chunk used already? */
+                       if (mle2->sml_used)
+                               continue;
+
+                       m2 = (struct sr_metadata *)&mle2->sml_metadata;
+
+                       /* are we the same volume? */
+                       if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid)
+                               continue;
+
+                       /* same uuid? */
+                       if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid,
+                           sizeof(m1->ssdi.ssd_uuid)))
+                               continue;
+
+                       /*
+                        * sanity; the chunk id was read from disk and is
+                        * used as an index into dt, so it has to fit.
+                        */
+                       if (m2->ssdi.ssd_chunk_id >=
+                           BIOC_CRMAXLEN / sizeof(dev_t)) {
+                               printf("%s: chunk id out of range; can not "
+                                   "assemble volume\n", DEVNAME(sc));
+                               goto unwind;
+                       }
+                       if (dt[m2->ssdi.ssd_chunk_id]) {
+                               printf("%s: chunk id already in use; can not "
+                                   "assemble volume\n", DEVNAME(sc));
+                               goto unwind;
+                       }
+                       dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm;
+                       no_dev++;
+                       mle2->sml_used = 1;
+               }
+               if (m1->ssdi.ssd_chunk_no != no_dev) {
+                       printf("%s: not assembling partial disk that used to "
+                           "be volume %d\n", DEVNAME(sc),
+                           m1->ssdi.ssd_volid);
+                       continue;
+               }
+
+               bzero(&bc, sizeof(bc));
+               bc.bc_level = m1->ssdi.ssd_level;
+               bc.bc_dev_list_len = no_dev * sizeof(dev_t);
+               bc.bc_dev_list = dt;
+               bc.bc_flags = BIOC_SCDEVT;
+               sr_ioctl_createraid(sc, &bc, 0);
+               rv++;
+       }
+
+       /* done with metadata */
+unwind:
+       for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
+               mle2 = SLIST_NEXT(mle, sml_link);
+               free(mle, M_DEVBUF);
+       }
+       SLIST_INIT(&mlh);
+
+       if (dt)
+               free(dt, M_DEVBUF);
+
+       return (rv);
+}
+
+/*
+ * Check whether the chunk described by ch_entry can hold native metadata.
+ *
+ * Fetches the disklabel of ch_entry->src_dev_mm, requires the partition to
+ * be of type FS_RAID, and stores the partition size minus SR_META_SIZE and
+ * SR_META_OFFSET in ch_entry->src_size.
+ *
+ * Returns SR_META_F_NATIVE on success and SR_META_F_INVALID if the
+ * disklabel can't be read, the partition type is wrong or the partition
+ * is too small.
+ */
+int
+sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry)
+{
+       struct disklabel        label;
+       char                    *devname;
+       int                     error, part;
+       daddr64_t               size;
+       struct bdevsw           *bdsw;
+       dev_t                   dev;
 
-int            sr_match(struct device *, void *, void *);
-void           sr_attach(struct device *, struct device *, void *);
-int            sr_detach(struct device *, int);
-int            sr_activate(struct device *, enum devact);
+       DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n",
+           DEVNAME(sc), ch_entry->src_devname);
 
-struct cfattach softraid_ca = {
-       sizeof(struct sr_softc), sr_match, sr_attach, sr_detach,
-       sr_activate
-};
+       dev = ch_entry->src_dev_mm;
+       devname = ch_entry->src_devname;
+       bdsw = bdevsw_lookup(dev);
+       part = DISKPART(dev);
 
-struct cfdriver softraid_cd = {
-       NULL, "softraid", DV_DULL
-};
+       /* get disklabel */
+       error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label, 0, NULL);
+       if (error) {
+               DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n",
+                   DEVNAME(sc), devname);
+               goto unwind;
+       }
 
-int                    sr_scsi_cmd(struct scsi_xfer *);
-void                   sr_minphys(struct buf *bp);
-void                   sr_copy_internal_data(struct scsi_xfer *,
-                           void *, size_t);
-int                    sr_scsi_ioctl(struct scsi_link *, u_long,
-                           caddr_t, int, struct proc *);
-int                    sr_ioctl(struct device *, u_long, caddr_t);
-int                    sr_ioctl_inq(struct sr_softc *, struct bioc_inq *);
-int                    sr_ioctl_vol(struct sr_softc *, struct bioc_vol *);
-int                    sr_ioctl_disk(struct sr_softc *, struct bioc_disk *);
-int                    sr_ioctl_setstate(struct sr_softc *,
-                           struct bioc_setstate *);
-int                    sr_ioctl_createraid(struct sr_softc *,
-                           struct bioc_createraid *, int);
-int                    sr_ioctl_deleteraid(struct sr_softc *,
-                           struct bioc_deleteraid *);
-int                    sr_open_chunks(struct sr_softc *,
-                           struct sr_chunk_head *, dev_t *, int);
-int                    sr_read_meta(struct sr_discipline *);
-int                    sr_create_chunk_meta(struct sr_softc *,
-                           struct sr_chunk_head *);
-void                   sr_unwind_chunks(struct sr_softc *,
-                           struct sr_chunk_head *);
-void                   sr_free_discipline(struct sr_discipline *);
-void                   sr_shutdown_discipline(struct sr_discipline *);
+       /* make sure the partition is of the right type */
+       if (label.d_partitions[part].p_fstype != FS_RAID) {
+               DNPRINTF(SR_D_META,
+                   "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc),
+                   devname, label.d_partitions[part].p_fstype);
+               goto unwind;
+       }
 
-/* utility functions */
-void                   sr_shutdown(void *);
-void                   sr_get_uuid(struct sr_uuid *);
-void                   sr_print_uuid(struct sr_uuid *, int);
-u_int32_t              sr_checksum(char *, u_int32_t *, u_int32_t);
-int                    sr_clear_metadata(struct sr_discipline *);
-int                    sr_save_metadata(struct sr_discipline *, u_int32_t);
-int                    sr_boot_assembly(struct sr_softc *);
-int                    sr_already_assembled(struct sr_discipline *);
-int                    sr_validate_metadata(struct sr_softc *, dev_t,
-                           struct sr_metadata *);
+       /* usable size is what is left after the metadata area */
+       size = DL_GETPSIZE(&label.d_partitions[part]) -
+           SR_META_SIZE - SR_META_OFFSET;
+       if (size <= 0) {
+               DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc),
+                   devname);
+               goto unwind;
+       }
+       ch_entry->src_size = size;
 
-/* don't include these on RAMDISK */
-#ifndef SMALL_KERNEL
-void                   sr_refresh_sensors(void *);
-int                    sr_create_sensors(struct sr_discipline *);
-void                   sr_delete_sensors(struct sr_discipline *);
+       /* size is 64 bit wide, print it as such */
+       DNPRINTF(SR_D_META, "%s: probe found %s size %lld\n", DEVNAME(sc),
+           devname, (long long)size);
+
+       return (SR_META_F_NATIVE);
+unwind:
+       DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc),
+           devname ? devname : "nodev");
+       return (SR_META_F_INVALID);
+}
+
+/*
+ * Check that the chunks on sd's chunk list can be used together as one
+ * native metadata volume.
+ *
+ * The metadata area of every chunk is read.  Chunks whose magic is
+ * SR_MAGIC must all carry the uuid of the first such chunk.  A mix of
+ * chunks with and without the magic is rejected, and unless force is set
+ * the number of chunks with metadata has to match the chunk count
+ * recorded in the first one.
+ *
+ * Returns 0 if the chunks are acceptable and 1 otherwise.
+ */
+int
+sr_meta_native_attach(struct sr_discipline *sd, int force)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
+       struct sr_metadata      *md = NULL;
+       struct sr_chunk         *ch_entry;
+       struct sr_uuid          uuid;
+       int                     sr, not_sr, rv = 1, d, expected = -1;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc));
+
+       md = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO);
+       if (md == NULL) {
+               printf("%s: not enough memory for metadata buffer\n",
+                   DEVNAME(sc));
+               goto bad;
+       }
+
+       bzero(&uuid, sizeof uuid);
+
+       /* d becomes 1 once the reference uuid has been recorded */
+       sr = not_sr = d = 0;
+       SLIST_FOREACH(ch_entry, cl, src_link) {
+               if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) {
+                       printf("%s: could not read native metadata\n",
+                           DEVNAME(sc));
+                       goto bad;
+               }
+
+               if (md->ssdi.ssd_magic != SR_MAGIC) {
+                       not_sr++;
+                       continue;
+               }
+
+               sr++;
+               if (d == 0) {
+                       /* first chunk with metadata is the reference */
+                       bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid);
+                       expected = md->ssdi.ssd_chunk_no;
+                       d = 1;
+               } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, sizeof uuid)) {
+                       printf("%s: not part of the same volume\n",
+                           DEVNAME(sc));
+                       goto bad;
+               }
+       }
+
+       if (sr && not_sr) {
+               printf("%s: not all chunks are of the native metadata format\n",
+                   DEVNAME(sc));
+               goto bad;
+       }
+       if (expected != sr && !force && expected != -1) {
+               /* XXX make this smart so that we can bring up degraded disks */
+               printf("%s: not all chunks were provided\n", DEVNAME(sc));
+               goto bad;
+       }
+
+       rv = 0;
+bad:
+       if (md)
+               free(md, M_DEVBUF);
+       return (rv);
+}
+
+/*
+ * Read native metadata from dev into md by calling sr_meta_rw() with
+ * B_READ, SR_META_SIZE * 512 and SR_META_OFFSET.  fm is not used.
+ * Returns the result of sr_meta_rw().
+ */
+int
+sr_meta_native_read(struct sr_discipline *sd, dev_t dev,
+    struct sr_metadata *md, void *fm)
+{
+#ifdef SR_DEBUG
+       struct sr_softc         *sc = sd->sd_sc;
 #endif
+       int                     rv;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n",
+           DEVNAME(sc), dev, md);
+
+       rv = sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
+           B_READ);
+
+       return (rv);
+}
 
+/*
+ * Write native metadata from md to dev by calling sr_meta_rw() with
+ * B_WRITE, SR_META_SIZE * 512 and SR_META_OFFSET.  fm is not used.
+ * Returns the result of sr_meta_rw().
+ */
+int
+sr_meta_native_write(struct sr_discipline *sd, dev_t dev,
+    struct sr_metadata *md, void *fm)
+{
 #ifdef SR_DEBUG
-void                   sr_print_metadata(struct sr_metadata *);
-#else
-#define                        sr_print_metadata(m)
+       struct sr_softc         *sc = sd->sd_sc;
 #endif
+       int                     rv;
+
+       DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n",
+           DEVNAME(sc), dev, md);
 
-struct pool sr_uiopl;
-struct pool sr_iovpl;
+       rv = sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET,
+           B_WRITE);
+
+       return (rv);
+}
 
 struct scsi_adapter sr_switch = {
        sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl
@@ -142,23 +1136,9 @@ struct scsi_device sr_dev = {
        NULL, NULL, NULL, NULL
 };
 
-void
-sr_init(void)
-{
-       pool_init(&sr_uiopl, sizeof(struct uio), 0, 0, 0, "sr_uiopl", NULL);
-       pool_init(&sr_iovpl, sizeof(struct iovec), 0, 0, 0, "sr_iovpl", NULL);
-}
-
+/*
+ * Match routine listed in softraid_ca; always reports a match by
+ * returning 1.  None of the arguments are looked at.
+ */
 int
 sr_match(struct device *parent, void *match, void *aux)
 {
-       static int called = 0;
-
-       if (!called) {
-               sr_init();
-               called = 1;
-       }
-
        return (1);
 }
 
@@ -219,7 +1199,7 @@ sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size)
 }
 
 int
-sr_alloc_ccb(struct sr_discipline *sd)
+sr_ccb_alloc(struct sr_discipline *sd)
 {
        struct sr_ccb           *ccb;
        int                     i;
@@ -227,7 +1207,7 @@ sr_alloc_ccb(struct sr_discipline *sd)
        if (!sd)
                return (1);
 
-       DNPRINTF(SR_D_CCB, "%s: sr_alloc_ccb\n", DEVNAME(sd->sd_sc));
+       DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc));
 
        if (sd->sd_ccb)
                return (1);
@@ -238,24 +1218,24 @@ sr_alloc_ccb(struct sr_discipline *sd)
        for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) {
                ccb = &sd->sd_ccb[i];
                ccb->ccb_dis = sd;
-               sr_put_ccb(ccb);
+               sr_ccb_put(ccb);
        }
 
-       DNPRINTF(SR_D_CCB, "%s: sr_alloc_ccb ccb: %d\n",
+       DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n",
            DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu);
 
        return (0);
 }
 
 void
-sr_free_ccb(struct sr_discipline *sd)
+sr_ccb_free(struct sr_discipline *sd)
 {
        struct sr_ccb           *ccb;
 
        if (!sd)
                return;
 
-       DNPRINTF(SR_D_CCB, "%s: sr_free_ccb %p\n", DEVNAME(sd->sd_sc), sd);
+       DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd);
 
        while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL)
                TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link);
@@ -265,7 +1245,7 @@ sr_free_ccb(struct sr_discipline *sd)
 }
 
 struct sr_ccb *
-sr_get_ccb(struct sr_discipline *sd)
+sr_ccb_get(struct sr_discipline *sd)
 {
        struct sr_ccb           *ccb;
        int                     s;
@@ -280,19 +1260,19 @@ sr_get_ccb(struct sr_discipline *sd)
 
        splx(s);
 
-       DNPRINTF(SR_D_CCB, "%s: sr_get_ccb: %p\n", DEVNAME(sd->sd_sc),
+       DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc),
            ccb);
 
        return (ccb);
 }
 
 void
-sr_put_ccb(struct sr_ccb *ccb)
+sr_ccb_put(struct sr_ccb *ccb)
 {
        struct sr_discipline    *sd = ccb->ccb_dis;
        int                     s;
 
-       DNPRINTF(SR_D_CCB, "%s: sr_put_ccb: %p\n", DEVNAME(sd->sd_sc),
+       DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc),
            ccb);
 
        s = splbio();
@@ -308,7 +1288,7 @@ sr_put_ccb(struct sr_ccb *ccb)
 }
 
 int
-sr_alloc_wu(struct sr_discipline *sd)
+sr_wu_alloc(struct sr_discipline *sd)
 {
        struct sr_workunit      *wu;
        int                     i, no_wu;
@@ -316,7 +1296,7 @@ sr_alloc_wu(struct sr_discipline *sd)
        if (!sd)
                return (1);
 
-       DNPRINTF(SR_D_WU, "%s: sr_alloc_wu %p %d\n", DEVNAME(sd->sd_sc),
+       DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc),
            sd, sd->sd_max_wu);
 
        if (sd->sd_wu)
@@ -333,21 +1313,21 @@ sr_alloc_wu(struct sr_discipline *sd)
        for (i = 0; i < no_wu; i++) {
                wu = &sd->sd_wu[i];
                wu->swu_dis = sd;
-               sr_put_wu(wu);
+               sr_wu_put(wu);
        }
 
        return (0);
 }
 
 void
-sr_free_wu(struct sr_discipline *sd)
+sr_wu_free(struct sr_discipline *sd)
 {
        struct sr_workunit      *wu;
 
        if (!sd)
                return;
 
-       DNPRINTF(SR_D_WU, "%s: sr_free_wu %p\n", DEVNAME(sd->sd_sc), sd);
+       DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd);
 
        while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL)
                TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link);
@@ -361,14 +1341,14 @@ sr_free_wu(struct sr_discipline *sd)
 }
 
 void
-sr_put_wu(struct sr_workunit *wu)
+sr_wu_put(struct sr_workunit *wu)
 {
        struct sr_discipline    *sd = wu->swu_dis;
        struct sr_ccb           *ccb;
 
        int                     s;
 
-       DNPRINTF(SR_D_WU, "%s: sr_put_wu: %p\n", DEVNAME(sd->sd_sc), wu);
+       DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu);
 
        s = splbio();
 
@@ -385,7 +1365,7 @@ sr_put_wu(struct sr_workunit *wu)
 
        while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) {
                TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link);
-               sr_put_ccb(ccb);
+               sr_ccb_put(ccb);
        }
        TAILQ_INIT(&wu->swu_ccb);
 
@@ -396,7 +1376,7 @@ sr_put_wu(struct sr_workunit *wu)
 }
 
 struct sr_workunit *
-sr_get_wu(struct sr_discipline *sd)
+sr_wu_get(struct sr_discipline *sd)
 {
        struct sr_workunit      *wu;
        int                     s;
@@ -412,7 +1392,7 @@ sr_get_wu(struct sr_discipline *sd)
 
        splx(s);
 
-       DNPRINTF(SR_D_WU, "%s: sr_get_wu: %p\n", DEVNAME(sd->sd_sc), wu);
+       DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu);
 
        return (wu);
 }
@@ -447,11 +1427,11 @@ sr_scsi_cmd(struct scsi_xfer *xs)
 
        if (sd->sd_deleted) {
                printf("%s: %s device is being deleted, failing io\n",
-                   DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
+                   DEVNAME(sc), sd->sd_meta->ssd_devname);
                goto stuffup;
        }
 
-       if ((wu = sr_get_wu(sd)) == NULL) {
+       if ((wu = sr_wu_get(sd)) == NULL) {
                DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc));
                return (TRY_AGAIN_LATER);
        }
@@ -537,7 +1517,7 @@ complete:
        scsi_done(xs);
        splx(s);
        if (wu)
-               sr_put_wu(wu);
+               sr_wu_put(wu);
        return (COMPLETE);
 }
 int
@@ -618,7 +1598,7 @@ sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi)
                /* XXX this will not work when we stagger disciplines */
                if (sc->sc_dis[i]) {
                        vol++;
-                       disk += sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk;
+                       disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no;
                }
 
        strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev));
@@ -632,7 +1612,7 @@ int
 sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
 {
        int                     i, vol, rv = EINVAL;
-       struct sr_volume        *sv;
+       struct sr_discipline    *sd;
 
        for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
                /* XXX this will not work when we stagger disciplines */
@@ -641,14 +1621,14 @@ sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
                if (vol != bv->bv_volid)
                        continue;
 
-               sv = &sc->sc_dis[i]->sd_vol;
-               bv->bv_status = sv->sv_meta.svm_status;
-               bv->bv_size = sv->sv_meta.svm_size << DEV_BSHIFT;
-               bv->bv_level = sv->sv_meta.svm_level;
-               bv->bv_nodisk = sv->sv_meta.svm_no_chunk;
-               strlcpy(bv->bv_dev, sv->sv_meta.svm_devname,
+               sd = sc->sc_dis[i];
+               bv->bv_status = sd->sd_vol_status;
+               bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT;
+               bv->bv_level = sd->sd_meta->ssdi.ssd_level;
+               bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
+               strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname,
                    sizeof(bv->bv_dev));
-               strlcpy(bv->bv_vendor, sv->sv_meta.svm_vendor,
+               strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor,
                    sizeof(bv->bv_vendor));
                rv = 0;
                break;
@@ -671,15 +1651,15 @@ sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd)
                        continue;
 
                id = bd->bd_diskid;
-               if (id >= sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk)
+               if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no)
                        break;
 
                src = sc->sc_dis[i]->sd_vol.sv_chunks[id];
                bd->bd_status = src->src_meta.scm_status;
-               bd->bd_size = src->src_meta.scm_size << DEV_BSHIFT;
+               bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT;
                bd->bd_channel = vol;
                bd->bd_target = id;
-               strlcpy(bd->bd_vendor, src->src_meta.scm_devname,
+               strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname,
                    sizeof(bd->bd_vendor));
                rv = 0;
                break;
@@ -705,7 +1685,7 @@ sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs)
                        continue;
 
                sd = sc->sc_dis[vol];
-               if (bs->bs_target >= sd->sd_vol.sv_meta.svm_no_chunk)
+               if (bs->bs_target >= sd->sd_meta->ssdi.ssd_chunk_no)
                        goto done;
 
                switch (bs->bs_status) {
@@ -774,47 +1754,44 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
        no_chunk = bc->bc_dev_list_len / sizeof(dev_t);
        cl = &sd->sd_vol.sv_chunk_list;
        SLIST_INIT(cl);
-       if (sr_open_chunks(sc, cl, dt, no_chunk))
-               goto unwind;
-
-       /* in memory copy of metadata */
-       sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO);
 
        /* we have a valid list now create an array index */
        sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * no_chunk,
            M_DEVBUF, M_WAITOK | M_ZERO);
 
+       sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk);
+       if (sd->sd_meta_type == SR_META_F_INVALID) {
+               printf("%s: invalid metadata format\n", DEVNAME(sc));
+               goto unwind;
+       }
+
+       if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) {
+               printf("%s: can't attach metadata type %d\n", DEVNAME(sc),
+                   sd->sd_meta_type);
+               goto unwind;
+       }
+
        /* force the raid volume by clearing metadata region */
        if (bc->bc_flags & BIOC_SCFORCE) {
                /* make sure disk isn't up and running */
-               if (sr_read_meta(sd))
+               if (sr_meta_read(sd))
                        if (sr_already_assembled(sd)) {
                                printf("%s: disk ", DEVNAME(sc));
-                               sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
+                               sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
                                printf(" is currently in use; can't force "
                                    "create\n");
                                goto unwind;
                        }
 
-               /* zero out pointers and metadata again to create disk */
-               bzero(sd->sd_vol.sv_chunks,
-                   sizeof(struct sr_chunk *) * no_chunk);
-               bzero(sd->sd_meta, SR_META_SIZE  * 512);
-
-               if (sr_clear_metadata(sd)) {
+               if (sr_meta_clear(sd)) {
                        printf("%s: failed to clear metadata\n", DEVNAME(sc));
                        goto unwind;
                }
        }
 
-       if ((no_meta = sr_read_meta(sd)) == 0) {
-               /* fill out chunk array */
-               i = 0;
-               SLIST_FOREACH(ch_entry, cl, src_link)
-                       sd->sd_vol.sv_chunks[i++] = ch_entry;
-
+       if ((no_meta = sr_meta_read(sd)) == 0) {
                /* fill out all chunk metadata */
-               sr_create_chunk_meta(sc, cl);
+               sr_meta_chunks_create(sc, cl);
                ch_entry = SLIST_FIRST(cl);
 
                /* no metadata available */
@@ -830,13 +1807,13 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                         */
                        strip_size = MAXPHYS;
                        vol_size =
-                           ch_entry->src_meta.scm_coerced_size * no_chunk;
+                           ch_entry->src_meta.scmi.scm_coerced_size * no_chunk;
                        break;
                case 1:
                        if (no_chunk < 2)
                                goto unwind;
                        strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
-                       vol_size = ch_entry->src_meta.scm_coerced_size;
+                       vol_size = ch_entry->src_meta.scmi.scm_coerced_size;
                        break;
 #ifdef CRYPTO
                case 'C':
@@ -861,7 +1838,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                                goto unwind;
 
                        strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name));
-                       vol_size = ch_entry->src_meta.scm_size;
+                       vol_size = ch_entry->src_meta.scmi.scm_size;
 
                        sr_crypto_create_keys(sd);
 
@@ -875,23 +1852,26 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                DNPRINTF(SR_D_IOCTL,
                    "%s: sr_ioctl_createraid: vol_size: %lld\n",
                    DEVNAME(sc), vol_size);
-               sd->sd_vol.sv_meta.svm_no_chunk = no_chunk;
-               sd->sd_vol.sv_meta.svm_size = vol_size;
-               sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE;
-               sd->sd_vol.sv_meta.svm_level = bc->bc_level;
-               sd->sd_vol.sv_meta.svm_strip_size = strip_size;
-               strlcpy(sd->sd_vol.sv_meta.svm_vendor, "OPENBSD",
-                   sizeof(sd->sd_vol.sv_meta.svm_vendor));
-               snprintf(sd->sd_vol.sv_meta.svm_product,
-                   sizeof(sd->sd_vol.sv_meta.svm_product), "SR %s",
+               sd->sd_meta->ssdi.ssd_chunk_no = no_chunk;
+               sd->sd_meta->ssdi.ssd_size = vol_size;
+               sd->sd_vol_status = BIOC_SVONLINE;
+               sd->sd_meta->ssdi.ssd_level = bc->bc_level;
+               sd->sd_meta->ssdi.ssd_strip_size = strip_size;
+               strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD",
+                   sizeof(sd->sd_meta->ssdi.ssd_vendor));
+               snprintf(sd->sd_meta->ssdi.ssd_product,
+                   sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s",
                    sd->sd_name);
-               snprintf(sd->sd_vol.sv_meta.svm_revision,
-                   sizeof(sd->sd_vol.sv_meta.svm_revision), "%03d",
+               snprintf(sd->sd_meta->ssdi.ssd_revision,
+                   sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d",
                    SR_META_VERSION);
 
                sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE;
                updatemeta = 1;
        } else if (no_meta == no_chunk) {
+               if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY)
+                       printf("%s: %s was not shutdown properly\n",
+                           DEVNAME(sc), sd->sd_meta->ssd_devname);
                if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) {
                        DNPRINTF(SR_D_META, "%s: disk not auto assembled from "
                            "metadata\n", DEVNAME(sc));
@@ -899,7 +1879,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                }
                if (sr_already_assembled(sd)) {
                        printf("%s: disk ", DEVNAME(sc));
-                       sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
+                       sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
                        printf(" already assembled\n");
                        goto unwind;
                }
@@ -931,10 +1911,14 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n",
                    DEVNAME(sc));
                updatemeta = 0;
+       } else if (no_meta == -1) {
+               printf("%s: one of the chunks has corrupt metadata; aborting "
+                   "assembly\n", DEVNAME(sc));
+               goto unwind;
        } else {
                if (sr_already_assembled(sd)) {
                        printf("%s: disk ", DEVNAME(sc));
-                       sr_print_uuid(&sd->sd_meta->ssd_uuid, 0);
+                       sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0);
                        printf(" already assembled; will not partial "
                            "assemble it\n");
                        goto unwind;
@@ -950,8 +1934,8 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
                /* fill out discipline members */
                sd->sd_type = SR_MD_RAID0;
                sd->sd_max_ccb_per_wu =
-                   (MAXPHYS / sd->sd_vol.sv_meta.svm_strip_size + 1) *
-                   SR_RAID0_NOWU * sd->sd_vol.sv_meta.svm_no_chunk;
+                   (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) *
+                   SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no;
                sd->sd_max_wu = SR_RAID0_NOWU;
 
                /* setup discipline pointers */
@@ -1058,16 +2042,29 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
        rv = 0;
        if (updatemeta) {
                /* fill out remaining volume metadata */
-               sd->sd_vol.sv_meta.svm_volid = vol;
-               strlcpy(sd->sd_vol.sv_meta.svm_devname, dev->dv_xname,
-                   sizeof(sd->sd_vol.sv_meta.svm_devname));
+               sd->sd_meta->ssdi.ssd_volid = vol;
+               strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
+                   sizeof(sd->sd_meta->ssd_devname));
+               sr_meta_init(sd, cl);
+       } else {
+               if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname,
+                   sizeof(dev->dv_xname))) {
+                       printf("%s: volume %s is roaming, it used to be %s, "
+                           "updating metadata\n",
+                           DEVNAME(sc), dev->dv_xname,
+                           sd->sd_meta->ssd_devname);
+
+                       sd->sd_meta->ssdi.ssd_volid = vol;
+                       strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname,
+                           sizeof(sd->sd_meta->ssd_devname));
+               }
        }
 
        /* save metadata to disk */
-       rv = sr_save_metadata(sd, SR_VOL_DIRTY);
+       rv = sr_meta_save(sd, SR_META_DIRTY);
 
 #ifndef SMALL_KERNEL
-       if (sr_create_sensors(sd))
+       if (sr_sensors_create(sd))
                printf("%s: unable to create sensor for %s\n", DEVNAME(sc),
                    dev->dv_xname);
        else
@@ -1080,7 +2077,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user)
        return (rv);
 
 unwind:
-       sr_shutdown_discipline(sd);
+       sr_discipline_shutdown(sd);
 
        return (rv);
 }
@@ -1090,331 +2087,39 @@ sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr)
 {
        struct sr_discipline    *sd = NULL;
        int                     rv = 1;
-       int                     i;
-
-       DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
-           dr->bd_dev);
-
-       for (i = 0; i < SR_MAXSCSIBUS; i++)
-               if (sc->sc_dis[i]) {
-                       if (!strncmp(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname, dr->bd_dev,
-                           sizeof(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname))) {
-                               sd = sc->sc_dis[i];
-                               break;
-                       }
-               }
-
-       if (sd == NULL)
-               goto bad;
-
-       sd->sd_deleted = 1;
-       sd->sd_meta->ssd_flags = BIOC_SCNOAUTOASSEMBLE;
-       sr_shutdown(sd);
-
-       rv = 0;
-bad:
-       return (rv);
-}
-
-int
-sr_open_chunks(struct sr_softc *sc, struct sr_chunk_head *cl, dev_t *dt,
-    int no_chunk)
-{
-       struct sr_chunk         *ch_entry, *ch_prev = NULL;
-       struct disklabel        label;
-       struct bdevsw           *bdsw;
-       char                    *name;
-       int                     maj, unit, part, i, error;
-       daddr64_t               size;
-       dev_t                   dev;
-
-       DNPRINTF(SR_D_IOCTL, "%s: sr_open_chunks(%d)\n", DEVNAME(sc), no_chunk);
-
-       /* fill out chunk list */
-       for (i = 0; i < no_chunk; i++) {
-               ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF,
-                   M_WAITOK | M_ZERO);
-               /* keep disks in user supplied order */
-               if (ch_prev)
-                       SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link);
-               else
-                       SLIST_INSERT_HEAD(cl, ch_entry, src_link);
-               ch_prev = ch_entry;
-
-               dev = dt[i];
-               maj = major(dev);
-               part = DISKPART(dev);
-               unit = DISKUNIT(dev);
-               bdsw = &bdevsw[maj];
-
-               name = findblkname(maj);
-               if (name == NULL)
-                       goto unwind;
-
-               snprintf(ch_entry->src_devname, sizeof(ch_entry->src_devname),
-                   "%s%d%c", name, unit, part + 'a');
-               name = ch_entry->src_devname;
-
-               /* open device */
-               error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc);
-
-               /* get disklabel */
-               error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label,
-                   0, NULL);
-               if (error) {
-                       printf("%s: %s can't obtain disklabel\n",
-                           DEVNAME(sc), name);
-                       bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
-                       goto unwind;
-               }
-
-               /* make sure the partition is of the right type */
-               if (label.d_partitions[part].p_fstype != FS_RAID) {
-                       printf("%s: %s partition not of type RAID (%d)\n",
-                           DEVNAME(sc), name,
-                           label.d_partitions[part].p_fstype);
-                       bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
-                       goto unwind;
-               }
-
-               /* get partition size while accounting for metadata! */
-               ch_entry->src_size = size =
-                   DL_GETPSIZE(&label.d_partitions[part]) -
-                   SR_META_SIZE - SR_META_OFFSET;
-               if (size <= 0) {
-                       printf("%s: %s partition too small\n",
-                           DEVNAME(sc), name);
-                       bdsw->d_close(dev, FWRITE, S_IFBLK, curproc);
-                       goto unwind;
-               }
-
-
-               ch_entry->src_dev_mm = dev; /* major/minor */
-
-               DNPRINTF(SR_D_IOCTL, "%s: found %s size %d\n", DEVNAME(sc),
-                   name, size);
-       }
-
-       return (0);
-unwind:
-       printf("%s: invalid device: %s\n", DEVNAME(sc), name ? name : "nodev");
-       return (1);
-}
-
-int
-sr_read_meta(struct sr_discipline *sd)
-{
-       struct sr_softc         *sc = sd->sd_sc;
-       struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
-       struct sr_metadata      *sm = sd->sd_meta, *m;
-       struct sr_chunk         *ch_entry;
-       struct buf              b;
-       struct sr_vol_meta      *mv;
-       struct sr_chunk_meta    *mc;
-       struct sr_opt_meta      *mo;
-       size_t                  sz = SR_META_SIZE * 512;
-       int                     no_chunk = 0;
-       u_int32_t               volid, ondisk = 0, cid;
-
-       DNPRINTF(SR_D_META, "%s: sr_read_meta\n", DEVNAME(sc));
-
-       m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO);
-
-       SLIST_FOREACH(ch_entry, cl, src_link) {
-               bzero(&b, sizeof(b));
-
-               b.b_flags = B_READ;
-               b.b_blkno = SR_META_OFFSET;
-               b.b_bcount = sz;
-               b.b_bufsize = sz;
-               b.b_resid = sz;
-               b.b_data = (void *)m;
-               b.b_error = 0;
-               b.b_proc = curproc;
-               b.b_dev = ch_entry->src_dev_mm;
-               b.b_vp = NULL;
-               b.b_iodone = NULL;
-               LIST_INIT(&b.b_dep);
-               bdevsw_lookup(b.b_dev)->d_strategy(&b);
-               biowait(&b);
-
-               /* XXX mark chunk offline and restart metadata write */
-               if (b.b_flags & B_ERROR) {
-                       printf("%s: %s i/o error on block %lld while reading "
-                           "metadata %d\n", DEVNAME(sc),
-                           ch_entry->src_devname, b.b_blkno, b.b_error);
-                       continue;
-               }
-
-               if (m->ssd_magic != SR_MAGIC)
-                       continue;
-
-               /* validate metadata */
-               if (sr_validate_metadata(sc, ch_entry->src_dev_mm, m)) {
-                       printf("%s: invalid metadata\n", DEVNAME(sc));
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               mv = (struct sr_vol_meta *)(m + 1);
-               mc = (struct sr_chunk_meta *)(mv + 1);
-
-               /* we asssume that the first chunk has the initial metadata */
-               if (no_chunk++ == 0) {
-                       bcopy(m, sm, sz);
-                       bcopy(m, sd->sd_meta, sizeof(*sd->sd_meta));
-                       bcopy(mv, &sd->sd_vol.sv_meta,
-                           sizeof(sd->sd_vol.sv_meta));
-
-                       volid = m->ssd_vd_volid;
-                       sd->sd_meta_flags = sm->ssd_flags;
-               }
-
-               if (bcmp(&sm->ssd_uuid, &sd->sd_vol.sv_meta.svm_uuid,
-                   sizeof(struct sr_uuid))) {
-                       printf("%s: %s invalid chunk uuid ",
-                           DEVNAME(sc), ch_entry->src_devname);
-                       sr_print_uuid(&sm->ssd_uuid, 0);
-                       printf(", expected ");
-                       sr_print_uuid(&sd->sd_vol.sv_meta.svm_uuid, 1);
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               /* we have meta data on disk */
-               ch_entry->src_meta_ondisk = 1;
-
-               /* make sure we are part of this vd */
-               if (volid != m->ssd_vd_volid) {
-                       printf("%s: %s invalid volume id %d, expected %d\n",
-                           DEVNAME(sc), ch_entry->src_devname,
-                           volid, m->ssd_vd_volid);
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               if (m->ssd_chunk_id > m->ssd_chunk_no) {
-                       printf("%s: %s chunk id out of range %d, expected "
-                           "lower than %d\n", DEVNAME(sc),
-                           ch_entry->src_devname,
-                           m->ssd_chunk_id, m->ssd_chunk_no);
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               if (sd->sd_vol.sv_chunks[m->ssd_chunk_id]) {
-                       printf("%s: %s chunk id %d already in use\n",
-                           DEVNAME(sc), ch_entry->src_devname,
-                           m->ssd_chunk_id);
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               sd->sd_vol.sv_chunks[m->ssd_chunk_id] = ch_entry;
-               bcopy(mc + m->ssd_chunk_id, &ch_entry->src_meta,
-                   sizeof(ch_entry->src_meta));
-
-               if (ondisk == 0) {
-                       ondisk = m->ssd_ondisk;
-                       cid = m->ssd_chunk_id;
-               }
-
-               if (m->ssd_ondisk != ondisk) {
-                       printf("%s: %s chunk id %d contains stale metadata\n",
-                           DEVNAME(sc), ch_entry->src_devname,
-                           m->ssd_ondisk < ondisk ? m->ssd_chunk_id : cid);
-                       no_chunk = -1;
-                       goto bad;
-               }
-
-               /* XXX fix this check, sd_type isnt filled in yet */
-               if (mv->svm_level == 'C') {
-                       mo = (struct sr_opt_meta *)(mc + mv->svm_no_chunk);
-                       if (m->ssd_chunk_id > 1) {
-                               no_chunk = -1;
-                               goto bad;
-                       }
-                       bcopy(&mo->som_meta,
-                           &sd->mds.mdd_crypto.scr_meta,
-                           sizeof(sd->mds.mdd_crypto.scr_meta)
-                           );
-               }
-       }
-
-       if (no_chunk != m->ssd_chunk_no) {
-               DNPRINTF(SR_D_META, "%s: not enough chunks supplied\n",
-                   DEVNAME(sc));
-               no_chunk = -1;
-               goto bad;
-       }
-
-       DNPRINTF(SR_D_META, "%s: sr_read_meta: found %d elements\n",
-           DEVNAME(sc), no_chunk);
-
-       sr_print_metadata(m);
-
-bad:
-       /* return nr of chunks that contain metadata */
-       free(m, M_DEVBUF);
-       return (no_chunk);
-}
-
-int
-sr_create_chunk_meta(struct sr_softc *sc, struct sr_chunk_head *cl)
-{
-       struct sr_chunk         *ch_entry;
-       struct sr_uuid          uuid;
-       int                     rv = 1, cid = 0;
-       char                    *name;
-       u_int64_t               max_chunk_sz = 0, min_chunk_sz;
-
-       DNPRINTF(SR_D_IOCTL, "%s: sr_create_chunk_meta\n", DEVNAME(sc));
-
-       sr_get_uuid(&uuid);
-
-       /* fill out stuff and get largest chunk size while looping */
-       SLIST_FOREACH(ch_entry, cl, src_link) {
-               name = ch_entry->src_devname;
-               ch_entry->src_meta.scm_size = ch_entry->src_size;
-               ch_entry->src_meta.scm_chunk_id = cid++;
-               ch_entry->src_meta.scm_status = BIOC_SDONLINE;
-               strlcpy(ch_entry->src_meta.scm_devname, name,
-                   sizeof(ch_entry->src_meta.scm_devname));
-               bcopy(&uuid,  &ch_entry->src_meta.scm_uuid,
-                   sizeof(ch_entry->src_meta.scm_uuid));
+       int                     i;
 
-               if (ch_entry->src_meta.scm_size > max_chunk_sz)
-                       max_chunk_sz = ch_entry->src_meta.scm_size;
-       }
+       DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc),
+           dr->bd_dev);
 
-       /* get smallest chunk size */
-       min_chunk_sz = max_chunk_sz;
-       SLIST_FOREACH(ch_entry, cl, src_link)
-               if (ch_entry->src_meta.scm_size < min_chunk_sz)
-                       min_chunk_sz = ch_entry->src_meta.scm_size;
+       for (i = 0; i < SR_MAXSCSIBUS; i++)
+               if (sc->sc_dis[i]) {
+                       if (!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, dr->bd_dev,
+                           sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) {
+                               sd = sc->sc_dis[i];
+                               break;
+                       }
+               }
 
-       /* equalize all sizes */
-       SLIST_FOREACH(ch_entry, cl, src_link)
-               ch_entry->src_meta.scm_coerced_size = min_chunk_sz;
+       if (sd == NULL)
+               goto bad;
 
-       /* whine if chunks are not the same size */
-       if (min_chunk_sz != max_chunk_sz)
-               printf("%s: chunk sizes are not equal; up to %llu blocks "
-                   "wasted per chunk\n",
-                   DEVNAME(sc), max_chunk_sz - min_chunk_sz);
+       sd->sd_deleted = 1;
+       sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE;
+       sr_shutdown(sd);
 
        rv = 0;
-
+bad:
        return (rv);
 }
 
 void
-sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl)
+sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl)
 {
        struct sr_chunk         *ch_entry, *ch_next;
        dev_t                   dev;
 
-       DNPRINTF(SR_D_IOCTL, "%s: sr_unwind_chunks\n", DEVNAME(sc));
+       DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc));
 
        if (!cl)
                return;
@@ -1424,7 +2129,8 @@ sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl)
                ch_next = SLIST_NEXT(ch_entry, src_link);
 
                dev = ch_entry->src_dev_mm;
-
+               DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n",
+                   DEVNAME(sc), ch_entry->src_devname);
                if (dev != NODEV)
                        bdevsw_lookup(dev)->d_close(dev, FWRITE, S_IFBLK,
                            curproc);
@@ -1435,7 +2141,7 @@ sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl)
 }
 
 void
-sr_free_discipline(struct sr_discipline *sd)
+sr_discipline_free(struct sr_discipline *sd)
 {
        struct sr_softc         *sc = sd->sd_sc;
        int                     i;
@@ -1443,24 +2149,29 @@ sr_free_discipline(struct sr_discipline *sd)
        if (!sd)
                return;
 
-       DNPRINTF(SR_D_DIS, "%s: sr_free_discipline %s\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
-
+       DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n",
+           DEVNAME(sc),
+           sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
        if (sd->sd_free_resources)
                sd->sd_free_resources(sd);
        if (sd->sd_vol.sv_chunks)
                free(sd->sd_vol.sv_chunks, M_DEVBUF);
-       free(sd, M_DEVBUF);
+       if (sd->sd_meta)
+               free(sd->sd_meta, M_DEVBUF);
+       if (sd->sd_meta_foreign)
+               free(sd->sd_meta_foreign, M_DEVBUF);
 
        for (i = 0; i < SR_MAXSCSIBUS; i++)
                if (sc->sc_dis[i] == sd) {
                        sc->sc_dis[i] = NULL;
                        break;
                }
+
+       free(sd, M_DEVBUF);
 }
 
 void
-sr_shutdown_discipline(struct sr_discipline *sd)
+sr_discipline_shutdown(struct sr_discipline *sd)
 {
        struct sr_softc         *sc = sd->sd_sc;
        int                     s;
@@ -1468,8 +2179,8 @@ sr_shutdown_discipline(struct sr_discipline *sd)
        if (!sd || !sc)
                return;
 
-       DNPRINTF(SR_D_DIS, "%s: sr_shutdown_discipline %s\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
+       DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc),
+           sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev");
 
        s = splbio();
 
@@ -1484,16 +2195,16 @@ sr_shutdown_discipline(struct sr_discipline *sd)
                        break;
 
 #ifndef SMALL_KERNEL
-       sr_delete_sensors(sd);
+       sr_sensors_delete(sd);
 #endif /* SMALL_KERNEL */
 
        if (sd->sd_scsibus_dev)
                config_detach(sd->sd_scsibus_dev, DETACH_FORCE);
 
-       sr_unwind_chunks(sc, &sd->sd_vol.sv_chunk_list);
+       sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list);
 
        if (sd)
-               sr_free_discipline(sd);
+               sr_discipline_free(sd);
 
        splx(s);
 }
@@ -1513,11 +2224,11 @@ sr_raid_inquiry(struct sr_workunit *wu)
        inq.version = 2;
        inq.response_format = 2;
        inq.additional_length = 32;
-       strlcpy(inq.vendor, sd->sd_vol.sv_meta.svm_vendor,
+       strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor,
            sizeof(inq.vendor));
-       strlcpy(inq.product, sd->sd_vol.sv_meta.svm_product,
+       strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product,
            sizeof(inq.product));
-       strlcpy(inq.revision, sd->sd_vol.sv_meta.svm_revision,
+       strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision,
            sizeof(inq.revision));
        sr_copy_internal_data(xs, &inq, sizeof(inq));
 
@@ -1537,16 +2248,16 @@ sr_raid_read_cap(struct sr_workunit *wu)
 
        if (xs->cmd->opcode == READ_CAPACITY) {
                bzero(&rcd, sizeof(rcd));
-               if (sd->sd_vol.sv_meta.svm_size > 0xffffffffllu)
+               if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu)
                        _lto4b(0xffffffff, rcd.addr);
                else
-                       _lto4b(sd->sd_vol.sv_meta.svm_size, rcd.addr);
+                       _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr);
                _lto4b(512, rcd.length);
                sr_copy_internal_data(xs, &rcd, sizeof(rcd));
                rv = 0;
        } else if (xs->cmd->opcode == READ_CAPACITY_16) {
                bzero(&rcd16, sizeof(rcd16));
-               _lto8b(sd->sd_vol.sv_meta.svm_size, rcd16.addr);
+               _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr);
                _lto4b(512, rcd16.length);
                sr_copy_internal_data(xs, &rcd16, sizeof(rcd16));
                rv = 0;
@@ -1562,737 +2273,187 @@ sr_raid_tur(struct sr_workunit *wu)
 
        DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc));
 
-       if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
+       if (sd->sd_vol_status == BIOC_SVOFFLINE) {
                sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
                sd->sd_scsi_sense.flags = SKEY_NOT_READY;
                sd->sd_scsi_sense.add_sense_code = 0x04;
                sd->sd_scsi_sense.add_sense_code_qual = 0x11;
                sd->sd_scsi_sense.extra_len = 4;
                return (1);
-       } else if (sd->sd_vol.sv_meta.svm_status == BIOC_SVINVALID) {
+       } else if (sd->sd_vol_status == BIOC_SVINVALID) {
                sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT;
                sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR;
                sd->sd_scsi_sense.add_sense_code = 0x05;
                sd->sd_scsi_sense.add_sense_code_qual = 0x00;
                sd->sd_scsi_sense.extra_len = 4;
-               return (1);
-       }
-
-       return (0);
-}
-
-int
-sr_raid_request_sense(struct sr_workunit *wu)
-{
-       struct sr_discipline    *sd = wu->swu_dis;
-       struct scsi_xfer        *xs = wu->swu_xs;
-
-       DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
-           DEVNAME(sd->sd_sc));
-
-       /* use latest sense data */
-       bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
-
-       /* clear sense data */
-       bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
-
-       return (0);
-}
-
-int
-sr_raid_start_stop(struct sr_workunit *wu)
-{
-       struct sr_discipline    *sd = wu->swu_dis;
-       struct scsi_xfer        *xs = wu->swu_xs;
-       struct scsi_start_stop  *ss = (struct scsi_start_stop *)xs->cmd;
-       int                     rv = 1;
-
-       DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
-           DEVNAME(sd->sd_sc));
-
-       if (!ss)
-               return (rv);
-
-       if (ss->byte2 == 0x00) {
-               /* START */
-               if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
-                       /* bring volume online */
-                       /* XXX check to see if volume can be brought online */
-                       sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE;
-               }
-               rv = 0;
-       } else /* XXX is this the check? if (byte == 0x01) */ {
-               /* STOP */
-               if (sd->sd_vol.sv_meta.svm_status == BIOC_SVONLINE) {
-                       /* bring volume offline */
-                       sd->sd_vol.sv_meta.svm_status = BIOC_SVOFFLINE;
-               }
-               rv = 0;
-       }
-
-       return (rv);
-}
-
-int
-sr_raid_sync(struct sr_workunit *wu)
-{
-       struct sr_discipline    *sd = wu->swu_dis;
-       int                     s, rv = 0, ios;
-
-       DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
-
-       /* when doing a fake sync don't coun't the wu */
-       ios = wu->swu_fake ? 0 : 1;
-
-       s = splbio();
-       sd->sd_sync = 1;
-
-       while (sd->sd_wu_pending > ios)
-               if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
-                       DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
-                           DEVNAME(sd->sd_sc));
-                       rv = 1;
-                       break;
-               }
-
-       sd->sd_sync = 0;
-       splx(s);
-
-       wakeup(&sd->sd_sync);
-
-       return (rv);
-}
-
-void
-sr_raid_startwu(struct sr_workunit *wu)
-{
-       struct sr_discipline    *sd = wu->swu_dis;
-       struct sr_ccb           *ccb;
-
-       splassert(IPL_BIO);
-
-       if (wu->swu_state == SR_WU_RESTART)
-               /*
-                * no need to put the wu on the pending queue since we
-                * are restarting the io
-                */
-                ;
-       else
-               /* move wu to pending queue */
-               TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
-
-       /* start all individual ios */
-       TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
-               bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf);
-       }
-}
-
-u_int32_t
-sr_checksum(char *s, u_int32_t *p, u_int32_t size)
-{
-       u_int32_t               chk = 0;
-       int                     i;
-
-       DNPRINTF(SR_D_MISC, "%s: sr_checksum %p %d\n", s, p, size);
-
-       if (size % sizeof(u_int32_t))
-               return (0); /* 0 is failure */
-
-       for (i = 0; i < size / sizeof(u_int32_t); i++)
-               chk ^= p[i];
-
-       return (chk);
-}
-
-void
-sr_get_uuid(struct sr_uuid *uuid)
-{
-       arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
-}
-
-void
-sr_print_uuid(struct sr_uuid *uuid, int cr)
-{
-       int                     i;
-
-       for (i = 0; i < SR_UUID_MAX; i++)
-               printf("%x%s", uuid->sui_id[i],
-                   i < SR_UUID_MAX - 1 ? ":" : "");
-
-       if (cr)
-               printf("\n");
-}
-
-int
-sr_clear_metadata(struct sr_discipline *sd)
-{
-       struct sr_softc         *sc = sd->sd_sc;
-       struct sr_chunk_head    *cl = &sd->sd_vol.sv_chunk_list;
-       struct sr_chunk         *ch_entry;
-       struct buf              b;
-       size_t                  sz = SR_META_SIZE * 512;
-       void                    *m;
-       int                     rv = 0;
-
-       DNPRINTF(SR_D_META, "%s: sr_clear_metadata\n", DEVNAME(sc));
-
-       m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO);
-
-       SLIST_FOREACH(ch_entry, cl, src_link) {
-               bzero(&b, sizeof(b));
-
-               b.b_flags = B_WRITE;
-               b.b_blkno = SR_META_OFFSET;
-               b.b_bcount = sz;
-               b.b_bufsize = sz;
-               b.b_resid = sz;
-               b.b_data = (void *)m;
-               b.b_error = 0;
-               b.b_proc = curproc;
-               b.b_dev = ch_entry->src_dev_mm;
-               b.b_vp = NULL;
-               b.b_iodone = NULL;
-               LIST_INIT(&b.b_dep);
-               bdevsw_lookup(b.b_dev)->d_strategy(&b);
-               biowait(&b);
-
-               if (b.b_flags & B_ERROR) {
-                       printf("%s: %s i/o error on block %lld while clearing "
-                           "metadata %d\n", DEVNAME(sc),
-                           ch_entry->src_devname, b.b_blkno, b.b_error);
-                       rv++;
-                       continue;
-               }
-       }
-
-       free(m, M_DEVBUF);
-       return (rv);
-}
-
-int
-sr_already_assembled(struct sr_discipline *sd)
-{
-       struct sr_softc         *sc = sd->sd_sc;
-       int                     i;
-
-       for (i = 0; i < SR_MAXSCSIBUS; i++)
-               if (sc->sc_dis[i])
-                       if (!bcmp(&sd->sd_meta->ssd_uuid,
-                           &sc->sc_dis[i]->sd_meta->ssd_uuid,
-                           sizeof(sd->sd_meta->ssd_uuid)))
-                               return (1);
-
-       return (0);
-}
-
-void
-sr_save_metadata_callback(void *arg1, void *arg2)
-{
-       struct sr_discipline    *sd = arg1;
-       int                     s;
-
-       s = splbio();
-
-       if (sr_save_metadata(arg1, SR_VOL_DIRTY))
-               printf("%s: save metadata failed\n",
-                   DEVNAME(sd->sd_sc));
-
-       sd->sd_must_flush = 0;
-       splx(s);
-}
-
-int
-sr_save_metadata(struct sr_discipline *sd, u_int32_t flags)
-{
-       struct sr_softc         *sc = sd->sd_sc;
-       struct sr_metadata      *sm = sd->sd_meta;
-       struct sr_vol_meta      *sv = &sd->sd_vol.sv_meta, *im_sv;
-       struct sr_chunk_meta    *im_sc;
-       struct sr_opt_meta      *im_so;
-       struct sr_chunk         *src;
-       struct buf              b;
-       struct sr_workunit      wu;
-       int                     i, rv = 1, ch = 0, no_chunk, sz_opt;
-       size_t                  sz = SR_META_SIZE * 512;
-
-       DNPRINTF(SR_D_META, "%s: sr_save_metadata %s\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
-
-       if (!sm) {
-               printf("%s: no in memory copy of metadata\n", DEVNAME(sc));
-               goto bad;
-       }
-
-       im_sv = (struct sr_vol_meta *)(sm + 1);
-       im_sc = (struct sr_chunk_meta *)(im_sv + 1);
-       no_chunk = sd->sd_vol.sv_meta.svm_no_chunk;
-       im_so = (struct sr_opt_meta *)(im_sc + no_chunk);
-
-       /* XXX this is a temporary hack until meta is properly redone */
-       if (sd->sd_type == SR_MD_CRYPTO)
-               sz_opt = sizeof(struct sr_opt_meta);
-       else
-               sz_opt = 0;
-
-       if (sizeof(struct sr_metadata) + sizeof(struct sr_vol_meta) +
-           (sizeof(struct sr_chunk_meta) * no_chunk) +
-           sz_opt > sz) {
-               printf("%s: too much metadata; metadata NOT written\n",
-                   DEVNAME(sc));
-               goto bad;
-       }
-
-       if (sm->ssd_magic == 0) {
-               /* initial metadata */
-               sm->ssd_magic = SR_MAGIC;
-               sm->ssd_version = SR_META_VERSION;
-               sm->ssd_size = sizeof(struct sr_metadata);
-               sm->ssd_ondisk = 0;
-               sm->ssd_flags = sd->sd_meta_flags;
-               /* get uuid from chunk 0 */
-               bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scm_uuid,
-                   &sm->ssd_uuid,
-                   sizeof(struct sr_uuid));
-
-               /* volume */
-               bcopy(sv, im_sv, sizeof(struct sr_vol_meta));
-               bcopy(&sm->ssd_uuid, &im_sv->svm_uuid,
-                   sizeof(im_sv->svm_uuid));
-               sm->ssd_vd_ver = SR_VOL_VERSION;
-               sm->ssd_vd_size = sizeof(struct sr_vol_meta);
-
-               /* chunk */
-               for (i = 0; i < no_chunk; i++)
-                       bcopy(sd->sd_vol.sv_chunks[i], &im_sc[i],
-                           sizeof(struct sr_chunk_meta));
-
-               sm->ssd_chunk_ver = SR_CHUNK_VERSION;
-               sm->ssd_chunk_size = sizeof(struct sr_chunk_meta);
-               sm->ssd_chunk_no = no_chunk;
-
-               /* optional */
-               sm->ssd_opt_ver = SR_OPT_VERSION;
-               if (sd->sd_type == SR_MD_CRYPTO) {
-                       bzero(im_so, sizeof(*im_so));
-                       sm->ssd_opt_size = sizeof(struct sr_opt_meta);
-                       sm->ssd_opt_no = 1;
-               } else {
-                       sm->ssd_opt_size = 0;
-                       sm->ssd_opt_no = 0;
-               }
-       }
-
-       /* from here on out metadata is updated */
-       sm->ssd_ondisk++;
-       im_sv->svm_flags |= flags;
-       sm->ssd_vd_chk = sr_checksum(DEVNAME(sc),
-           (u_int32_t *)im_sv, sm->ssd_vd_size);
-
-       sm->ssd_chunk_chk = 0;
-       for (ch = 0; ch < sm->ssd_chunk_no; ch++)
-               sm->ssd_chunk_chk ^= sr_checksum(DEVNAME(sc),
-                   (u_int32_t *)&im_sc[ch], sm->ssd_chunk_size);
-
-       /* XXX do checksum on optional meta too */
-
-       sr_print_metadata(sm);
-
-       for (i = 0; i < sm->ssd_chunk_no; i++) {
-               memset(&b, 0, sizeof(b));
-
-               src = sd->sd_vol.sv_chunks[i];
-
-               /* skip disks that are offline */
-               if (src->src_meta.scm_status == BIOC_SDOFFLINE)
-                       continue;
-
-               /* copy encrypted key / passphrase into optinal metadata area */
-               if (sd->sd_type == SR_MD_CRYPTO && i < 2) {
-                       im_so->som_type = SR_OPT_CRYPTO;
-                       bcopy(&sd->mds.mdd_crypto.scr_meta,
-                           &im_so->som_meta.smm_crypto,
-                           sizeof(im_so->som_meta.smm_crypto));
-               }
-
-               /* calculate metdata checksum and ids */
-               sm->ssd_vd_volid = im_sv->svm_volid;
-               sm->ssd_chunk_id = i;
-               sm->ssd_checksum = sr_checksum(DEVNAME(sc),
-                   (u_int32_t *)sm, sm->ssd_size);
-
-               DNPRINTF(SR_D_META, "%s: sr_save_metadata %s: volid: %d "
-                   "chunkid: %d checksum: 0x%x\n",
-                   DEVNAME(sc), src->src_meta.scm_devname,
-                   sm->ssd_vd_volid, sm->ssd_chunk_id,
-                   sm->ssd_checksum);
-
-               b.b_flags = B_WRITE;
-               b.b_blkno = SR_META_OFFSET;
-               b.b_bcount = sz;
-               b.b_bufsize = sz;
-               b.b_resid = sz;
-               b.b_data = (void *)sm;
-               b.b_error = 0;
-               b.b_proc = curproc;
-               b.b_dev = src->src_dev_mm;
-               b.b_vp = NULL;
-               b.b_iodone = NULL;
-               LIST_INIT(&b.b_dep);
-               bdevsw_lookup(b.b_dev)->d_strategy(&b);
-
-               biowait(&b);
-
-               /* make sure in memory copy is clean */
-               if (sd->sd_type == SR_MD_CRYPTO)
-                       bzero(im_so, sizeof(*im_so));
-               sm->ssd_vd_volid = 0;
-               sm->ssd_chunk_id = 0;
-               sm->ssd_checksum = 0;
-
-               /* XXX do something smart here */
-               /* mark chunk offline and restart metadata write */
-               if (b.b_flags & B_ERROR) {
-                       printf("%s: %s i/o error on block %lld while writing "
-                           "metadata %d\n", DEVNAME(sc),
-                           src->src_meta.scm_devname, b.b_blkno, b.b_error);
-                       goto bad;
-               }
-
-               DNPRINTF(SR_D_META, "%s: sr_save_metadata written to %s\n",
-                   DEVNAME(sc), src->src_meta.scm_devname);
+               return (1);
        }
 
-       bzero(&wu, sizeof(wu));
-       wu.swu_fake = 1;
-       wu.swu_dis = sd;
-       sd->sd_scsi_sync(&wu);
-
-       rv = 0;
-bad:
-       return (rv);
+       return (0);
 }
 
 int
-sr_boot_assembly(struct sr_softc *sc)
+sr_raid_request_sense(struct sr_workunit *wu)
 {
-       struct device           *dv;
-       struct buf              *bp;
-       struct bdevsw           *bdsw;
-       struct disklabel        label;
-       struct sr_metadata      *sm;
-       struct sr_metadata_list_head mlh;
-       struct sr_metadata_list *mle, *mle2;
-       struct sr_vol_meta      *vm;
-       struct bioc_createraid  bc;
-       dev_t                   dev, devr, *dt = NULL;
-       int                     error, majdev, i, no_dev, rv = 0;
-       size_t                  sz = SR_META_SIZE * 512;
-
-       DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc));
+       struct sr_discipline    *sd = wu->swu_dis;
+       struct scsi_xfer        *xs = wu->swu_xs;
 
-       SLIST_INIT(&mlh);
-       bp = geteblk(sz);
-       if (!bp)
-               return (ENOMEM);
+       DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n",
+           DEVNAME(sd->sd_sc));
 
-       TAILQ_FOREACH(dv, &alldevs, dv_list) {
-               if (dv->dv_class != DV_DISK)
-                       continue;
+       /* use latest sense data */
+       bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense));
 
-               majdev = findblkmajor(dv);
-               if (majdev == -1)
-                       continue;
+       /* clear sense data */
+       bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense));
 
-               bp->b_dev = dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART);
-               bdsw = &bdevsw[majdev];
+       return (0);
+}
 
-               /* XXX is there  a better way of excluding some devices? */
-               if (!strncmp(dv->dv_xname, "fd", 2) ||
-                   !strncmp(dv->dv_xname, "cd", 2) ||
-                   !strncmp(dv->dv_xname, "rx", 2))
-                       continue;
-               /*
-                * The devices are being opened with S_IFCHR instead of
-                * S_IFBLK so that the SCSI mid-layer does not whine when
-                * media is not inserted in certain devices like zip drives
-                * and such.
-                */
+int
+sr_raid_start_stop(struct sr_workunit *wu)
+{
+       struct sr_discipline    *sd = wu->swu_dis;
+       struct scsi_xfer        *xs = wu->swu_xs;
+       struct scsi_start_stop  *ss = (struct scsi_start_stop *)xs->cmd;
+       int                     rv = 1;
 
-               /* open device */
-               error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc);
-               if (error) {
-                       DNPRINTF(SR_D_META, "%s: sr_boot_assembly open failed"
-                           "\n", DEVNAME(sc));
-                       continue;
-               }
+       DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n",
+           DEVNAME(sd->sd_sc));
 
-               /* get disklabel */
-               error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label,
-                   FREAD, curproc);
-               if (error) {
-                       DNPRINTF(SR_D_META, "%s: sr_boot_assembly ioctl "
-                           "failed\n", DEVNAME(sc));
-                       error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
-                       continue;
-               }
+       if (!ss)
+               return (rv);
 
-               /* we are done, close device */
-               error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc);
-               if (error) {
-                       DNPRINTF(SR_D_META, "%s: sr_boot_assembly close "
-                           "failed\n", DEVNAME(sc));
-                       continue;
+       if (ss->byte2 == 0x00) {
+               /* START */
+               if (sd->sd_vol_status == BIOC_SVOFFLINE) {
+                       /* bring volume online */
+                       /* XXX check to see if volume can be brought online */
+                       sd->sd_vol_status = BIOC_SVONLINE;
                }
-
-               /* are we a softraid partition? */
-               for (i = 0; i < MAXPARTITIONS; i++) {
-                       if (label.d_partitions[i].p_fstype != FS_RAID)
-                               continue;
-
-                       /* open device */
-                       bp->b_dev = devr = MAKEDISKDEV(majdev, dv->dv_unit, i);
-                       error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc);
-                       if (error) {
-                               DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
-                                   "open failed, partition %d\n",
-                                   DEVNAME(sc), i);
-                               continue;
-                       }
-                       /* read metadat */
-                       bp->b_flags = B_BUSY | B_READ;
-                       bp->b_blkno = SR_META_OFFSET;
-                       bp->b_cylinder = 0;
-                       bp->b_bcount = sz;
-                       bp->b_bufsize = sz;
-                       bp->b_resid = sz;
-                       (*bdsw->d_strategy)(bp);
-                       if ((error = biowait(bp))) {
-                               DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
-                                   "strategy failed, partition %d\n",
-                                   DEVNAME(sc));
-                               error = (*bdsw->d_close)(devr, FREAD, S_IFCHR,
-                                   curproc);
-                               continue;
-                       }
-
-                       sm = (struct sr_metadata *)bp->b_data;
-                       if (!sr_validate_metadata(sc, devr, sm)) {
-                               /* we got one; save it off */
-                               mle = malloc(sizeof(*mle), M_DEVBUF,
-                                   M_WAITOK | M_ZERO);
-                               mle->sml_metadata = malloc(sz, M_DEVBUF,
-                                   M_WAITOK | M_ZERO);
-                               bcopy(sm, mle->sml_metadata, sz);
-                               mle->sml_mm = devr;
-                               SLIST_INSERT_HEAD(&mlh, mle, sml_link);
-                       }
-
-                       /* we are done, close device */
-                       error = (*bdsw->d_close)(devr, FREAD, S_IFCHR,
-                           curproc);
-                       if (error) {
-                               DNPRINTF(SR_D_META, "%s: sr_boot_assembly "
-                                   "close failed\n", DEVNAME(sc));
-                               continue;
-                       }
+               rv = 0;
+       } else /* XXX is this the check? if (byte == 0x01) */ {
+               /* STOP */
+               if (sd->sd_vol_status == BIOC_SVONLINE) {
+                       /* bring volume offline */
+                       sd->sd_vol_status = BIOC_SVOFFLINE;
                }
+               rv = 0;
        }
 
-       /*
-        * XXX poor mans hack that doesn't keep disks in order and does not
-        * roam disks correctly.  replace this with something smarter that
-        * orders disks by volid, chunkid and uuid.
-        */
-       dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK);
-       SLIST_FOREACH(mle, &mlh, sml_link) {
-               /* chunk used already? */
-               if (mle->sml_used)
-                       continue;
-
-               no_dev = 0;
-               bzero(dt, BIOC_CRMAXLEN);
-               SLIST_FOREACH(mle2, &mlh, sml_link) {
-                       /* chunk used already? */
-                       if (mle2->sml_used)
-                               continue;
+       return (rv);
+}
 
-                       /* are we the same volume? */
-                       if (mle->sml_metadata->ssd_vd_volid !=
-                           mle2->sml_metadata->ssd_vd_volid)
-                               continue;
+int
+sr_raid_sync(struct sr_workunit *wu)
+{
+       struct sr_discipline    *sd = wu->swu_dis;
+       int                     s, rv = 0, ios;
 
-                       /* same uuid? */
-                       if (bcmp(&mle->sml_metadata->ssd_uuid,
-                           &mle2->sml_metadata->ssd_uuid,
-                           sizeof(mle->sml_metadata->ssd_uuid)))
-                               continue;
+       DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc));
 
-                       /* sanity */
-                       if (dt[mle2->sml_metadata->ssd_chunk_id]) {
-                               printf("%s: chunk id already in use; can not "
-                                   "assemble volume\n", DEVNAME(sc));
-                               goto unwind;
-                       }
-                       dt[mle2->sml_metadata->ssd_chunk_id] = mle2->sml_mm;
-                       no_dev++;
-                       mle2->sml_used = 1;
-               }
-               if (mle->sml_metadata->ssd_chunk_no != no_dev) {
-                       printf("%s: not assembling partial disk that used to "
-                           "be volume %d\n", DEVNAME(sc),
-                           mle->sml_metadata->ssd_vd_volid);
-                       continue;
-               }
+       /* when doing a fake sync don't count the wu */
+       ios = wu->swu_fake ? 0 : 1;
 
-               bzero(&bc, sizeof(bc));
-               vm = (struct sr_vol_meta *)(mle->sml_metadata + 1);
-               bc.bc_level = vm->svm_level;
-               bc.bc_dev_list_len = no_dev * sizeof(dev_t);
-               bc.bc_dev_list = dt;
-               bc.bc_flags = BIOC_SCDEVT;
-               sr_ioctl_createraid(sc, &bc, 0);
-               rv++;
-       }
+       s = splbio();
+       sd->sd_sync = 1;
 
-unwind:
-       if (dt)
-               free(dt, M_DEVBUF);
+       while (sd->sd_wu_pending > ios)
+               if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) {
+                       DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n",
+                           DEVNAME(sd->sd_sc));
+                       rv = 1;
+                       break;
+               }
 
-       for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) {
-               mle2 = SLIST_NEXT(mle, sml_link);
+       sd->sd_sync = 0;
+       splx(s);
 
-               free(mle->sml_metadata, M_DEVBUF);
-               free(mle, M_DEVBUF);
-       }
-       SLIST_INIT(&mlh);
+       wakeup(&sd->sd_sync);
 
        return (rv);
 }
 
-int
-sr_validate_metadata(struct sr_softc *sc, dev_t dev, struct sr_metadata *sm)
+void
+sr_raid_startwu(struct sr_workunit *wu)
 {
-       struct sr_vol_meta      *mv;
-       struct sr_chunk_meta    *mc;
-       char                    *name, devname[32];
-       int                     maj, part, unit;
-       u_int32_t               chk;
+       struct sr_discipline    *sd = wu->swu_dis;
+       struct sr_ccb           *ccb;
 
-       DNPRINTF(SR_D_META, "%s: sr_validate_metadata(0x%x)\n",
-           DEVNAME(sc), dev);
+       splassert(IPL_BIO);
 
-       bzero(devname, sizeof(devname));
+       if (wu->swu_state == SR_WU_RESTART)
+               /*
+                * no need to put the wu on the pending queue since we
+                * are restarting the io
+                */
+                ;
+       else
+               /* move wu to pending queue */
+               TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
 
-       if (sm->ssd_magic != SR_MAGIC)
-               goto bad;
+       /* start all individual ios */
+       TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) {
+               bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf);
+       }
+}
 
-       maj = major(dev);
-       part = DISKPART(dev);
-       unit = DISKUNIT(dev);
+void
+sr_checksum_print(u_int8_t *md5)
+{
+       int                     i;
 
-       name = findblkname(maj);
-       if (name == NULL)
-               goto bad;
+       for (i = 0; i < MD5_DIGEST_LENGTH; i++)
+               printf("%02x", md5[i]);
+}
 
-       snprintf(devname, sizeof(devname),
-           "%s%d%c", name, unit, part + 'a');
-       name = devname;
+void
+sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len)
+{
+       MD5_CTX                 ctx;
 
-       /* validate metadata */
-       if (sm->ssd_version != SR_META_VERSION) {
-               printf("%s: %s can not read metadata version %d, "
-                   "expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_version,
-                   SR_META_VERSION);
-               goto bad;
-       }
-       if (sm->ssd_size != sizeof(struct sr_metadata)) {
-               printf("%s: %s invalid metadata size %d, "
-                   "expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_size,
-                   sizeof(struct sr_metadata));
-               goto bad;
-       }
-       chk = sr_checksum(DEVNAME(sc), (u_int32_t *)sm, sm->ssd_size);
-       /*
-        * since the checksum value is part of the checksum a good
-        * result equals 0
-        */
-       if (chk != 0) {
-               printf("%s: %s invalid metadata checksum 0x%x, "
-                   "expected 0x%x\n", DEVNAME(sc),
-                   devname, sm->ssd_checksum, chk);
-               goto bad;
-       }
+       DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src,
+           md5, len);
 
-       /* validate volume metadata */
-       if (sm->ssd_vd_ver != SR_VOL_VERSION) {
-               printf("%s: %s can not read volume metadata version "
-                   "%d, expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_vd_ver,
-                   SR_VOL_VERSION);
-               goto bad;
-       }
-       if (sm->ssd_vd_size != sizeof(struct sr_vol_meta)) {
-               printf("%s: %s invalid volume metadata size %d, "
-                   "expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_vd_size,
-                   sizeof(struct sr_vol_meta));
-               goto bad;
-       }
-       mv = (struct sr_vol_meta *)(sm + 1);
-       chk = sr_checksum(DEVNAME(sc), (u_int32_t *)mv, sm->ssd_vd_size);
-       if (chk != sm->ssd_vd_chk) {
-               printf("%s: %s invalid volume metadata checksum 0x%x, "
-                   "expected 0x%x\n", DEVNAME(sc),
-                   devname, sm->ssd_vd_chk, chk);
-               goto bad;
-       }
+       MD5Init(&ctx);
+       MD5Update(&ctx, src, len);
+       MD5Final(md5, &ctx);
+}
 
-       /* validate chunk metadata */
-       if (sm->ssd_chunk_ver != SR_CHUNK_VERSION) {
-               printf("%s: %s can not read chunk metadata version "
-                   "%d, expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_chunk_ver,
-                   SR_CHUNK_VERSION);
-               goto bad;
-       }
-       if (sm->ssd_chunk_size != sizeof(struct sr_chunk_meta)) {
-               printf("%s: %s invalid chunk metadata size %d, "
-                   "expected %d\n", DEVNAME(sc),
-                   devname, sm->ssd_chunk_size,
-                   sizeof(struct sr_chunk_meta));
-               goto bad;
-       }
+void
+sr_uuid_get(struct sr_uuid *uuid)
+{
+       /* XXX replace with idgen32 */
+       arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id));
+}
 
-       mc = (struct sr_chunk_meta *)(mv + 1);
-       /* checksum is calculated over ALL chunks */
-       chk = sr_checksum(DEVNAME(sc), (u_int32_t *)(mc),
-           sm->ssd_chunk_size * sm->ssd_chunk_no);
+void
+sr_uuid_print(struct sr_uuid *uuid, int cr)
+{
+       int                     i;
 
-       if (chk != sm->ssd_chunk_chk) {
-               printf("%s: %s invalid chunk metadata checksum 0x%x, "
-                   "expected 0x%x\n", DEVNAME(sc),
-                   devname, sm->ssd_chunk_chk, chk);
-               goto bad;
-       }
+       for (i = 0; i < SR_UUID_MAX; i++)
+               printf("%x%s", uuid->sui_id[i],
+                   i < SR_UUID_MAX - 1 ? ":" : "");
 
-       /* warn if disk changed order */
-       if (strncmp(mc[sm->ssd_chunk_id].scm_devname, name,
-           sizeof(mc[sm->ssd_chunk_id].scm_devname)))
-               printf("%s: roaming device %s -> %s\n", DEVNAME(sc),
-                   mc[sm->ssd_chunk_id].scm_devname, name);
+       if (cr)
+               printf("\n");
+}
 
-       /* we have meta data on disk */
-       DNPRINTF(SR_D_META, "%s: sr_validate_metadata valid metadata %s\n",
-           DEVNAME(sc), devname);
+int
+sr_already_assembled(struct sr_discipline *sd)
+{
+       struct sr_softc         *sc = sd->sd_sc;
+       int                     i;
 
-       return (0);
-bad:
-       DNPRINTF(SR_D_META, "%s: sr_validate_metadata invalid metadata %s\n",
-           DEVNAME(sc), devname);
+       for (i = 0; i < SR_MAXSCSIBUS; i++)
+               if (sc->sc_dis[i])
+                       if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid,
+                           &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid,
+                           sizeof(sd->sd_meta->ssdi.ssd_uuid)))
+                               return (1);
 
-       return (1);
+       return (0);
 }
 
 int32_t
@@ -2324,11 +2485,11 @@ sr_shutdown(void *arg)
        struct sr_softc         *sc = sd->sd_sc;
 #endif
        DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
+           DEVNAME(sc), sd->sd_meta->ssd_devname);
 
-       sr_save_metadata(sd, 0);
+       sr_meta_save(sd, 0);
 
-       sr_shutdown_discipline(sd);
+       sr_discipline_shutdown(sd);
 }
 
 int
@@ -2341,7 +2502,7 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
        DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func,
            xs->cmd->opcode);
 
-       if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) {
+       if (sd->sd_vol_status == BIOC_SVOFFLINE) {
                DNPRINTF(SR_D_DIS, "%s: %s device offline\n",
                    DEVNAME(sd->sd_sc));
                goto bad;
@@ -2349,7 +2510,7 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
 
        if (xs->datalen == 0) {
                printf("%s: %s: illegal block count\n",
-                   DEVNAME(sd->sd_sc), func, sd->sd_vol.sv_meta.svm_devname);
+                   DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname);
                goto bad;
        }
 
@@ -2361,14 +2522,14 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func)
                *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr);
        else {
                printf("%s: %s: illegal cmdlen\n", DEVNAME(sd->sd_sc), func,
-                   sd->sd_vol.sv_meta.svm_devname);
+                   sd->sd_meta->ssd_devname);
                goto bad;
        }
 
        wu->swu_blk_start = *blk;
        wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1;
 
-       if (wu->swu_blk_end > sd->sd_vol.sv_meta.svm_size) {
+       if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) {
                DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld "
                    "end: %lld length: %d\n",
                    DEVNAME(sd->sd_sc), func, wu->swu_blk_start,
@@ -2422,26 +2583,26 @@ queued:
 
 #ifndef SMALL_KERNEL
 int
-sr_create_sensors(struct sr_discipline *sd)
+sr_sensors_create(struct sr_discipline *sd)
 {
        struct sr_softc         *sc = sd->sd_sc;
        int                     rv = 1;
 
-       DNPRINTF(SR_D_STATE, "%s: %s: sr_create_sensors\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
+       DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n",
+           DEVNAME(sc), sd->sd_meta->ssd_devname);
 
        strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc),
            sizeof(sd->sd_vol.sv_sensordev.xname));
 
        sd->sd_vol.sv_sensor.type = SENSOR_DRIVE;
        sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN;
-       strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_vol.sv_meta.svm_devname,
+       strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname,
            sizeof(sd->sd_vol.sv_sensor.desc));
 
        sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor);
 
        if (sc->sc_sensors_running == 0) {
-               if (sensor_task_register(sc, sr_refresh_sensors, 10) == NULL)
+               if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL)
                        goto bad;
                sc->sc_sensors_running = 1;
        }
@@ -2453,35 +2614,37 @@ bad:
 }
 
 void
-sr_delete_sensors(struct sr_discipline *sd)
+sr_sensors_delete(struct sr_discipline *sd)
 {
 #ifdef SR_DEBUG
        struct sr_softc         *sc = sd->sd_sc;
 #endif
-       DNPRINTF(SR_D_STATE, "%s: %s: sr_delete_sensors\n",
-           DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname);
+       DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_delete\n",
+           DEVNAME(sc), sd->sd_meta->ssd_devname);
 
        if (sd->sd_vol.sv_sensor_valid)
                sensordev_deinstall(&sd->sd_vol.sv_sensordev);
 }
 
 void
-sr_refresh_sensors(void *arg)
+sr_sensors_refresh(void *arg)
 {
        struct sr_softc         *sc = arg;
-       int                     i, vol;
        struct sr_volume        *sv;
+       struct sr_discipline    *sd;
+       int                     i, vol;
 
-       DNPRINTF(SR_D_STATE, "%s: sr_refresh_sensors\n", DEVNAME(sc));
+       DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc));
 
        for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) {
                /* XXX this will not work when we stagger disciplines */
                if (!sc->sc_dis[i])
                        continue;
 
-               sv = &sc->sc_dis[i]->sd_vol;
+               sd = sc->sc_dis[i];
+               sv = &sd->sd_vol;
 
-               switch(sv->sv_meta.svm_status) {
+               switch(sd->sd_vol_status) {
                case BIOC_SVOFFLINE:
                        sv->sv_sensor.value = SENSOR_DRIVE_FAIL;
                        sv->sv_sensor.status = SENSOR_S_CRIT;
@@ -2535,7 +2698,7 @@ sr_print_stats(void)
 
                sd = sc->sc_dis[i];
                printf("%s: ios pending: %d  collisions %llu\n",
-                   sd->sd_vol.sv_meta.svm_devname,
+                   sd->sd_meta->ssd_devname,
                    sd->sd_wu_pending,
                    sd->sd_wu_collisions);
        }
@@ -2544,73 +2707,65 @@ sr_print_stats(void)
 
 #ifdef SR_DEBUG
+/*
+ * Debug helper: print the metadata header, then each of the ssd_chunk_no
+ * chunk records and ssd_opt_no optional records that follow it in memory.
+ * Prints nothing unless SR_D_META is set in sr_debug.
+ */
 void
-sr_print_metadata(struct sr_metadata *sm)
+sr_meta_print(struct sr_metadata *m)
 {
-       struct sr_vol_meta      *im_sv;
-       struct sr_chunk_meta    *im_sc;
-       struct sr_opt_meta      *im_so;
-       int                     ch;
+       int                     i;
+       struct sr_meta_chunk    *mc;
+       struct sr_meta_opt      *mo;
 
        if (!(sr_debug & SR_D_META))
                return;
 
-       im_sv = (struct sr_vol_meta *)(sm + 1);
-       im_sc = (struct sr_chunk_meta *)(im_sv + 1);
-       im_so = (struct sr_opt_meta *)(im_sc + im_sv->svm_no_chunk);
-
-       DNPRINTF(SR_D_META, "\tmeta magic 0x%llx\n", sm->ssd_magic);
-       DNPRINTF(SR_D_META, "\tmeta version %d\n", sm->ssd_version);
-       DNPRINTF(SR_D_META, "\tmeta checksum 0x%x\n", sm->ssd_checksum);
-       DNPRINTF(SR_D_META, "\tmeta size %d\n", sm->ssd_size);
-       DNPRINTF(SR_D_META, "\tmeta on disk version %u\n", sm->ssd_ondisk);
-       DNPRINTF(SR_D_META, "\tmeta uuid ");
-       sr_print_uuid(&sm->ssd_uuid, 1);
-       DNPRINTF(SR_D_META, "\tvd version %d\n", sm->ssd_vd_ver);
-       DNPRINTF(SR_D_META, "\tvd size %lu\n", sm->ssd_vd_size);
-       DNPRINTF(SR_D_META, "\tvd id %u\n", sm->ssd_vd_volid);
-       DNPRINTF(SR_D_META, "\tvd checksum 0x%x\n", sm->ssd_vd_chk);
-       DNPRINTF(SR_D_META, "\tchunk version %d\n", sm->ssd_chunk_ver);
-       DNPRINTF(SR_D_META, "\tchunks %d\n", sm->ssd_chunk_no);
-       DNPRINTF(SR_D_META, "\tchunk size %u\n", sm->ssd_chunk_size);
-       DNPRINTF(SR_D_META, "\tchunk id %u\n", sm->ssd_chunk_id);
-       DNPRINTF(SR_D_META, "\tchunk checksum 0x%x\n", sm->ssd_chunk_chk);
-       if (sm->ssd_opt_no) {
-               DNPRINTF(SR_D_META, "\topt version %d\n", sm->ssd_opt_ver);
-               DNPRINTF(SR_D_META, "\topt items %d\n", sm->ssd_opt_no);
-               DNPRINTF(SR_D_META, "\topt size %d\n", sm->ssd_opt_size);
-               DNPRINTF(SR_D_META, "\topt chk 0x%x\n", sm->ssd_opt_chk);
-       }
-
-
-       DNPRINTF(SR_D_META, "\t\tvol id %d\n", im_sv->svm_volid);
-       DNPRINTF(SR_D_META, "\t\tvol status %d\n", im_sv->svm_status);
-       DNPRINTF(SR_D_META, "\t\tvol flags 0x%x\n", im_sv->svm_flags);
-       DNPRINTF(SR_D_META, "\t\tvol level %d\n", im_sv->svm_level);
-       DNPRINTF(SR_D_META, "\t\tvol size %lld\n", im_sv->svm_size);
-       DNPRINTF(SR_D_META, "\t\tvol name %s\n", im_sv->svm_devname);
-       DNPRINTF(SR_D_META, "\t\tvol vendor %s\n", im_sv->svm_vendor);
-       DNPRINTF(SR_D_META, "\t\tvol prod %s\n", im_sv->svm_product);
-       DNPRINTF(SR_D_META, "\t\tvol rev %s\n", im_sv->svm_revision);
-       DNPRINTF(SR_D_META, "\t\tvol no chunks %d\n", im_sv->svm_no_chunk);
-       DNPRINTF(SR_D_META, "\t\tvol uuid ");
-       sr_print_uuid(& im_sv->svm_uuid, 1);
-       DNPRINTF(SR_D_META, "\t\tvol stripsize %d\n", im_sv->svm_strip_size);
-
-       for (ch = 0; ch < im_sv->svm_no_chunk; ch++) {
-               DNPRINTF(SR_D_META, "\t\t\tchunk vol id %d\n",
-                   im_sc[ch].scm_volid);
-               DNPRINTF(SR_D_META, "\t\t\tchunk id %d\n",
-                   im_sc[ch].scm_chunk_id);
-               DNPRINTF(SR_D_META, "\t\t\tchunk status %d\n",
-                   im_sc[ch].scm_status);
-               DNPRINTF(SR_D_META, "\t\t\tchunk name %s\n",
-                   im_sc[ch].scm_devname);
-               DNPRINTF(SR_D_META, "\t\t\tchunk size %lld\n",
-                   im_sc[ch].scm_size);
-               DNPRINTF(SR_D_META, "\t\t\tchunk coerced size %lld\n",
-                   im_sc[ch].scm_coerced_size);
-               DNPRINTF(SR_D_META, "\t\t\tchunk uuid ");
-               sr_print_uuid(&im_sc[ch].scm_uuid, 1);
+       printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic);
+       printf("\tssd_version %d\n", m->ssdi.ssd_version);
+       printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags);
+       printf("\tssd_uuid ");
+       sr_uuid_print(&m->ssdi.ssd_uuid, 1);
+       printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no);
+       printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id);
+       printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no);
+       printf("\tssd_volid %d\n", m->ssdi.ssd_volid);
+       printf("\tssd_level %d\n", m->ssdi.ssd_level);
+       printf("\tssd_size %lld\n", m->ssdi.ssd_size);
+       printf("\tssd_devname %s\n", m->ssd_devname);
+       printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor);
+       printf("\tssd_product %s\n", m->ssdi.ssd_product);
+       printf("\tssd_revision %s\n", m->ssdi.ssd_revision);
+       printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size);
+       printf("\tssd_checksum ");
+       sr_checksum_print(m->ssd_checksum);
+       printf("\n");
+       printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags);
+       printf("\tssd_ondisk %llu\n", m->ssd_ondisk);
+
+       /* chunk records start immediately after the header */
+       mc = (struct sr_meta_chunk *)(m + 1);
+       for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) {
+               printf("\t\tscm_volid %d\n", mc->scmi.scm_volid);
+               printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id);
+               printf("\t\tscm_devname %s\n", mc->scmi.scm_devname);
+               printf("\t\tscm_size %lld\n", mc->scmi.scm_size);
+               printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size);
+               printf("\t\tscm_uuid ");
+               sr_uuid_print(&mc->scmi.scm_uuid, 1);
+               printf("\t\tscm_checksum ");
+               sr_checksum_print(mc->scm_checksum);
+               printf("\n");
+               printf("\t\tscm_status %d\n", mc->scm_status);
+       }
+
+       /* optional records follow the last chunk, where mc now points */
+       mo = (struct sr_meta_opt *)(mc);
+       for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) {
+               printf("\t\t\tsom_type %d\n", mo->somi.som_type);
+               printf("\t\t\tsom_checksum ");
+               sr_checksum_print(mo->som_checksum);
+               printf("\n");
        }
 }
 
index 5d955d5..804e60f 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid_crypto.c,v 1.28 2008/06/25 17:43:09 thib Exp $ */
+/* $OpenBSD: softraid_crypto.c,v 1.29 2008/07/19 22:41:58 marco Exp $ */
 /*
  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
  * Copyright (c) 2008 Hans-Joerg Hoexer <hshoexer@openbsd.org>
@@ -100,16 +100,23 @@ sr_crypto_getcryptop(struct sr_workunit *wu, int encrypt)
        struct sr_discipline    *sd = wu->swu_dis;
        struct cryptop          *crp;
        struct cryptodesc       *crd;
-       struct uio              *uio;
-       int                      flags, i, n;
-       daddr64_t                blk = 0;
-       u_int                    keyndx;
+       struct uio              *uio = NULL;
+       int                     flags, i, n, s;
+       daddr64_t               blk = 0;
+       u_int                   keyndx;
 
        DNPRINTF(SR_D_DIS, "%s: sr_crypto_getcryptop wu: %p encrypt: %d\n",
            DEVNAME(sd->sd_sc), wu, encrypt);
 
-       uio = pool_get(&sr_uiopl, PR_WAITOK|PR_ZERO);
-       uio->uio_iov = pool_get(&sr_iovpl, PR_WAITOK);
+       s = splbio();
+       uio = pool_get(&sd->mds.mdd_crypto.sr_uiopl, PR_ZERO);
+       if (uio == NULL)
+               goto unwind;
+       uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl, 0);
+       if (uio->uio_iov == NULL)
+               goto unwind;
+       splx(s);
+
        uio->uio_iovcnt = 1;
        uio->uio_iov->iov_len = xs->datalen;
        if (xs->flags & SCSI_DATA_OUT) {
@@ -172,8 +179,14 @@ unwind:
                crypto_freereq(crp);
        if (wu->swu_xs->flags & SCSI_DATA_OUT)
                free(uio->uio_iov->iov_base, M_DEVBUF);
-       pool_put(&sr_iovpl, uio->uio_iov);
-       pool_put(&sr_uiopl, uio);
+
+       s = splbio();
+       if (uio && uio->uio_iov)
+               pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov);
+       if (uio)
+               pool_put(&sd->mds.mdd_crypto.sr_uiopl, uio);
+       splx(s);
+
        return (NULL);
 }
 
@@ -182,14 +195,18 @@ sr_crypto_putcryptop(struct cryptop *crp)
 {
        struct uio              *uio = crp->crp_buf;
        struct sr_workunit      *wu = crp->crp_opaque;
+       struct sr_discipline    *sd = wu->swu_dis;
+       int                     s;
 
        DNPRINTF(SR_D_DIS, "%s: sr_crypto_putcryptop crp: %p\n",
            DEVNAME(wu->swu_dis->sd_sc), crp);
 
        if (wu->swu_xs->flags & SCSI_DATA_OUT)
                free(uio->uio_iov->iov_base, M_DEVBUF);
-       pool_put(&sr_iovpl, uio->uio_iov);
-       pool_put(&sr_uiopl, uio);
+       s = splbio();
+       pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov);
+       pool_put(&sd->mds.mdd_crypto.sr_uiopl, uio);
+       splx(s);
        crypto_freereq(crp);
 
        return (wu);
@@ -434,12 +451,17 @@ sr_crypto_alloc_resources(struct sr_discipline *sd)
        DNPRINTF(SR_D_DIS, "%s: sr_crypto_alloc_resources\n",
            DEVNAME(sd->sd_sc));
 
+       pool_init(&sd->mds.mdd_crypto.sr_uiopl, sizeof(struct uio), 0, 0, 0,
+           "sr_uiopl", NULL);
+       pool_init(&sd->mds.mdd_crypto.sr_iovpl, sizeof(struct iovec), 0, 0, 0,
+           "sr_iovpl", NULL);
+
        for (i = 0; i < SR_CRYPTO_MAXKEYS; i++)
                sd->mds.mdd_crypto.scr_sid[i] = (u_int64_t)-1;
 
-       if (sr_alloc_wu(sd))
+       if (sr_wu_alloc(sd))
                return (ENOMEM);
-       if (sr_alloc_ccb(sd))
+       if (sr_ccb_alloc(sd))
                return (ENOMEM);
        if (sr_crypto_decrypt_key(sd))
                return (EPERM); 
@@ -458,7 +480,7 @@ sr_crypto_alloc_resources(struct sr_discipline *sd)
        }
 
        /* Allocate a session for every 2^SR_CRYPTO_KEY_BLKSHIFT blocks */
-       num_keys = sd->sd_vol.sv_meta.svm_size >> SR_CRYPTO_KEY_BLKSHIFT;
+       num_keys = sd->sd_meta->ssdi.ssd_size >> SR_CRYPTO_KEY_BLKSHIFT;
        if (num_keys >= SR_CRYPTO_MAXKEYS)
                return (EFBIG);
        for (i = 0; i <= num_keys; i++) {
@@ -497,11 +519,11 @@ sr_crypto_free_resources(struct sr_discipline *sd)
                sd->mds.mdd_crypto.scr_sid[i] = (u_int64_t)-1;
        }
 
-       sr_free_wu(sd);
-       sr_free_ccb(sd);
+       sr_wu_free(sd);
+       sr_ccb_free(sd);
 
-       if (sd->sd_meta)
-               free(sd->sd_meta, M_DEVBUF);
+       pool_destroy(&sd->mds.mdd_crypto.sr_uiopl);
+       pool_destroy(&sd->mds.mdd_crypto.sr_iovpl);
 
        rv = 0;
        return (rv);
@@ -570,11 +592,11 @@ sr_crypto_rw2(struct sr_workunit *wu, struct cryptop *crp)
 
        wu->swu_io_count = 1;
 
-       ccb = sr_get_ccb(sd);
+       ccb = sr_ccb_get(sd);
        if (!ccb) {
                /* should never happen but handle more gracefully */
                printf("%s: %s: too many ccbs queued\n",
-                   DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
+                   DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
                goto bad;
        }
 
@@ -608,7 +630,7 @@ sr_crypto_rw2(struct sr_workunit *wu, struct cryptop *crp)
 
         DNPRINTF(SR_D_DIS, "%s: %s: sr_crypto_rw2: b_bcount: %d "
             "b_blkno: %x b_flags 0x%0x b_data %p\n",
-            DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+            DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
             ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
             ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
 
@@ -623,7 +645,7 @@ queued:
        splx(s);
        return (0);
 bad:
-       /* wu is unwound by sr_put_wu */
+       /* wu is unwound by sr_wu_put */
        if (crp)
                crp->crp_etype = EINVAL;
        return (1);
@@ -740,7 +762,7 @@ sr_crypto_finish_io(struct sr_workunit *wu)
        }
 
        /* do not change the order of these 2 functions */
-       sr_put_wu(wu);
+       sr_wu_put(wu);
        scsi_done(xs);
 
        if (sd->sd_sync && sd->sd_wu_pending == 0)
index d899468..9fa1fed 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid_raid0.c,v 1.8 2008/02/05 16:49:25 marco Exp $ */
+/* $OpenBSD: softraid_raid0.c,v 1.9 2008/07/19 22:41:58 marco Exp $ */
 /*
  * Copyright (c) 2008 Marco Peereboom <marco@peereboom.us>
  *
@@ -55,14 +55,14 @@ sr_raid0_alloc_resources(struct sr_discipline *sd)
        DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n",
            DEVNAME(sd->sd_sc));
 
-       if (sr_alloc_wu(sd))
+       if (sr_wu_alloc(sd))
                goto bad;
-       if (sr_alloc_ccb(sd))
+       if (sr_ccb_alloc(sd))
                goto bad;
 
        /* setup runtime values */
        sd->mds.mdd_raid0.sr0_strip_bits =
-           sr_validate_stripsize(sd->sd_vol.sv_meta.svm_strip_size);
+           sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
        if (sd->mds.mdd_raid0.sr0_strip_bits == -1)
                goto bad;
 
@@ -82,11 +82,8 @@ sr_raid0_free_resources(struct sr_discipline *sd)
        DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n",
            DEVNAME(sd->sd_sc));
 
-       sr_free_wu(sd);
-       sr_free_ccb(sd);
-
-       if (sd->sd_meta)
-               free(sd->sd_meta, M_DEVBUF);
+       sr_wu_free(sd);
+       sr_ccb_free(sd);
 
        rv = 0;
        return (rv);
@@ -98,8 +95,8 @@ sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
        int                     old_state, s;
 
        DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
-           sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state);
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
+           sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
        /* ok to go to splbio since this only happens in error path */
        s = splbio();
@@ -125,8 +122,8 @@ die:
                splx(s); /* XXX */
                panic("%s: %s: %s: invalid chunk state transition "
                    "%d -> %d\n", DEVNAME(sd->sd_sc),
-                   sd->sd_vol.sv_meta.svm_devname,
-                   sd->sd_vol.sv_chunks[c]->src_meta.scm_devname,
+                   sd->sd_meta->ssd_devname,
+                   sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
                    old_state, new_state);
                /* NOTREACHED */
        }
@@ -135,7 +132,7 @@ die:
        sd->sd_set_vol_state(sd);
 
        sd->sd_must_flush = 1;
-       workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL);
+       workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
        splx(s);
 }
@@ -145,12 +142,12 @@ sr_raid0_set_vol_state(struct sr_discipline *sd)
 {
        int                     states[SR_MAX_STATES];
        int                     new_state, i, s, nd;
-       int                     old_state = sd->sd_vol.sv_meta.svm_status;
+       int                     old_state = sd->sd_vol_status;
 
        DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 
-       nd = sd->sd_vol.sv_meta.svm_no_chunk;
+       nd = sd->sd_meta->ssdi.ssd_chunk_no;
 
        for (i = 0; i < SR_MAX_STATES; i++)
                states[i] = 0;
@@ -160,8 +157,8 @@ sr_raid0_set_vol_state(struct sr_discipline *sd)
                if (s > SR_MAX_STATES)
                        panic("%s: %s: %s: invalid chunk state",
                            DEVNAME(sd->sd_sc),
-                           sd->sd_vol.sv_meta.svm_devname,
-                           sd->sd_vol.sv_chunks[i]->src_meta.scm_devname);
+                           sd->sd_meta->ssd_devname,
+                           sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
                states[s]++;
        }
 
@@ -171,7 +168,7 @@ sr_raid0_set_vol_state(struct sr_discipline *sd)
                new_state = BIOC_SVOFFLINE;
 
        DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
            old_state, new_state);
 
        switch (old_state) {
@@ -190,12 +187,12 @@ sr_raid0_set_vol_state(struct sr_discipline *sd)
 die:
                panic("%s: %s: invalid volume state transition "
                    "%d -> %d\n", DEVNAME(sd->sd_sc),
-                   sd->sd_vol.sv_meta.svm_devname,
+                   sd->sd_meta->ssd_devname,
                    old_state, new_state);
                /* NOTREACHED */
        }
 
-       sd->sd_vol.sv_meta.svm_status = new_state;
+       sd->sd_vol_status = new_state;
 }
 
 int
@@ -215,12 +212,12 @@ sr_raid0_rw(struct sr_workunit *wu)
        if (sr_validate_io(wu, &blk, "sr_raid0_rw"))
                goto bad;
 
-       strip_size = sd->sd_vol.sv_meta.svm_strip_size;
+       strip_size = sd->sd_meta->ssdi.ssd_strip_size;
        strip_bits = sd->mds.mdd_raid0.sr0_strip_bits;
-       no_chunk = sd->sd_vol.sv_meta.svm_no_chunk;
+       no_chunk = sd->sd_meta->ssdi.ssd_chunk_no;
 
        DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
            blk, xs->datalen);
 
        /* all offs are in bytes */
@@ -238,16 +235,16 @@ sr_raid0_rw(struct sr_workunit *wu)
                /* make sure chunk is online */
                scp = sd->sd_vol.sv_chunks[chunk];
                if (scp->src_meta.scm_status != BIOC_SDONLINE) {
-                       sr_put_ccb(ccb);
+                       sr_ccb_put(ccb);
                        goto bad;
                }
 
-               ccb = sr_get_ccb(sd);
+               ccb = sr_ccb_get(sd);
                if (!ccb) {
                        /* should never happen but handle more gracefully */
                        printf("%s: %s: too many ccbs queued\n",
                            DEVNAME(sd->sd_sc),
-                           sd->sd_vol.sv_meta.svm_devname);
+                           sd->sd_meta->ssd_devname);
                        goto bad;
                }
 
@@ -255,7 +252,7 @@ sr_raid0_rw(struct sr_workunit *wu)
                    "strip_no: %lld chunk: %lld stripoffs: %lld "
                    "chunkoffs: %lld physoffs: %lld length: %lld "
                    "leftover: %lld data: %p\n",
-                   DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, lbaoffs,
+                   DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, lbaoffs,
                    strip_no, chunk, stripoffs, chunkoffs, physoffs, length,
                    leftover, data);
 
@@ -279,7 +276,7 @@ sr_raid0_rw(struct sr_workunit *wu)
 
                DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d "
                    "b_blkno: %lld b_flags 0x%0x b_data %p\n",
-                   DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+                   DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
                    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
                    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
 
@@ -306,7 +303,7 @@ queued:
        splx(s);
        return (0);
 bad:
-       /* wu is unwound by sr_put_wu */
+       /* wu is unwound by sr_wu_put */
        return (1);
 }
 
@@ -385,7 +382,7 @@ sr_raid0_intr(struct buf *bp)
                            DEVNAME(sc), wu);
 
                /* do not change the order of these 2 functions */
-               sr_put_wu(wu);
+               sr_wu_put(wu);
                scsi_done(xs);
 
                if (sd->sd_sync && sd->sd_wu_pending == 0)
@@ -397,7 +394,7 @@ sr_raid0_intr(struct buf *bp)
 bad:
        xs->error = XS_DRIVER_STUFFUP;
        xs->flags |= ITSDONE;
-       sr_put_wu(wu);
+       sr_wu_put(wu);
        scsi_done(xs);
        splx(s);
 }
index 85a789f..a89294c 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraid_raid1.c,v 1.5 2008/02/05 16:49:25 marco Exp $ */
+/* $OpenBSD: softraid_raid1.c,v 1.6 2008/07/19 22:41:58 marco Exp $ */
 /*
  * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
  *
@@ -55,9 +55,9 @@ sr_raid1_alloc_resources(struct sr_discipline *sd)
        DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n",
            DEVNAME(sd->sd_sc));
 
-       if (sr_alloc_wu(sd))
+       if (sr_wu_alloc(sd))
                goto bad;
-       if (sr_alloc_ccb(sd))
+       if (sr_ccb_alloc(sd))
                goto bad;
 
        rv = 0;
@@ -76,11 +76,8 @@ sr_raid1_free_resources(struct sr_discipline *sd)
        DNPRINTF(SR_D_DIS, "%s: sr_raid1_free_resources\n",
            DEVNAME(sd->sd_sc));
 
-       sr_free_wu(sd);
-       sr_free_ccb(sd);
-
-       if (sd->sd_meta)
-               free(sd->sd_meta, M_DEVBUF);
+       sr_wu_free(sd);
+       sr_ccb_free(sd);
 
        rv = 0;
        return (rv);
@@ -92,8 +89,8 @@ sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
        int                     old_state, s;
 
        DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
-           sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state);
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
+           sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
 
        /* ok to go to splbio since this only happens in error path */
        s = splbio();
@@ -148,8 +145,8 @@ die:
                splx(s); /* XXX */
                panic("%s: %s: %s: invalid chunk state transition "
                    "%d -> %d\n", DEVNAME(sd->sd_sc),
-                   sd->sd_vol.sv_meta.svm_devname,
-                   sd->sd_vol.sv_chunks[c]->src_meta.scm_devname,
+                   sd->sd_meta->ssd_devname,
+                   sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
                    old_state, new_state);
                /* NOTREACHED */
        }
@@ -158,7 +155,7 @@ die:
        sd->sd_set_vol_state(sd);
 
        sd->sd_must_flush = 1;
-       workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL);
+       workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL);
 done:
        splx(s);
 }
@@ -168,12 +165,12 @@ sr_raid1_set_vol_state(struct sr_discipline *sd)
 {
        int                     states[SR_MAX_STATES];
        int                     new_state, i, s, nd;
-       int                     old_state = sd->sd_vol.sv_meta.svm_status;
+       int                     old_state = sd->sd_vol_status;
 
        DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname);
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
 
-       nd = sd->sd_vol.sv_meta.svm_no_chunk;
+       nd = sd->sd_meta->ssdi.ssd_chunk_no;
 
        for (i = 0; i < SR_MAX_STATES; i++)
                states[i] = 0;
@@ -183,8 +180,8 @@ sr_raid1_set_vol_state(struct sr_discipline *sd)
                if (s > SR_MAX_STATES)
                        panic("%s: %s: %s: invalid chunk state",
                            DEVNAME(sd->sd_sc),
-                           sd->sd_vol.sv_meta.svm_devname,
-                           sd->sd_vol.sv_chunks[i]->src_meta.scm_devname);
+                           sd->sd_meta->ssd_devname,
+                           sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
                states[s]++;
        }
 
@@ -207,7 +204,7 @@ sr_raid1_set_vol_state(struct sr_discipline *sd)
        }
 
        DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
-           DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+           DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
            old_state, new_state);
 
        switch (old_state) {
@@ -274,12 +271,12 @@ sr_raid1_set_vol_state(struct sr_discipline *sd)
 die:
                panic("%s: %s: invalid volume state transition "
                    "%d -> %d\n", DEVNAME(sd->sd_sc),
-                   sd->sd_vol.sv_meta.svm_devname,
+                   sd->sd_meta->ssd_devname,
                    old_state, new_state);
                /* NOTREACHED */
        }
 
-       sd->sd_vol.sv_meta.svm_status = new_state;
+       sd->sd_vol_status = new_state;
 }
 
 int
@@ -302,16 +299,16 @@ sr_raid1_rw(struct sr_workunit *wu)
        if (xs->flags & SCSI_DATA_IN)
                ios = 1;
        else
-               ios = sd->sd_vol.sv_meta.svm_no_chunk;
+               ios = sd->sd_meta->ssdi.ssd_chunk_no;
        wu->swu_io_count = ios;
 
        for (i = 0; i < ios; i++) {
-               ccb = sr_get_ccb(sd);
+               ccb = sr_ccb_get(sd);
                if (!ccb) {
                        /* should never happen but handle more gracefully */
                        printf("%s: %s: too many ccbs queued\n",
                            DEVNAME(sd->sd_sc),
-                           sd->sd_vol.sv_meta.svm_devname);
+                           sd->sd_meta->ssd_devname);
                        goto bad;
                }
 
@@ -337,7 +334,7 @@ sr_raid1_rw(struct sr_workunit *wu)
 ragain:
                        /* interleave reads */
                        x = sd->mds.mdd_raid1.sr1_counter++ %
-                           sd->sd_vol.sv_meta.svm_no_chunk;
+                           sd->sd_meta->ssdi.ssd_chunk_no;
                        scp = sd->sd_vol.sv_chunks[x];
                        switch (scp->src_meta.scm_status) {
                        case BIOC_SDONLINE:
@@ -348,7 +345,7 @@ ragain:
                        case BIOC_SDOFFLINE:
                        case BIOC_SDREBUILD:
                        case BIOC_SDHOTSPARE:
-                               if (rt++ < sd->sd_vol.sv_meta.svm_no_chunk)
+                               if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
                                        goto ragain;
 
                                /* FALLTHROUGH */
@@ -356,7 +353,7 @@ ragain:
                                /* volume offline */
                                printf("%s: is offline, can't read\n",
                                    DEVNAME(sd->sd_sc));
-                               sr_put_ccb(ccb);
+                               sr_ccb_put(ccb);
                                goto bad;
                        }
                } else {
@@ -373,7 +370,7 @@ ragain:
                        case BIOC_SDHOTSPARE: /* should never happen */
                        case BIOC_SDOFFLINE:
                                wu->swu_io_count--;
-                               sr_put_ccb(ccb);
+                               sr_ccb_put(ccb);
                                continue;
 
                        default:
@@ -391,7 +388,7 @@ ragain:
 
                DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d "
                    "b_blkno: %x b_flags 0x%0x b_data %p\n",
-                   DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname,
+                   DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
                    ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno,
                    ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data);
        }
@@ -415,7 +412,7 @@ queued:
        splx(s);
        return (0);
 bad:
-       /* wu is unwound by sr_put_wu */
+       /* wu is unwound by sr_wu_put */
        return (1);
 }
 
@@ -465,7 +462,7 @@ sr_raid1_intr(struct buf *bp)
                        if (xs->flags & SCSI_DATA_IN) {
                                printf("%s: retrying read on block %lld\n",
                                    DEVNAME(sc), ccb->ccb_buf.b_blkno);
-                               sr_put_ccb(ccb);
+                               sr_ccb_put(ccb);
                                TAILQ_INIT(&wu->swu_ccb);
                                wu->swu_state = SR_WU_RESTART;
                                if (sd->sd_scsi_rw(wu))
@@ -513,7 +510,7 @@ sr_raid1_intr(struct buf *bp)
                            DEVNAME(sc), wu);
 
                /* do not change the order of these 2 functions */
-               sr_put_wu(wu);
+               sr_wu_put(wu);
                scsi_done(xs);
 
                if (sd->sd_sync && sd->sd_wu_pending == 0)
@@ -526,7 +523,7 @@ retry:
 bad:
        xs->error = XS_DRIVER_STUFFUP;
        xs->flags |= ITSDONE;
-       sr_put_wu(wu);
+       sr_wu_put(wu);
        scsi_done(xs);
        splx(s);
 }
@@ -544,7 +541,7 @@ sr_raid1_recreate_wu(struct sr_workunit *wu)
                /* toss all ccbs */
                while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) {
                        TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link);
-                       sr_put_ccb(ccb);
+                       sr_ccb_put(ccb);
                }
                TAILQ_INIT(&wup->swu_ccb);
 
index 0b40b5c..1b87e3e 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: softraidvar.h,v 1.60 2008/06/25 17:43:09 thib Exp $ */
+/* $OpenBSD: softraidvar.h,v 1.61 2008/07/19 22:41:58 marco Exp $ */
 /*
  * Copyright (c) 2006 Marco Peereboom <marco@peereboom.us>
  * Copyright (c) 2008 Chris Kuethe <ckuethe@openbsd.org>
 #ifndef SOFTRAIDVAR_H
 #define SOFTRAIDVAR_H
 
+#include <crypto/md5.h>
+
 #define SR_UUID_MAX            4
 struct sr_uuid {
        u_int32_t               sui_id[SR_UUID_MAX];
 } __packed;
 
-#define SR_META_SIZE           32      /* save space at chunk beginning */
+#define SR_META_SIZE           64      /* save space at chunk beginning */
 #define SR_META_OFFSET         16      /* skip 8192 bytes at chunk beginning */
-#define SR_META_VERSION                1       /* bump when sr_metadata changes */
+#define SR_META_VERSION                2       /* bump when sr_metadata changes */
 struct sr_metadata {
-       /* do not change order of ssd_magic, ssd_version & ssd_checksum */
-       u_int64_t               ssd_magic;      /* magic id */
+       struct sr_meta_invariant {
+               /* do not change order of ssd_magic, ssd_version */
+               u_int64_t       ssd_magic;      /* magic id */
 #define        SR_MAGIC                0x4d4152436372616dLLU
-       u_int8_t                ssd_version;    /* meta data version */
-       u_int8_t                ssd_pad1[3];
-       u_int32_t               ssd_flags;      /* flags */
-
-       /* meta-data */
-       u_int32_t               ssd_checksum;   /* xor of the structure */
-       u_int32_t               ssd_size;       /* sizeof(sr_metadata) */
-       u_int32_t               ssd_ondisk;     /* on disk version counter */
-       u_int32_t               ssd_pad2;
-       struct sr_uuid          ssd_uuid;       /* unique identifier */
-
-       /* virtual disk data */
-       u_int32_t               ssd_vd_ver;     /* vd structure version */
-       u_int32_t               ssd_vd_size;    /* vd structure size */
-       u_int32_t               ssd_vd_volid;   /* volume id */
-       u_int32_t               ssd_vd_chk;     /* vd structure xor */
-
-       /* chunk data */
-       u_int32_t               ssd_chunk_ver;  /* chunk structure version */
-       u_int32_t               ssd_chunk_no;   /* number of chunks */
-       u_int32_t               ssd_chunk_size; /* chunk structure size */
-       u_int32_t               ssd_chunk_id;   /* chunk identifier */
-       u_int32_t               ssd_chunk_chk;  /* chunk structure xor */
-       u_int32_t               ssd_pad3;
-
-       /* optional metadata */
-       u_int32_t               ssd_opt_ver;    /* optinal meta version */
-       u_int32_t               ssd_opt_no;     /* nr of optional md elements */
-       u_int32_t               ssd_opt_size;   /* sizeof optional metadata */
-       u_int32_t               ssd_opt_chk;    /* optional metadata xor */
-} __packed;
-
-#define SR_VOL_VERSION 2       /* bump when sr_vol_meta changes */
-struct sr_vol_meta {
-       u_int32_t               svm_volid;      /* volume id */
-       u_int32_t               svm_status;     /* use bioc_vol status */
-       u_int32_t               svm_flags;      /* flags */
-#define        SR_VOL_DIRTY            0x01
-       u_int32_t               svm_level;      /* raid level */
-       int64_t                 svm_size;       /* virt disk size in blocks */
-       char                    svm_devname[32];/* /dev/XXXXX */
-       char                    svm_vendor[8];  /* scsi vendor */
-       char                    svm_product[16];/* scsi product */
-       char                    svm_revision[4];/* scsi revision */
-       u_int32_t               svm_no_chunk;   /* number of chunks */
-       struct sr_uuid          svm_uuid;       /* volume unique identifier */
-
-       /* optional members */
-       u_int32_t               svm_strip_size; /* strip size */
+               u_int32_t       ssd_version;    /* meta data version */
+               u_int32_t       ssd_flags;
+               struct sr_uuid  ssd_uuid;       /* unique identifier */
+
+               /* chunks */
+               u_int32_t       ssd_chunk_no;   /* number of chunks */
+               u_int32_t       ssd_chunk_id;   /* chunk identifier */
+
+               /* optional */
+               u_int32_t       ssd_opt_no;     /* nr of optional md elements */
+               u_int32_t       ssd_pad;
+
+               /* volume metadata */
+               u_int32_t       ssd_volid;      /* volume id */
+               u_int32_t       ssd_level;      /* raid level */
+               int64_t         ssd_size;       /* virt disk size in blocks */
+               char            ssd_vendor[8];  /* scsi vendor */
+               char            ssd_product[16];/* scsi product */
+               char            ssd_revision[4];/* scsi revision */
+               /* optional volume members */
+               u_int32_t       ssd_strip_size; /* strip size */
+       } _sdd_invariant;
+#define ssdi                   _sdd_invariant
+       /* MD5 of invariant metadata */
+       u_int8_t                ssd_checksum[MD5_DIGEST_LENGTH];
+       char                    ssd_devname[32];/* /dev/XXXXX */
+       u_int32_t               ssd_meta_flags;
+#define        SR_META_DIRTY           0x1
+       u_int32_t               ssd_pad;
+       u_int64_t               ssd_ondisk;     /* on disk version counter */
 } __packed;
 
-#define SR_CHUNK_VERSION       1       /* bump when sr_chunk_meta changes */
-struct sr_chunk_meta {
-       u_int32_t               scm_volid;      /* vd we belong to */
-       u_int32_t               scm_chunk_id;   /* chunk id */
+struct sr_meta_chunk {
+       struct sr_meta_chunk_invariant {
+               u_int32_t       scm_volid;      /* vd we belong to */
+               u_int32_t       scm_chunk_id;   /* chunk id */
+               char            scm_devname[32];/* /dev/XXXXX */
+               int64_t         scm_size;       /* size of partition in blocks*/
+               int64_t         scm_coerced_size; /* coerced sz of part in blk*/
+               struct sr_uuid  scm_uuid;       /* unique identifier */
+       } _scm_invariant;
+#define scmi                   _scm_invariant
+       /* MD5 of invariant chunk metadata */
+       u_int8_t                scm_checksum[MD5_DIGEST_LENGTH];
        u_int32_t               scm_status;     /* use bio bioc_disk status */
-       u_int32_t               scm_pad1;
-       char                    scm_devname[32];/* /dev/XXXXX */
-       int64_t                 scm_size;       /* size of partition in blocks*/
-       int64_t                 scm_coerced_size; /* coerced sz of part in blk*/
-       struct sr_uuid          scm_uuid;       /* unique identifier */
 } __packed;
 
-#define SR_CRYPTO_MAXKEYBYTES  32
-#define SR_CRYPTO_MAXKEYS      32
+#define SR_CRYPTO_MAXKEYBYTES  32      /* max bytes in a key (AES-XTS-256) */
+#define SR_CRYPTO_MAXKEYS      32      /* max keys per volume */
 #define SR_CRYPTO_KEYBITS      512     /* AES-XTS with 2 * 256 bit keys */
 #define SR_CRYPTO_KEYBYTES     (SR_CRYPTO_KEYBITS >> 3)
-#define SR_CRYPTO_KDFHINTBYTES 256
-#define SR_CRYPTO_CHECKBYTES   64
+#define SR_CRYPTO_KDFHINTBYTES 256     /* size of opaque KDF hint */
+#define SR_CRYPTO_CHECKBYTES   64      /* size of generic key chksum struct */
 #define SR_CRYPTO_KEY_BLKSHIFT 30      /* 0.5TB per key */
 
+/*
+ * Check that HMAC-SHA1_k(decrypted scm_key) == sch_mac, where
+ * k = SHA1(masking key)
+ *
+ * sch_mac is 20 bytes, the size of a SHA-1 digest.  This struct is one
+ * member of the _scm_chk union in struct sr_meta_crypto; presumably it is
+ * packed because that metadata is stored on disk (TODO confirm).
+ */
+struct sr_crypto_chk_hmac_sha1 {
+       u_int8_t        sch_mac[20];
+} __packed;
+
+struct sr_meta_crypto {
+       u_int32_t               scm_alg;        /* volume crypto algorithm, one of SR_CRYPTOA_xxx */
+#define SR_CRYPTOA_AES_XTS_128 1
+#define SR_CRYPTOA_AES_XTS_256 2
+       u_int32_t               scm_flags;      /* SR_CRYPTOF_xxx bits: key & kdfhint valid */
+#define SR_CRYPTOF_INVALID     (0)
+#define SR_CRYPTOF_KEY         (1<<0)
+#define SR_CRYPTOF_KDFHINT     (1<<1)
+       u_int32_t               scm_mask_alg;   /* disk key masking crypt alg, one of SR_CRYPTOM_xxx */
+#define SR_CRYPTOM_AES_ECB_256 1
+       u_int32_t               scm_pad1;
+       u_int8_t                scm_reserved[64];
+
+       /* symmetric keys used for disk encryption; presumably stored masked, see scm_mask_alg (TODO confirm) */
+       u_int8_t                scm_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES];
+       /* hint to kdf algorithm (opaque to kernel) */
+       u_int8_t                scm_kdfhint[SR_CRYPTO_KDFHINTBYTES];
+
+       u_int32_t               scm_check_alg;  /* key chksum algorithm, one of SR_CRYPTOC_xxx */
+#define SR_CRYPTOC_HMAC_SHA1           1
+       u_int32_t               scm_pad2;
+       union {
+               struct sr_crypto_chk_hmac_sha1  chk_hmac_sha1;
+               u_int8_t                        chk_reserved2[64];
+       }                       _scm_chk;
+#define        chk_hmac_sha1   _scm_chk.chk_hmac_sha1
+} __packed;
+
+/*
+ * Optional metadata element.
+ *
+ * The invariant part (_som_invariant, shorthand "somi") carries a type tag
+ * and a union of type-specific payloads; som_checksum holds the MD5 of that
+ * invariant part.  Presumably som_type selects which som_meta member is
+ * meaningful (SR_OPT_CRYPTO -> smm_crypto) -- TODO confirm against users.
+ */
+struct sr_meta_opt {
+       struct sr_meta_opt_invariant {
+               u_int32_t       som_type;       /* optional type, SR_OPT_xxx */
+#define SR_OPT_INVALID         0x00
+#define SR_OPT_CRYPTO          0x01
+               u_int32_t       som_pad;
+               union {
+                       struct sr_meta_crypto smm_crypto;
+               }               som_meta;
+       } _som_invariant;
+#define somi                   _som_invariant
+/*
+ * smm_crypto is a member of the named union som_meta, so the access path
+ * has to go through it; "_som_invariant.smm_crypto" names no member.
+ */
+#define somi_crypto            _som_invariant.som_meta.smm_crypto
+       /* MD5 of invariant optional metadata */
+       u_int8_t                som_checksum[MD5_DIGEST_LENGTH];
+} __packed;
+
+/* this is a generic hint for KDF done in userland, not interpreted by the kernel. */
 struct sr_crypto_genkdf {
        u_int32_t       len;
        u_int32_t       type;
@@ -109,6 +149,7 @@ struct sr_crypto_genkdf {
 #define SR_CRYPTOKDFT_PBKDF2   (1<<0)
 };
 
+/* this is a hint for KDF using PKCS#5.  Not interpreted by the kernel */
 struct sr_crypto_kdf_pbkdf2 {
        u_int32_t       len;
        u_int32_t       type;
@@ -117,13 +158,9 @@ struct sr_crypto_kdf_pbkdf2 {
 };
 
 /*
- * Check that HMAC-SHA1_k(decrypted scm_key) == sch_mac, where
- * k = SHA1(masking key)
+ * this structure is used to copy masking keys and KDF hints from/to userland.
+ * the embedded hint structures are not interpreted by the kernel.
  */
-struct sr_crypto_chk_hmac_sha1 {
-       u_int8_t        sch_mac[20];
-};
-
 struct sr_crypto_kdfinfo {
        u_int32_t       len;
        u_int32_t       flags;
@@ -139,46 +176,11 @@ struct sr_crypto_kdfinfo {
 #define pbkdf2         _kdfhint.pbkdf2
 };
 
-struct sr_crypto_metadata {
-       u_int32_t               scm_alg;
-#define SR_CRYPTOA_AES_XTS_128 1
-#define SR_CRYPTOA_AES_XTS_256 2
-       u_int32_t               scm_flags;
-#define SR_CRYPTOF_INVALID     (0)
-#define SR_CRYPTOF_KEY         (1<<0)
-#define SR_CRYPTOF_KDFHINT     (1<<1)
-       u_int32_t               scm_mask_alg;
-#define SR_CRYPTOM_AES_ECB_256 1
-       u_int8_t                scm_reserved[64];
-
-       u_int8_t                scm_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES];
-       u_int8_t                scm_kdfhint[SR_CRYPTO_KDFHINTBYTES];
-
-       u_int32_t               scm_check_alg;
-#define SR_CRYPTOC_HMAC_SHA1           1
-       union {
-               struct sr_crypto_chk_hmac_sha1  chk_hmac_sha1;
-               u_int8_t                        chk_reserved2[64];
-       }                       _scm_chk;
-#define        chk_hmac_sha1   _scm_chk.chk_hmac_sha1
-};
-
-#define SR_OPT_VERSION         1       /* bump when sr_opt_meta changes */
-struct sr_opt_meta {
-       u_int32_t               som_type;
-#define SR_OPT_INVALID         0x00
-#define SR_OPT_CRYPTO          0x01
-       u_int32_t               som_pad;
-       union {
-               struct sr_crypto_metadata smm_crypto;
-       }                       som_meta;
-};
-
 #ifdef _KERNEL
-
 #include <dev/biovar.h>
 
 #include <sys/buf.h>
+#include <sys/pool.h>
 #include <sys/queue.h>
 #include <sys/rwlock.h>
 
@@ -211,6 +213,7 @@ extern u_int32_t            sr_debug;
 #define        SR_MAX_LD               1
 #define        SR_MAX_CMDS             16
 #define        SR_MAX_STATES           7
+#define SR_VM_IGNORE_DIRTY     1
 
 /* forward define to prevent dependency goo */
 struct sr_softc;
@@ -289,15 +292,19 @@ struct sr_raid1 {
 /* CRYPTO */
 #define SR_CRYPTO_NOWU         16
 struct sr_crypto {
-       struct sr_crypto_metadata scr_meta;
+       struct sr_meta_crypto   scr_meta;
 
+       struct pool             sr_uiopl;
+       struct pool             sr_iovpl;
+
+       /* XXX only keep scr_sid over time */
        u_int8_t                scr_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES];
        u_int8_t                scr_maskkey[SR_CRYPTO_MAXKEYBYTES];
        u_int64_t               scr_sid[SR_CRYPTO_MAXKEYS];
 };
 
 struct sr_metadata_list {
-       struct sr_metadata      *sml_metadata;
+       u_int8_t                sml_metadata[SR_META_SIZE * 512];
        dev_t                   sml_mm;
        int                     sml_used;
 
@@ -307,7 +314,8 @@ struct sr_metadata_list {
 SLIST_HEAD(sr_metadata_list_head, sr_metadata_list);
 
 struct sr_chunk {
-       struct sr_chunk_meta    src_meta;       /* chunk meta data */
+       struct sr_meta_chunk    src_meta;       /* chunk meta data */
+       struct sr_meta_opt      src_opt;        /* optional metadata */
 
        /* runtime data */
        dev_t                   src_dev_mm;     /* major/minor */
@@ -323,8 +331,6 @@ struct sr_chunk {
 SLIST_HEAD(sr_chunk_head, sr_chunk);
 
 struct sr_volume {
-       struct sr_vol_meta      sv_meta;        /* meta data */
-
        /* runtime data */
        struct sr_chunk_head    sv_chunk_list;  /* linked list of all chunks */
        struct sr_chunk         **sv_chunks;    /* array to same chunks */
@@ -356,7 +362,9 @@ struct sr_discipline {
 
        /* discipline metadata */
        struct sr_metadata      *sd_meta;       /* in memory copy of metadata */
+       void                    *sd_meta_foreign; /* non native metadata */
        u_int32_t               sd_meta_flags;
+       int                     sd_meta_type;   /* metadata functions */
 
        int                     sd_sync;
        int                     sd_must_flush;
@@ -368,7 +376,7 @@ struct sr_discipline {
 
        /* discipline volume */
        struct sr_volume        sd_vol;         /* volume associated */
-
+       int                     sd_vol_status;  /* runtime vol status */
        /* discipline resources */
        struct sr_ccb           *sd_ccb;
        struct sr_ccb_list      sd_ccb_freeq;
@@ -429,23 +437,19 @@ struct sr_softc {
        struct sr_discipline    *sc_dis[SR_MAXSCSIBUS]; /* scsibus is u_int8_t */
 };
 
-struct pool;
-extern struct pool     sr_uiopl;
-extern struct pool     sr_iovpl;
-
 /* work units & ccbs */
-int                    sr_alloc_ccb(struct sr_discipline *);
-void                   sr_free_ccb(struct sr_discipline *);
-struct sr_ccb          *sr_get_ccb(struct sr_discipline *);
-void                   sr_put_ccb(struct sr_ccb *);
-int                    sr_alloc_wu(struct sr_discipline *);
-void                   sr_free_wu(struct sr_discipline *);
-struct sr_workunit     *sr_get_wu(struct sr_discipline *);
-void                   sr_put_wu(struct sr_workunit *);
+int                    sr_ccb_alloc(struct sr_discipline *);
+void                   sr_ccb_free(struct sr_discipline *);
+struct sr_ccb          *sr_ccb_get(struct sr_discipline *);
+void                   sr_ccb_put(struct sr_ccb *);
+int                    sr_wu_alloc(struct sr_discipline *);
+void                   sr_wu_free(struct sr_discipline *);
+struct sr_workunit     *sr_wu_get(struct sr_discipline *);
+void                   sr_wu_put(struct sr_workunit *);
 
 /* misc functions */
 int32_t                        sr_validate_stripsize(u_int32_t);
-void                   sr_save_metadata_callback(void *, void *);
+void                   sr_meta_save_callback(void *, void *);
 int                    sr_validate_io(struct sr_workunit *, daddr64_t *,
                            char *);
 int                    sr_check_io_collision(struct sr_workunit *);