From 080cddd563f844d3a5b25880c3756acac0c07f3e Mon Sep 17 00:00:00 2001 From: marco Date: Sat, 19 Jul 2008 22:41:58 +0000 Subject: [PATCH] Full rewrite of metadata handling. This fixes power failures and crashes that caused illegal checksums. The new metadata code is more or less ready to deal with other vendor's metadata formats. While here clean up the name space. Fix thib's pool mess by adding removing bad flags in interrupt context. tested on macppc, amd64, i386, sparc64 & hppa sparc64 has issues with crypto however those do not seem to be softraid specific. help from okan@ ckuethe@ Will Backman and others --- sys/dev/softraid.c | 2628 ++++++++++++++++++++----------------- sys/dev/softraid_crypto.c | 68 +- sys/dev/softraid_raid0.c | 63 +- sys/dev/softraid_raid1.c | 65 +- sys/dev/softraidvar.h | 260 ++-- 5 files changed, 1626 insertions(+), 1458 deletions(-) diff --git a/sys/dev/softraid.c b/sys/dev/softraid.c index a60ee5ca196..b727b104ace 100644 --- a/sys/dev/softraid.c +++ b/sys/dev/softraid.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid.c,v 1.116 2008/06/25 17:43:09 thib Exp $ */ +/* $OpenBSD: softraid.c,v 1.117 2008/07/19 22:41:58 marco Exp $ */ /* * Copyright (c) 2007 Marco Peereboom * Copyright (c) 2008 Chris Kuethe @@ -64,75 +64,1069 @@ uint32_t sr_debug = 0 ; #endif -void sr_init(void); +int sr_match(struct device *, void *, void *); +void sr_attach(struct device *, struct device *, void *); +int sr_detach(struct device *, int); +int sr_activate(struct device *, enum devact); + +struct cfattach softraid_ca = { + sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, + sr_activate +}; + +struct cfdriver softraid_cd = { + NULL, "softraid", DV_DULL +}; + +/* scsi & discipline */ +int sr_scsi_cmd(struct scsi_xfer *); +void sr_minphys(struct buf *bp); +void sr_copy_internal_data(struct scsi_xfer *, + void *, size_t); +int sr_scsi_ioctl(struct scsi_link *, u_long, + caddr_t, int, struct proc *); +int sr_ioctl(struct device *, u_long, caddr_t); +int 
sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); +int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); +int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); +int sr_ioctl_setstate(struct sr_softc *, + struct bioc_setstate *); +int sr_ioctl_createraid(struct sr_softc *, + struct bioc_createraid *, int); +int sr_ioctl_deleteraid(struct sr_softc *, + struct bioc_deleteraid *); +void sr_chunks_unwind(struct sr_softc *, + struct sr_chunk_head *); +void sr_discipline_free(struct sr_discipline *); +void sr_discipline_shutdown(struct sr_discipline *); + +/* utility functions */ +void sr_shutdown(void *); +void sr_uuid_get(struct sr_uuid *); +void sr_uuid_print(struct sr_uuid *, int); +void sr_checksum_print(u_int8_t *); +void sr_checksum(struct sr_softc *, void *, void *, + u_int32_t); +int sr_boot_assembly(struct sr_softc *); +int sr_already_assembled(struct sr_discipline *); + +/* don't include these on RAMDISK */ +#ifndef SMALL_KERNEL +void sr_sensors_refresh(void *); +int sr_sensors_create(struct sr_discipline *); +void sr_sensors_delete(struct sr_discipline *); +#endif + +/* metadata */ +int sr_meta_probe(struct sr_discipline *, dev_t *, int); +int sr_meta_attach(struct sr_discipline *, int); +void sr_meta_getdevname(struct sr_softc *, dev_t, char *, + int); +int sr_meta_rw(struct sr_discipline *, dev_t, void *, + size_t, daddr64_t, long); +int sr_meta_clear(struct sr_discipline *); +int sr_meta_read(struct sr_discipline *); +int sr_meta_save(struct sr_discipline *, u_int32_t); +int sr_meta_validate(struct sr_discipline *, dev_t, + struct sr_metadata *, void *); +void sr_meta_chunks_create(struct sr_softc *, + struct sr_chunk_head *); +void sr_meta_init(struct sr_discipline *, + struct sr_chunk_head *); + +/* native metadata format */ +int sr_meta_native_bootprobe(struct sr_softc *, + struct device *, struct sr_metadata_list_head *); +#define SR_META_NOTCLAIMED (0) +#define SR_META_CLAIMED (1) +int sr_meta_native_probe(struct sr_softc *, + struct sr_chunk 
*); +int sr_meta_native_attach(struct sr_discipline *, int); +int sr_meta_native_read(struct sr_discipline *, dev_t, + struct sr_metadata *, void *); +int sr_meta_native_write(struct sr_discipline *, dev_t, + struct sr_metadata *,void *); + +#ifdef SR_DEBUG +void sr_meta_print(struct sr_metadata *); +#else +#define sr_meta_print(m) +#endif + +/* the metadata driver should remain stateless */ +struct sr_meta_driver { + daddr64_t smd_offset; /* metadata location */ + u_int32_t smd_size; /* size of metadata */ + + int (*smd_probe)(struct sr_softc *, + struct sr_chunk *); + int (*smd_attach)(struct sr_discipline *, int); + int (*smd_detach)(struct sr_discipline *); + int (*smd_read)(struct sr_discipline *, dev_t, + struct sr_metadata *, void *); + int (*smd_write)(struct sr_discipline *, dev_t, + struct sr_metadata *, void *); + int (*smd_validate)(struct sr_discipline *, + struct sr_metadata *, void *); +} smd[] = { + { SR_META_OFFSET, SR_META_SIZE * 512, + sr_meta_native_probe, sr_meta_native_attach, NULL, + sr_meta_native_read , sr_meta_native_write, NULL }, +#define SR_META_F_NATIVE 0 + { 0, 0, NULL, NULL, NULL, NULL } +#define SR_META_F_INVALID -1 +}; + +int +sr_meta_attach(struct sr_discipline *sd, int force) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_chunk_head *cl; + struct sr_chunk *ch_entry; + int rv = 1, i; + + DNPRINTF(SR_D_META, "%s: sr_meta_attach(%d)\n", DEVNAME(sc), force); + + /* in memory copy of metadata */ + sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO); + if (!sd->sd_meta) { + printf("%s: could not allocate memory for metadata\n", + DEVNAME(sc)); + goto bad; + } + + if (sd->sd_meta_type != SR_META_F_NATIVE) { + /* in memory copy of foreign metadata */ + sd->sd_meta_foreign = malloc(smd[sd->sd_meta_type].smd_size , + M_DEVBUF, M_ZERO); + if (!sd->sd_meta_foreign) { + /* unwind frees sd_meta */ + printf("%s: could not allocate memory for foreign " + "metadata\n", DEVNAME(sc)); + goto bad; + } + } + + if 
(smd[sd->sd_meta_type].smd_attach(sd, force)) + goto bad; + + /* fill out chunk array */ + cl = &sd->sd_vol.sv_chunk_list; + i = 0; + SLIST_FOREACH(ch_entry, cl, src_link) + sd->sd_vol.sv_chunks[i++] = ch_entry; + + rv = 0; +bad: + return (rv); +} + +int +sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) +{ + struct sr_softc *sc = sd->sd_sc; + struct bdevsw *bdsw; + struct sr_chunk *ch_entry, *ch_prev = NULL; + struct sr_chunk_head *cl; + char devname[32]; + int i, d, type, found, prevf, error; + dev_t dev; + + DNPRINTF(SR_D_META, "%s: sr_meta_probe(%d)\n", DEVNAME(sc), no_chunk); + + if (no_chunk == 0) + goto unwind; + + + cl = &sd->sd_vol.sv_chunk_list; + + for (d = 0, prevf = SR_META_F_INVALID; d < no_chunk; d++) { + dev = dt[d]; + sr_meta_getdevname(sc, dev, devname, sizeof(devname)); + bdsw = bdevsw_lookup(dev); + + /* + * XXX leaving dev open for now; move this to attach and figure + * out the open/close dance for unwind. + */ + error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc); + if (error) { + DNPRINTF(SR_D_META,"%s: sr_meta_probe can't open %s\n", + DEVNAME(sc), devname); + /* XXX device isn't open but will be closed anyway */ + goto unwind; + } + + ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF, + M_WAITOK | M_ZERO); + /* keep disks in user supplied order */ + if (ch_prev) + SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link); + else + SLIST_INSERT_HEAD(cl, ch_entry, src_link); + ch_prev = ch_entry; + strlcpy(ch_entry->src_devname, devname, + sizeof(ch_entry->src_devname)); + ch_entry->src_dev_mm = dev; + + /* determine if this is a device we understand */ + for (i = 0, found = SR_META_F_INVALID; smd[i].smd_probe; i++) { + type = smd[i].smd_probe(sc, ch_entry); + if (type == SR_META_F_INVALID) + continue; + else { + found = type; + break; + } + } + if (found == SR_META_F_INVALID) + goto unwind; + if (prevf == SR_META_F_INVALID) + prevf = found; + if (prevf != found) { + DNPRINTF(SR_D_META, "%s: prevf != found\n", + DEVNAME(sc)); 
+ goto unwind; + } + } + + return (prevf); +unwind: + return (SR_META_F_INVALID); +} + +void +sr_meta_getdevname(struct sr_softc *sc, dev_t dev, char *buf, int size) +{ + int maj, unit, part; + char *name; + + DNPRINTF(SR_D_META, "%s: sr_meta_getdevname(%p, %d)\n", + DEVNAME(sc), buf, size); + + if (!buf) + return; + + maj = major(dev); + part = DISKPART(dev); + unit = DISKUNIT(dev); + + name = findblkname(maj); + if (name == NULL) + return; + + snprintf(buf, size, "%s%d%c", name, unit, part + 'a'); +} + +int +sr_meta_rw(struct sr_discipline *sd, dev_t dev, void *md, size_t sz, + daddr64_t ofs, long flags) +{ + struct sr_softc *sc = sd->sd_sc; + struct buf b; + int rv = 1; + + DNPRINTF(SR_D_META, "%s: sr_meta_rw(0x%x, %p, %d, %llu 0x%x)\n", + DEVNAME(sc), dev, md, sz, ofs, flags); + + if (md == NULL) { + printf("%s: read invalid metadata pointer\n", DEVNAME(sc)); + goto done; + } + + bzero(&b, sizeof(b)); + b.b_flags = flags; + b.b_blkno = ofs; + b.b_bcount = sz; + b.b_bufsize = sz; + b.b_resid = sz; + b.b_data = md; + b.b_error = 0; + b.b_proc = curproc; + b.b_dev = dev; + b.b_vp = NULL; + b.b_iodone = NULL; + LIST_INIT(&b.b_dep); + bdevsw_lookup(b.b_dev)->d_strategy(&b); + biowait(&b); + + if (b.b_flags & B_ERROR) { + printf("%s: 0x%x i/o error on block %lld while reading " + "metadata %d\n", DEVNAME(sc), dev, b.b_blkno, b.b_error); + goto done; + } + rv = 0; +done: + return (rv); +} + +int +sr_meta_clear(struct sr_discipline *sd) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; + struct sr_chunk *ch_entry; + void *m; + int rv = 1; + + DNPRINTF(SR_D_META, "%s: sr_meta_clear\n", DEVNAME(sc)); + + if (sd->sd_meta_type != SR_META_F_NATIVE) { + printf("%s: sr_meta_clear can not clear foreign metadata\n", + DEVNAME(sc)); + goto done; + } + + m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO); + SLIST_FOREACH(ch_entry, cl, src_link) { + if (sr_meta_native_write(sd, ch_entry->src_dev_mm, m, NULL)) { + /* XXX mark disk offline */ + 
DNPRINTF(SR_D_META, "%s: sr_meta_clear failed to " + "clear %s\n", DEVNAME(sc), ch_entry->src_devname); + rv++; + continue; + } + bzero(&ch_entry->src_meta, sizeof(ch_entry->src_meta)); + bzero(&ch_entry->src_opt, sizeof(ch_entry->src_opt)); + } + + bzero(sd->sd_meta, SR_META_SIZE * 512); + + free(m, M_DEVBUF); + rv = 0; +done: + return (rv); +} + +void +sr_meta_chunks_create(struct sr_softc *sc, struct sr_chunk_head *cl) +{ + struct sr_chunk *ch_entry; + struct sr_uuid uuid; + int cid = 0; + char *name; + u_int64_t max_chunk_sz = 0, min_chunk_sz; + + DNPRINTF(SR_D_META, "%s: sr_meta_chunks_create\n", DEVNAME(sc)); + + sr_uuid_get(&uuid); + + /* fill out stuff and get largest chunk size while looping */ + SLIST_FOREACH(ch_entry, cl, src_link) { + name = ch_entry->src_devname; + ch_entry->src_meta.scmi.scm_size = ch_entry->src_size; + ch_entry->src_meta.scmi.scm_chunk_id = cid++; + ch_entry->src_meta.scm_status = BIOC_SDONLINE; + strlcpy(ch_entry->src_meta.scmi.scm_devname, name, + sizeof(ch_entry->src_meta.scmi.scm_devname)); + bcopy(&uuid, &ch_entry->src_meta.scmi.scm_uuid, + sizeof(ch_entry->src_meta.scmi.scm_uuid)); + + if (ch_entry->src_meta.scmi.scm_size > max_chunk_sz) + max_chunk_sz = ch_entry->src_meta.scmi.scm_size; + } + + /* get smallest chunk size */ + min_chunk_sz = max_chunk_sz; + SLIST_FOREACH(ch_entry, cl, src_link) + if (ch_entry->src_meta.scmi.scm_size < min_chunk_sz) + min_chunk_sz = ch_entry->src_meta.scmi.scm_size; + + /* equalize all sizes */ + SLIST_FOREACH(ch_entry, cl, src_link) + ch_entry->src_meta.scmi.scm_coerced_size = min_chunk_sz; + + /* whine if chunks are not the same size */ + if (min_chunk_sz != max_chunk_sz) + printf("%s: chunk sizes are not equal; up to %llu blocks " + "wasted per chunk\n", + DEVNAME(sc), max_chunk_sz - min_chunk_sz); +} + +void +sr_meta_init(struct sr_discipline *sd, struct sr_chunk_head *cl) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_metadata *sm = sd->sd_meta; + struct sr_meta_chunk *im_sc; + struct sr_meta_opt 
*im_so; + int i, chunk_no; + + DNPRINTF(SR_D_META, "%s: sr_meta_init\n", DEVNAME(sc)); + + if (!sm) + return; + + /* initial metadata */ + sm->ssdi.ssd_magic = SR_MAGIC; + sm->ssdi.ssd_version = SR_META_VERSION; + sm->ssd_ondisk = 0; + sm->ssdi.ssd_flags = sd->sd_meta_flags; + /* get uuid from chunk 0 */ + bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scmi.scm_uuid, + &sm->ssdi.ssd_uuid, + sizeof(struct sr_uuid)); + + /* volume is filled in createraid */ + + /* add missing chunk bits */ + chunk_no = sm->ssdi.ssd_chunk_no; + for (i = 0; i < chunk_no; i++) { + im_sc = &sd->sd_vol.sv_chunks[i]->src_meta; + im_sc->scmi.scm_volid = sm->ssdi.ssd_volid; + sr_checksum(sc, im_sc, &im_sc->scm_checksum, + sizeof(struct sr_meta_chunk_invariant)); + + /* carry optional meta also in chunk area */ + im_so = &sd->sd_vol.sv_chunks[i]->src_opt; + bzero(im_so, sizeof(*im_so)); + if (sd->sd_type == SR_MD_CRYPTO) { + sm->ssdi.ssd_opt_no = 1; + im_so->somi.som_type = SR_OPT_CRYPTO; + + /* + * copy encrypted key / passphrase into optional + * metadata area + */ + bcopy(&sd->mds.mdd_crypto.scr_meta, + &im_so->somi.som_meta.smm_crypto, + sizeof(im_so->somi.som_meta.smm_crypto)); + + sr_checksum(sc, im_so, im_so->som_checksum, + sizeof(struct sr_meta_opt_invariant)); + } + } +} + +void +sr_meta_save_callback(void *arg1, void *arg2) +{ + struct sr_discipline *sd = arg1; + int s; + + s = splbio(); + + if (sr_meta_save(arg1, SR_META_DIRTY)) + printf("%s: save metadata failed\n", + DEVNAME(sd->sd_sc)); + + sd->sd_must_flush = 0; + splx(s); +} + +int +sr_meta_save(struct sr_discipline *sd, u_int32_t flags) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_metadata *sm = sd->sd_meta, *m; + struct sr_meta_driver *s; + struct sr_chunk *src; + struct sr_meta_chunk *cm; + struct sr_workunit wu; + struct sr_meta_opt *om; + int i; + + DNPRINTF(SR_D_META, "%s: sr_meta_save %s\n", + DEVNAME(sc), sd->sd_meta->ssd_devname); + + if (!sm) { + printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); + goto bad; 
+ } + + /* meta scratchpad */ + s = &smd[sd->sd_meta_type]; + m = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO); + if (!m) { + printf("%s: could not allocate metadata scratch area\n", + DEVNAME(sc)); + goto bad; + } + + if (sm->ssdi.ssd_opt_no > 1) + panic("not yet save > 1 optional metadata members"); + + /* from here on out metadata is updated */ +restart: + sm->ssd_ondisk++; + sm->ssd_meta_flags = flags; + bcopy(sm, m, sizeof(*m)); + + for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { + src = sd->sd_vol.sv_chunks[i]; + cm = (struct sr_meta_chunk *)(m + 1); + bcopy(&src->src_meta, cm + i, sizeof(*cm)); + } + + /* optional metadata */ + om = (struct sr_meta_opt *)(cm + i); + for (i = 0; i < sm->ssdi.ssd_opt_no; i++) { + bcopy(&src->src_opt, om + i, sizeof(*om)); + sr_checksum(sc, om, &om->som_checksum, + sizeof(struct sr_meta_opt_invariant)); + } + + for (i = 0; i < sm->ssdi.ssd_chunk_no; i++) { + src = sd->sd_vol.sv_chunks[i]; + + /* skip disks that are offline */ + if (src->src_meta.scm_status == BIOC_SDOFFLINE) + continue; + + /* calculate metadata checksum for correct chunk */ + m->ssdi.ssd_chunk_id = i; + sr_checksum(sc, m, &m->ssd_checksum, + sizeof(struct sr_meta_invariant)); + +#ifdef SR_DEBUG + DNPRINTF(SR_D_META, "%s: sr_meta_save %s: volid: %d " + "chunkid: %d checksum: ", + DEVNAME(sc), src->src_meta.scmi.scm_devname, + m->ssdi.ssd_volid, m->ssdi.ssd_chunk_id); + + if (sr_debug & SR_D_META) + sr_checksum_print((u_int8_t *)&m->ssd_checksum); + DNPRINTF(SR_D_META, "\n"); + sr_meta_print(m); +#endif + + /* translate and write to disk */ + if (s->smd_write(sd, src->src_dev_mm, m, NULL /* XXX */)) { + printf("%s: could not write metadata to %s\n", + DEVNAME(sc), src->src_devname); + /* restart the meta write */ + src->src_meta.scm_status = BIOC_SDOFFLINE; + /* XXX recalculate volume status */ + goto restart; + } + } + + bzero(&wu, sizeof(wu)); + wu.swu_fake = 1; + wu.swu_dis = sd; + sd->sd_scsi_sync(&wu); + + free(m, M_DEVBUF); + return (0); +bad: + return (1); 
+} + +int +sr_meta_read(struct sr_discipline *sd) +{ +#ifdef SR_DEBUG + struct sr_softc *sc = sd->sd_sc; +#endif + struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; + struct sr_metadata *sm; + struct sr_chunk *ch_entry; + struct sr_meta_chunk *cp; + struct sr_meta_driver *s; + struct sr_meta_opt *om; + void *fm = NULL; + int no_disk = 0; + + DNPRINTF(SR_D_META, "%s: sr_meta_read\n", DEVNAME(sc)); + + sm = malloc(SR_META_SIZE * 512, M_DEVBUF, M_WAITOK | M_ZERO); + s = &smd[sd->sd_meta_type]; + if (sd->sd_meta_type != SR_META_F_NATIVE) + fm = malloc(s->smd_size , M_DEVBUF, M_WAITOK | M_ZERO); + + cp = (struct sr_meta_chunk *)(sm + 1); + SLIST_FOREACH(ch_entry, cl, src_link) { + /* read and translate */ + if (s->smd_read(sd, ch_entry->src_dev_mm, sm, fm)) { + /* XXX mark disk offline */ + DNPRINTF(SR_D_META, "%s: sr_meta_read failed\n", + DEVNAME(sc)); + continue; + } + + if (sm->ssdi.ssd_magic != SR_MAGIC) { + DNPRINTF(SR_D_META, "%s: sr_meta_read !SR_MAGIC\n", + DEVNAME(sc)); + continue; + } + + /* validate metadata */ + if (sr_meta_validate(sd, ch_entry->src_dev_mm, sm, fm)) { + DNPRINTF(SR_D_META, "%s: invalid metadata\n", + DEVNAME(sc)); + no_disk = -1; + goto done; + } + + /* assume chunk 0 contains metadata */ + if (no_disk == 0) + bcopy(sm, sd->sd_meta, sizeof(*sd->sd_meta)); + + bcopy(cp, &ch_entry->src_meta, sizeof(ch_entry->src_meta)); + + if (sm->ssdi.ssd_opt_no > 1) + panic("not yet read > 1 optional metadata members"); + + if (sm->ssdi.ssd_opt_no) { + om = (struct sr_meta_opt *) ((u_int8_t *)(sm + 1) + + sizeof(struct sr_meta_chunk) * + sm->ssdi.ssd_chunk_no); + bcopy(om, &ch_entry->src_opt, sizeof(ch_entry->src_opt)); + + if (om->somi.som_type == SR_OPT_CRYPTO) { + bcopy(&ch_entry->src_opt.somi.som_meta.smm_crypto, + &sd->mds.mdd_crypto.scr_meta, + sizeof(sd->mds.mdd_crypto.scr_meta)); + } + + } + + cp++; + no_disk++; + } + +done: + free(sm, M_DEVBUF); + if (fm) + free(fm, M_DEVBUF); + + DNPRINTF(SR_D_META, "%s: sr_meta_read found %d parts\n", 
DEVNAME(sc), + no_disk); + return (no_disk); +} + +int +sr_meta_validate(struct sr_discipline *sd, dev_t dev, struct sr_metadata *sm, + void *fm) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_meta_driver *s; + struct sr_meta_chunk *mc; + char devname[32]; + int rv = 1; + u_int8_t checksum[MD5_DIGEST_LENGTH]; + + DNPRINTF(SR_D_META, "%s: sr_meta_validate(%p)\n", DEVNAME(sc), sm); + + sr_meta_getdevname(sc, dev, devname, sizeof(devname)); + + s = &smd[sd->sd_meta_type]; + if (sd->sd_meta_type != SR_META_F_NATIVE) + if (s->smd_validate(sd, sm, fm)) { + printf("%s: invalid foreign metadata\n", DEVNAME(sc)); + goto done; + } + + /* + * at this point all foreign metadata has been translated to the native + * format and will be treated just like the native format + */ + + if (sm->ssdi.ssd_version != SR_META_VERSION) { + printf("%s: %s can not read metadata version %d, expected %d\n", + DEVNAME(sc), devname, sm->ssdi.ssd_version, + SR_META_VERSION); + goto done; + } + + sr_checksum(sc, sm, &checksum, sizeof(struct sr_meta_invariant)); + if (bcmp(&checksum, &sm->ssd_checksum, sizeof(checksum))) { + printf("%s: invalid metadata checksum\n", DEVNAME(sc)); + goto done; + } + + /* XXX do other checksums */ + + /* warn if disk changed order */ + mc = (struct sr_meta_chunk *)(sm + 1); + if (strncmp(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname, + sizeof(mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname))) + printf("%s: roaming device %s -> %s\n", DEVNAME(sc), + mc[sm->ssdi.ssd_chunk_id].scmi.scm_devname, devname); + + /* we have meta data on disk */ + DNPRINTF(SR_D_META, "%s: sr_meta_validate valid metadata %s\n", + DEVNAME(sc), devname); + + rv = 0; +done: + return (rv); +} + +int +sr_meta_native_bootprobe(struct sr_softc *sc, struct device *dv, + struct sr_metadata_list_head *mlh) +{ + struct bdevsw *bdsw; + struct disklabel label; + struct sr_metadata *md; + struct sr_discipline *fake_sd; + struct sr_metadata_list *mle; + char devname[32]; + dev_t dev, devr; + int error, i, 
majdev; + int rv = SR_META_NOTCLAIMED; + + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe\n", DEVNAME(sc)); + + majdev = findblkmajor(dv); + if (majdev == -1) + goto done; + dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); + bdsw = &bdevsw[majdev]; + + /* + * The devices are being opened with S_IFCHR instead of + * S_IFBLK so that the SCSI mid-layer does not whine when + * media is not inserted in certain devices like zip drives + * and such. + */ + + /* open device */ + error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc); + if (error) { + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " + "failed\n" , DEVNAME(sc)); + goto done; + } + + /* get disklabel */ + error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label, + FREAD, curproc); + if (error) { + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe ioctl " + "failed\n", DEVNAME(sc)); + error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc); + goto done; + } + + /* we are done, close device */ + error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc); + if (error) { + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe close " + "failed\n", DEVNAME(sc)); + goto done; + } + + md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO); + if (md == NULL) { + printf("%s: not enough memory for metadata buffer\n", + DEVNAME(sc)); + goto done; + } + + /* create fake sd to use utility functions */ + fake_sd = malloc(sizeof(struct sr_discipline) , M_DEVBUF, M_ZERO); + if (fake_sd == NULL) { + printf("%s: not enough memory for fake discipline\n", + DEVNAME(sc)); + goto nosd; + } + fake_sd->sd_sc = sc; + fake_sd->sd_meta_type = SR_META_F_NATIVE; + + for (i = 0; i < MAXPARTITIONS; i++) { + if (label.d_partitions[i].p_fstype != FS_RAID) + continue; + + /* open partition */ + devr = MAKEDISKDEV(majdev, dv->dv_unit, i); + error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc); + if (error) { + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " + "open failed, partition %d\n", + DEVNAME(sc), i); + continue; + } + + if 
(sr_meta_native_read(fake_sd, devr, md, NULL)) { + printf("%s: native bootprobe could not read native " + "metadata\n", DEVNAME(sc)); + continue; + } + + /* are we a softraid partition? */ + sr_meta_getdevname(sc, devr, devname, sizeof(devname)); + if (sr_meta_validate(fake_sd, devr, md, NULL) == 0) { + if (md->ssdi.ssd_flags & BIOC_SCNOAUTOASSEMBLE) { + DNPRINTF(SR_D_META, "%s: don't save %s\n", + DEVNAME(sc), devname); + } else { + /* XXX fix M_WAITOK, this is boot time */ + mle = malloc(sizeof(*mle), M_DEVBUF, + M_WAITOK | M_ZERO); + bcopy(md, &mle->sml_metadata, + SR_META_SIZE * 512); + mle->sml_mm = devr; + SLIST_INSERT_HEAD(mlh, mle, sml_link); + rv = SR_META_CLAIMED; + } + } + + /* we are done, close partition */ + error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, curproc); + if (error) { + DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " + "close failed\n", DEVNAME(sc)); + continue; + } + } + + free(fake_sd, M_DEVBUF); +nosd: + free(md, M_DEVBUF); +done: + return (rv); +} + +int +sr_boot_assembly(struct sr_softc *sc) +{ + struct device *dv; + struct sr_metadata_list_head mlh; + struct sr_metadata_list *mle, *mle2; + struct sr_metadata *m1, *m2; + struct bioc_createraid bc; + int rv = 0, no_dev; + dev_t *dt = NULL; + + DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); + + SLIST_INIT(&mlh); + + TAILQ_FOREACH(dv, &alldevs, dv_list) { + if (dv->dv_class != DV_DISK) + continue; + + /* XXX is there a better way of excluding some devices? */ + if (!strncmp(dv->dv_xname, "fd", 2) || + !strncmp(dv->dv_xname, "cd", 2) || + !strncmp(dv->dv_xname, "rx", 2)) + continue; + + /* native softraid uses partitions */ + if (sr_meta_native_bootprobe(sc, dv, &mlh) == SR_META_CLAIMED) + continue; + + /* probe non-native disks */ + } + + /* + * XXX poor mans hack that doesn't keep disks in order and does not + * roam disks correctly. replace this with something smarter that + * orders disks by volid, chunkid and uuid. 
+ */ + dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK); + SLIST_FOREACH(mle, &mlh, sml_link) { + /* chunk used already? */ + if (mle->sml_used) + continue; + + no_dev = 0; + bzero(dt, BIOC_CRMAXLEN); + SLIST_FOREACH(mle2, &mlh, sml_link) { + /* chunk used already? */ + if (mle2->sml_used) + continue; + + m1 = (struct sr_metadata *)&mle->sml_metadata; + m2 = (struct sr_metadata *)&mle2->sml_metadata; + + /* are we the same volume? */ + if (m1->ssdi.ssd_volid != m2->ssdi.ssd_volid) + continue; + + /* same uuid? */ + if (bcmp(&m1->ssdi.ssd_uuid, &m2->ssdi.ssd_uuid, + sizeof(m1->ssdi.ssd_uuid))) + continue; + + /* sanity */ + if (dt[m2->ssdi.ssd_chunk_id]) { + printf("%s: chunk id already in use; can not " + "assemble volume\n", DEVNAME(sc)); + goto unwind; + } + dt[m2->ssdi.ssd_chunk_id] = mle2->sml_mm; + no_dev++; + mle2->sml_used = 1; + } + if (m1->ssdi.ssd_chunk_no != no_dev) { + printf("%s: not assembling partial disk that used to " + "be volume %d\n", DEVNAME(sc), + m1->ssdi.ssd_volid); + continue; + } + + bzero(&bc, sizeof(bc)); + bc.bc_level = m1->ssdi.ssd_level; + bc.bc_dev_list_len = no_dev * sizeof(dev_t); + bc.bc_dev_list = dt; + bc.bc_flags = BIOC_SCDEVT; + sr_ioctl_createraid(sc, &bc, 0); + rv++; + } + + /* done with metadata */ +unwind: + for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { + mle2 = SLIST_NEXT(mle, sml_link); + free(mle, M_DEVBUF); + } + SLIST_INIT(&mlh); + + if (dt) + free(dt, M_DEVBUF); + + return (rv); +} + +int +sr_meta_native_probe(struct sr_softc *sc, struct sr_chunk *ch_entry) +{ + struct disklabel label; + char *devname; + int error, part; + daddr64_t size; + struct bdevsw *bdsw; + dev_t dev; -int sr_match(struct device *, void *, void *); -void sr_attach(struct device *, struct device *, void *); -int sr_detach(struct device *, int); -int sr_activate(struct device *, enum devact); + DNPRINTF(SR_D_META, "%s: sr_meta_native_probe(%s)\n", + DEVNAME(sc), ch_entry->src_devname); -struct cfattach softraid_ca = { - 
sizeof(struct sr_softc), sr_match, sr_attach, sr_detach, - sr_activate -}; + dev = ch_entry->src_dev_mm; + devname = ch_entry->src_devname; + bdsw = bdevsw_lookup(dev); + part = DISKPART(dev); -struct cfdriver softraid_cd = { - NULL, "softraid", DV_DULL -}; + /* get disklabel */ + error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label, 0, NULL); + if (error) { + DNPRINTF(SR_D_META, "%s: %s can't obtain disklabel\n", + DEVNAME(sc), devname); + goto unwind; + } -int sr_scsi_cmd(struct scsi_xfer *); -void sr_minphys(struct buf *bp); -void sr_copy_internal_data(struct scsi_xfer *, - void *, size_t); -int sr_scsi_ioctl(struct scsi_link *, u_long, - caddr_t, int, struct proc *); -int sr_ioctl(struct device *, u_long, caddr_t); -int sr_ioctl_inq(struct sr_softc *, struct bioc_inq *); -int sr_ioctl_vol(struct sr_softc *, struct bioc_vol *); -int sr_ioctl_disk(struct sr_softc *, struct bioc_disk *); -int sr_ioctl_setstate(struct sr_softc *, - struct bioc_setstate *); -int sr_ioctl_createraid(struct sr_softc *, - struct bioc_createraid *, int); -int sr_ioctl_deleteraid(struct sr_softc *, - struct bioc_deleteraid *); -int sr_open_chunks(struct sr_softc *, - struct sr_chunk_head *, dev_t *, int); -int sr_read_meta(struct sr_discipline *); -int sr_create_chunk_meta(struct sr_softc *, - struct sr_chunk_head *); -void sr_unwind_chunks(struct sr_softc *, - struct sr_chunk_head *); -void sr_free_discipline(struct sr_discipline *); -void sr_shutdown_discipline(struct sr_discipline *); + /* make sure the partition is of the right type */ + if (label.d_partitions[part].p_fstype != FS_RAID) { + DNPRINTF(SR_D_META, + "%s: %s partition not of type RAID (%d)\n", DEVNAME(sc) , + devname, + label.d_partitions[part].p_fstype); + goto unwind; + } -/* utility functions */ -void sr_shutdown(void *); -void sr_get_uuid(struct sr_uuid *); -void sr_print_uuid(struct sr_uuid *, int); -u_int32_t sr_checksum(char *, u_int32_t *, u_int32_t); -int sr_clear_metadata(struct sr_discipline *); -int 
sr_save_metadata(struct sr_discipline *, u_int32_t); -int sr_boot_assembly(struct sr_softc *); -int sr_already_assembled(struct sr_discipline *); -int sr_validate_metadata(struct sr_softc *, dev_t, - struct sr_metadata *); + size = DL_GETPSIZE(&label.d_partitions[part]) - + SR_META_SIZE - SR_META_OFFSET; + if (size <= 0) { + DNPRINTF(SR_D_META, "%s: %s partition too small\n", DEVNAME(sc), + devname); + goto unwind; + } + ch_entry->src_size = size; -/* don't include these on RAMDISK */ -#ifndef SMALL_KERNEL -void sr_refresh_sensors(void *); -int sr_create_sensors(struct sr_discipline *); -void sr_delete_sensors(struct sr_discipline *); + DNPRINTF(SR_D_META, "%s: probe found %s size %d\n", DEVNAME(sc), + devname, size); + + return (SR_META_F_NATIVE); +unwind: + DNPRINTF(SR_D_META, "%s: invalid device: %s\n", DEVNAME(sc), + devname ? devname : "nodev"); + return (SR_META_F_INVALID); +} + +int +sr_meta_native_attach(struct sr_discipline *sd, int force) +{ + struct sr_softc *sc = sd->sd_sc; + struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; + struct sr_metadata *md = NULL; + struct sr_chunk *ch_entry; + struct sr_uuid uuid; + int sr, not_sr, rv = 1, d, expected = -1; + + DNPRINTF(SR_D_META, "%s: sr_meta_native_attach\n", DEVNAME(sc)); + + md = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_ZERO); + if (md == NULL) { + printf("%s: not enough memory for metadata buffer\n", + DEVNAME(sc)); + goto bad; + } + + bzero(&uuid, sizeof uuid); + + sr = not_sr = d = 0; + SLIST_FOREACH(ch_entry, cl, src_link) { + if (sr_meta_native_read(sd, ch_entry->src_dev_mm, md, NULL)) { + printf("%s: could not read native metadata\n", + DEVNAME(sc)); + goto bad; + } + + if (md->ssdi.ssd_magic == SR_MAGIC) { + sr++; + if (d == 0) { + bcopy(&md->ssdi.ssd_uuid, &uuid, sizeof uuid); + expected = md->ssdi.ssd_chunk_no; + continue; + } else if (bcmp(&md->ssdi.ssd_uuid, &uuid, + sizeof uuid)) { + printf("%s: not part of the same volume\n", + DEVNAME(sc)); + goto bad; + } + } else + not_sr++; + + d++; + 
} + + if (sr && not_sr) { + printf("%s: not all chunks are of the native metadata format", + DEVNAME(sc)); + goto bad; + } + if (expected != sr && !force && expected != -1) { + /* XXX make this smart so that we can bring up degraded disks */ + printf("%s: not all chunks were provided\n", DEVNAME(sc)); + goto bad; + } + + rv = 0; +bad: + if (md) + free(md, M_DEVBUF); + return (rv); +} + +int +sr_meta_native_read(struct sr_discipline *sd, dev_t dev, + struct sr_metadata *md, void *fm) +{ +#ifdef SR_DEBUG + struct sr_softc *sc = sd->sd_sc; #endif + DNPRINTF(SR_D_META, "%s: sr_meta_native_read(0x%x, %p)\n", + DEVNAME(sc), dev, md); + + return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, + B_READ)); +} +int +sr_meta_native_write(struct sr_discipline *sd, dev_t dev, + struct sr_metadata *md, void *fm) +{ #ifdef SR_DEBUG -void sr_print_metadata(struct sr_metadata *); -#else -#define sr_print_metadata(m) + struct sr_softc *sc = sd->sd_sc; #endif + DNPRINTF(SR_D_META, "%s: sr_meta_native_write(0x%x, %p)\n", + DEVNAME(sc), dev, md); -struct pool sr_uiopl; -struct pool sr_iovpl; + return (sr_meta_rw(sd, dev, md, SR_META_SIZE * 512, SR_META_OFFSET, + B_WRITE)); +} struct scsi_adapter sr_switch = { sr_scsi_cmd, sr_minphys, NULL, NULL, sr_scsi_ioctl @@ -142,23 +1136,9 @@ struct scsi_device sr_dev = { NULL, NULL, NULL, NULL }; -void -sr_init(void) -{ - pool_init(&sr_uiopl, sizeof(struct uio), 0, 0, 0, "sr_uiopl", NULL); - pool_init(&sr_iovpl, sizeof(struct iovec), 0, 0, 0, "sr_iovpl", NULL); -} - int sr_match(struct device *parent, void *match, void *aux) { - static int called = 0; - - if (!called) { - sr_init(); - called = 1; - } - return (1); } @@ -219,7 +1199,7 @@ sr_copy_internal_data(struct scsi_xfer *xs, void *v, size_t size) } int -sr_alloc_ccb(struct sr_discipline *sd) +sr_ccb_alloc(struct sr_discipline *sd) { struct sr_ccb *ccb; int i; @@ -227,7 +1207,7 @@ sr_alloc_ccb(struct sr_discipline *sd) if (!sd) return (1); - DNPRINTF(SR_D_CCB, "%s: 
sr_alloc_ccb\n", DEVNAME(sd->sd_sc)); + DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc\n", DEVNAME(sd->sd_sc)); if (sd->sd_ccb) return (1); @@ -238,24 +1218,24 @@ sr_alloc_ccb(struct sr_discipline *sd) for (i = 0; i < sd->sd_max_wu * sd->sd_max_ccb_per_wu; i++) { ccb = &sd->sd_ccb[i]; ccb->ccb_dis = sd; - sr_put_ccb(ccb); + sr_ccb_put(ccb); } - DNPRINTF(SR_D_CCB, "%s: sr_alloc_ccb ccb: %d\n", + DNPRINTF(SR_D_CCB, "%s: sr_ccb_alloc ccb: %d\n", DEVNAME(sd->sd_sc), sd->sd_max_wu * sd->sd_max_ccb_per_wu); return (0); } void -sr_free_ccb(struct sr_discipline *sd) +sr_ccb_free(struct sr_discipline *sd) { struct sr_ccb *ccb; if (!sd) return; - DNPRINTF(SR_D_CCB, "%s: sr_free_ccb %p\n", DEVNAME(sd->sd_sc), sd); + DNPRINTF(SR_D_CCB, "%s: sr_ccb_free %p\n", DEVNAME(sd->sd_sc), sd); while ((ccb = TAILQ_FIRST(&sd->sd_ccb_freeq)) != NULL) TAILQ_REMOVE(&sd->sd_ccb_freeq, ccb, ccb_link); @@ -265,7 +1245,7 @@ sr_free_ccb(struct sr_discipline *sd) } struct sr_ccb * -sr_get_ccb(struct sr_discipline *sd) +sr_ccb_get(struct sr_discipline *sd) { struct sr_ccb *ccb; int s; @@ -280,19 +1260,19 @@ sr_get_ccb(struct sr_discipline *sd) splx(s); - DNPRINTF(SR_D_CCB, "%s: sr_get_ccb: %p\n", DEVNAME(sd->sd_sc), + DNPRINTF(SR_D_CCB, "%s: sr_ccb_get: %p\n", DEVNAME(sd->sd_sc), ccb); return (ccb); } void -sr_put_ccb(struct sr_ccb *ccb) +sr_ccb_put(struct sr_ccb *ccb) { struct sr_discipline *sd = ccb->ccb_dis; int s; - DNPRINTF(SR_D_CCB, "%s: sr_put_ccb: %p\n", DEVNAME(sd->sd_sc), + DNPRINTF(SR_D_CCB, "%s: sr_ccb_put: %p\n", DEVNAME(sd->sd_sc), ccb); s = splbio(); @@ -308,7 +1288,7 @@ sr_put_ccb(struct sr_ccb *ccb) } int -sr_alloc_wu(struct sr_discipline *sd) +sr_wu_alloc(struct sr_discipline *sd) { struct sr_workunit *wu; int i, no_wu; @@ -316,7 +1296,7 @@ sr_alloc_wu(struct sr_discipline *sd) if (!sd) return (1); - DNPRINTF(SR_D_WU, "%s: sr_alloc_wu %p %d\n", DEVNAME(sd->sd_sc), + DNPRINTF(SR_D_WU, "%s: sr_wu_alloc %p %d\n", DEVNAME(sd->sd_sc), sd, sd->sd_max_wu); if (sd->sd_wu) @@ -333,21 +1313,21 @@ 
sr_alloc_wu(struct sr_discipline *sd) for (i = 0; i < no_wu; i++) { wu = &sd->sd_wu[i]; wu->swu_dis = sd; - sr_put_wu(wu); + sr_wu_put(wu); } return (0); } void -sr_free_wu(struct sr_discipline *sd) +sr_wu_free(struct sr_discipline *sd) { struct sr_workunit *wu; if (!sd) return; - DNPRINTF(SR_D_WU, "%s: sr_free_wu %p\n", DEVNAME(sd->sd_sc), sd); + DNPRINTF(SR_D_WU, "%s: sr_wu_free %p\n", DEVNAME(sd->sd_sc), sd); while ((wu = TAILQ_FIRST(&sd->sd_wu_freeq)) != NULL) TAILQ_REMOVE(&sd->sd_wu_freeq, wu, swu_link); @@ -361,14 +1341,14 @@ sr_free_wu(struct sr_discipline *sd) } void -sr_put_wu(struct sr_workunit *wu) +sr_wu_put(struct sr_workunit *wu) { struct sr_discipline *sd = wu->swu_dis; struct sr_ccb *ccb; int s; - DNPRINTF(SR_D_WU, "%s: sr_put_wu: %p\n", DEVNAME(sd->sd_sc), wu); + DNPRINTF(SR_D_WU, "%s: sr_wu_put: %p\n", DEVNAME(sd->sd_sc), wu); s = splbio(); @@ -385,7 +1365,7 @@ sr_put_wu(struct sr_workunit *wu) while ((ccb = TAILQ_FIRST(&wu->swu_ccb)) != NULL) { TAILQ_REMOVE(&wu->swu_ccb, ccb, ccb_link); - sr_put_ccb(ccb); + sr_ccb_put(ccb); } TAILQ_INIT(&wu->swu_ccb); @@ -396,7 +1376,7 @@ sr_put_wu(struct sr_workunit *wu) } struct sr_workunit * -sr_get_wu(struct sr_discipline *sd) +sr_wu_get(struct sr_discipline *sd) { struct sr_workunit *wu; int s; @@ -412,7 +1392,7 @@ sr_get_wu(struct sr_discipline *sd) splx(s); - DNPRINTF(SR_D_WU, "%s: sr_get_wu: %p\n", DEVNAME(sd->sd_sc), wu); + DNPRINTF(SR_D_WU, "%s: sr_wu_get: %p\n", DEVNAME(sd->sd_sc), wu); return (wu); } @@ -447,11 +1427,11 @@ sr_scsi_cmd(struct scsi_xfer *xs) if (sd->sd_deleted) { printf("%s: %s device is being deleted, failing io\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sc), sd->sd_meta->ssd_devname); goto stuffup; } - if ((wu = sr_get_wu(sd)) == NULL) { + if ((wu = sr_wu_get(sd)) == NULL) { DNPRINTF(SR_D_CMD, "%s: sr_scsi_cmd no wu\n", DEVNAME(sc)); return (TRY_AGAIN_LATER); } @@ -537,7 +1517,7 @@ complete: scsi_done(xs); splx(s); if (wu) - sr_put_wu(wu); + sr_wu_put(wu); return 
(COMPLETE); } int @@ -618,7 +1598,7 @@ sr_ioctl_inq(struct sr_softc *sc, struct bioc_inq *bi) /* XXX this will not work when we stagger disciplines */ if (sc->sc_dis[i]) { vol++; - disk += sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk; + disk += sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no; } strlcpy(bi->bi_dev, sc->sc_dev.dv_xname, sizeof(bi->bi_dev)); @@ -632,7 +1612,7 @@ int sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) { int i, vol, rv = EINVAL; - struct sr_volume *sv; + struct sr_discipline *sd; for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { /* XXX this will not work when we stagger disciplines */ @@ -641,14 +1621,14 @@ sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv) if (vol != bv->bv_volid) continue; - sv = &sc->sc_dis[i]->sd_vol; - bv->bv_status = sv->sv_meta.svm_status; - bv->bv_size = sv->sv_meta.svm_size << DEV_BSHIFT; - bv->bv_level = sv->sv_meta.svm_level; - bv->bv_nodisk = sv->sv_meta.svm_no_chunk; - strlcpy(bv->bv_dev, sv->sv_meta.svm_devname, + sd = sc->sc_dis[i]; + bv->bv_status = sd->sd_vol_status; + bv->bv_size = sd->sd_meta->ssdi.ssd_size << DEV_BSHIFT; + bv->bv_level = sd->sd_meta->ssdi.ssd_level; + bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no; + strlcpy(bv->bv_dev, sd->sd_meta->ssd_devname, sizeof(bv->bv_dev)); - strlcpy(bv->bv_vendor, sv->sv_meta.svm_vendor, + strlcpy(bv->bv_vendor, sd->sd_meta->ssdi.ssd_vendor, sizeof(bv->bv_vendor)); rv = 0; break; @@ -671,15 +1651,15 @@ sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *bd) continue; id = bd->bd_diskid; - if (id >= sc->sc_dis[i]->sd_vol.sv_meta.svm_no_chunk) + if (id >= sc->sc_dis[i]->sd_meta->ssdi.ssd_chunk_no) break; src = sc->sc_dis[i]->sd_vol.sv_chunks[id]; bd->bd_status = src->src_meta.scm_status; - bd->bd_size = src->src_meta.scm_size << DEV_BSHIFT; + bd->bd_size = src->src_meta.scmi.scm_size << DEV_BSHIFT; bd->bd_channel = vol; bd->bd_target = id; - strlcpy(bd->bd_vendor, src->src_meta.scm_devname, + strlcpy(bd->bd_vendor, src->src_meta.scmi.scm_devname, 
sizeof(bd->bd_vendor)); rv = 0; break; @@ -705,7 +1685,7 @@ sr_ioctl_setstate(struct sr_softc *sc, struct bioc_setstate *bs) continue; sd = sc->sc_dis[vol]; - if (bs->bs_target >= sd->sd_vol.sv_meta.svm_no_chunk) + if (bs->bs_target >= sd->sd_meta->ssdi.ssd_chunk_no) goto done; switch (bs->bs_status) { @@ -774,47 +1754,44 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) no_chunk = bc->bc_dev_list_len / sizeof(dev_t); cl = &sd->sd_vol.sv_chunk_list; SLIST_INIT(cl); - if (sr_open_chunks(sc, cl, dt, no_chunk)) - goto unwind; - - /* in memory copy of metadata */ - sd->sd_meta = malloc(SR_META_SIZE * 512 , M_DEVBUF, M_WAITOK | M_ZERO); /* we have a valid list now create an array index */ sd->sd_vol.sv_chunks = malloc(sizeof(struct sr_chunk *) * no_chunk, M_DEVBUF, M_WAITOK | M_ZERO); + sd->sd_meta_type = sr_meta_probe(sd, dt, no_chunk); + if (sd->sd_meta_type == SR_META_F_INVALID) { + printf("%s: invalid metadata format\n", DEVNAME(sc)); + goto unwind; + } + + if (sr_meta_attach(sd, bc->bc_flags & BIOC_SCFORCE)) { + printf("%s: can't attach metadata type %d\n", DEVNAME(sc), + sd->sd_meta_type); + goto unwind; + } + /* force the raid volume by clearing metadata region */ if (bc->bc_flags & BIOC_SCFORCE) { /* make sure disk isn't up and running */ - if (sr_read_meta(sd)) + if (sr_meta_read(sd)) if (sr_already_assembled(sd)) { printf("%s: disk ", DEVNAME(sc)); - sr_print_uuid(&sd->sd_meta->ssd_uuid, 0); + sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); printf(" is currently in use; can't force " "create\n"); goto unwind; } - /* zero out pointers and metadata again to create disk */ - bzero(sd->sd_vol.sv_chunks, - sizeof(struct sr_chunk *) * no_chunk); - bzero(sd->sd_meta, SR_META_SIZE * 512); - - if (sr_clear_metadata(sd)) { + if (sr_meta_clear(sd)) { printf("%s: failed to clear metadata\n", DEVNAME(sc)); goto unwind; } } - if ((no_meta = sr_read_meta(sd)) == 0) { - /* fill out chunk array */ - i = 0; - SLIST_FOREACH(ch_entry, cl, src_link) 
- sd->sd_vol.sv_chunks[i++] = ch_entry; - + if ((no_meta = sr_meta_read(sd)) == 0) { /* fill out all chunk metadata */ - sr_create_chunk_meta(sc, cl); + sr_meta_chunks_create(sc, cl); ch_entry = SLIST_FIRST(cl); /* no metadata available */ @@ -830,13 +1807,13 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) */ strip_size = MAXPHYS; vol_size = - ch_entry->src_meta.scm_coerced_size * no_chunk; + ch_entry->src_meta.scmi.scm_coerced_size * no_chunk; break; case 1: if (no_chunk < 2) goto unwind; strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name)); - vol_size = ch_entry->src_meta.scm_coerced_size; + vol_size = ch_entry->src_meta.scmi.scm_coerced_size; break; #ifdef CRYPTO case 'C': @@ -861,7 +1838,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) goto unwind; strlcpy(sd->sd_name, "CRYPTO", sizeof(sd->sd_name)); - vol_size = ch_entry->src_meta.scm_size; + vol_size = ch_entry->src_meta.scmi.scm_size; sr_crypto_create_keys(sd); @@ -875,23 +1852,26 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_createraid: vol_size: %lld\n", DEVNAME(sc), vol_size); - sd->sd_vol.sv_meta.svm_no_chunk = no_chunk; - sd->sd_vol.sv_meta.svm_size = vol_size; - sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE; - sd->sd_vol.sv_meta.svm_level = bc->bc_level; - sd->sd_vol.sv_meta.svm_strip_size = strip_size; - strlcpy(sd->sd_vol.sv_meta.svm_vendor, "OPENBSD", - sizeof(sd->sd_vol.sv_meta.svm_vendor)); - snprintf(sd->sd_vol.sv_meta.svm_product, - sizeof(sd->sd_vol.sv_meta.svm_product), "SR %s", + sd->sd_meta->ssdi.ssd_chunk_no = no_chunk; + sd->sd_meta->ssdi.ssd_size = vol_size; + sd->sd_vol_status = BIOC_SVONLINE; + sd->sd_meta->ssdi.ssd_level = bc->bc_level; + sd->sd_meta->ssdi.ssd_strip_size = strip_size; + strlcpy(sd->sd_meta->ssdi.ssd_vendor, "OPENBSD", + sizeof(sd->sd_meta->ssdi.ssd_vendor)); + snprintf(sd->sd_meta->ssdi.ssd_product, + 
sizeof(sd->sd_meta->ssdi.ssd_product), "SR %s", sd->sd_name); - snprintf(sd->sd_vol.sv_meta.svm_revision, - sizeof(sd->sd_vol.sv_meta.svm_revision), "%03d", + snprintf(sd->sd_meta->ssdi.ssd_revision, + sizeof(sd->sd_meta->ssdi.ssd_revision), "%03d", SR_META_VERSION); sd->sd_meta_flags = bc->bc_flags & BIOC_SCNOAUTOASSEMBLE; updatemeta = 1; } else if (no_meta == no_chunk) { + if (sd->sd_meta->ssd_meta_flags & SR_META_DIRTY) + printf("%s: %s was not shutdown properly\n", + DEVNAME(sc), sd->sd_meta->ssd_devname); if (user == 0 && sd->sd_meta_flags & BIOC_SCNOAUTOASSEMBLE) { DNPRINTF(SR_D_META, "%s: disk not auto assembled from " "metadata\n", DEVNAME(sc)); @@ -899,7 +1879,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) } if (sr_already_assembled(sd)) { printf("%s: disk ", DEVNAME(sc)); - sr_print_uuid(&sd->sd_meta->ssd_uuid, 0); + sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); printf(" already assembled\n"); goto unwind; } @@ -931,10 +1911,14 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) DNPRINTF(SR_D_META, "%s: disk assembled from metadata\n", DEVNAME(sc)); updatemeta = 0; + } else if (no_meta == -1) { + printf("%s: one of the chunks has corrupt metadata; aborting " + "assembly\n", DEVNAME(sc)); + goto unwind; } else { if (sr_already_assembled(sd)) { printf("%s: disk ", DEVNAME(sc)); - sr_print_uuid(&sd->sd_meta->ssd_uuid, 0); + sr_uuid_print(&sd->sd_meta->ssdi.ssd_uuid, 0); printf(" already assembled; will not partial " "assemble it\n"); goto unwind; @@ -950,8 +1934,8 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) /* fill out discipline members */ sd->sd_type = SR_MD_RAID0; sd->sd_max_ccb_per_wu = - (MAXPHYS / sd->sd_vol.sv_meta.svm_strip_size + 1) * - SR_RAID0_NOWU * sd->sd_vol.sv_meta.svm_no_chunk; + (MAXPHYS / sd->sd_meta->ssdi.ssd_strip_size + 1) * + SR_RAID0_NOWU * sd->sd_meta->ssdi.ssd_chunk_no; sd->sd_max_wu = SR_RAID0_NOWU; /* setup discipline pointers 
*/ @@ -1058,16 +2042,29 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) rv = 0; if (updatemeta) { /* fill out remaining volume metadata */ - sd->sd_vol.sv_meta.svm_volid = vol; - strlcpy(sd->sd_vol.sv_meta.svm_devname, dev->dv_xname, - sizeof(sd->sd_vol.sv_meta.svm_devname)); + sd->sd_meta->ssdi.ssd_volid = vol; + strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, + sizeof(sd->sd_meta->ssd_devname)); + sr_meta_init(sd, cl); + } else { + if (strncmp(sd->sd_meta->ssd_devname, dev->dv_xname, + sizeof(dev->dv_xname))) { + printf("%s: volume %s is roaming, it used to be %s, " + "updating metadata\n", + DEVNAME(sc), dev->dv_xname, + sd->sd_meta->ssd_devname); + + sd->sd_meta->ssdi.ssd_volid = vol; + strlcpy(sd->sd_meta->ssd_devname, dev->dv_xname, + sizeof(sd->sd_meta->ssd_devname)); + } } /* save metadata to disk */ - rv = sr_save_metadata(sd, SR_VOL_DIRTY); + rv = sr_meta_save(sd, SR_META_DIRTY); #ifndef SMALL_KERNEL - if (sr_create_sensors(sd)) + if (sr_sensors_create(sd)) printf("%s: unable to create sensor for %s\n", DEVNAME(sc), dev->dv_xname); else @@ -1080,7 +2077,7 @@ sr_ioctl_createraid(struct sr_softc *sc, struct bioc_createraid *bc, int user) return (rv); unwind: - sr_shutdown_discipline(sd); + sr_discipline_shutdown(sd); return (rv); } @@ -1090,331 +2087,39 @@ sr_ioctl_deleteraid(struct sr_softc *sc, struct bioc_deleteraid *dr) { struct sr_discipline *sd = NULL; int rv = 1; - int i; - - DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), - dr->bd_dev); - - for (i = 0; i < SR_MAXSCSIBUS; i++) - if (sc->sc_dis[i]) { - if (!strncmp(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname, dr->bd_dev, - sizeof(sc->sc_dis[i]->sd_vol.sv_meta.svm_devname))) { - sd = sc->sc_dis[i]; - break; - } - } - - if (sd == NULL) - goto bad; - - sd->sd_deleted = 1; - sd->sd_meta->ssd_flags = BIOC_SCNOAUTOASSEMBLE; - sr_shutdown(sd); - - rv = 0; -bad: - return (rv); -} - -int -sr_open_chunks(struct sr_softc *sc, struct sr_chunk_head *cl, dev_t *dt, 
- int no_chunk) -{ - struct sr_chunk *ch_entry, *ch_prev = NULL; - struct disklabel label; - struct bdevsw *bdsw; - char *name; - int maj, unit, part, i, error; - daddr64_t size; - dev_t dev; - - DNPRINTF(SR_D_IOCTL, "%s: sr_open_chunks(%d)\n", DEVNAME(sc), no_chunk); - - /* fill out chunk list */ - for (i = 0; i < no_chunk; i++) { - ch_entry = malloc(sizeof(struct sr_chunk), M_DEVBUF, - M_WAITOK | M_ZERO); - /* keep disks in user supplied order */ - if (ch_prev) - SLIST_INSERT_AFTER(ch_prev, ch_entry, src_link); - else - SLIST_INSERT_HEAD(cl, ch_entry, src_link); - ch_prev = ch_entry; - - dev = dt[i]; - maj = major(dev); - part = DISKPART(dev); - unit = DISKUNIT(dev); - bdsw = &bdevsw[maj]; - - name = findblkname(maj); - if (name == NULL) - goto unwind; - - snprintf(ch_entry->src_devname, sizeof(ch_entry->src_devname), - "%s%d%c", name, unit, part + 'a'); - name = ch_entry->src_devname; - - /* open device */ - error = bdsw->d_open(dev, FREAD | FWRITE , S_IFBLK, curproc); - - /* get disklabel */ - error = bdsw->d_ioctl(dev, DIOCGDINFO, (void *)&label, - 0, NULL); - if (error) { - printf("%s: %s can't obtain disklabel\n", - DEVNAME(sc), name); - bdsw->d_close(dev, FWRITE, S_IFBLK, curproc); - goto unwind; - } - - /* make sure the partition is of the right type */ - if (label.d_partitions[part].p_fstype != FS_RAID) { - printf("%s: %s partition not of type RAID (%d)\n", - DEVNAME(sc), name, - label.d_partitions[part].p_fstype); - bdsw->d_close(dev, FWRITE, S_IFBLK, curproc); - goto unwind; - } - - /* get partition size while accounting for metadata! 
*/ - ch_entry->src_size = size = - DL_GETPSIZE(&label.d_partitions[part]) - - SR_META_SIZE - SR_META_OFFSET; - if (size <= 0) { - printf("%s: %s partition too small\n", - DEVNAME(sc), name); - bdsw->d_close(dev, FWRITE, S_IFBLK, curproc); - goto unwind; - } - - - ch_entry->src_dev_mm = dev; /* major/minor */ - - DNPRINTF(SR_D_IOCTL, "%s: found %s size %d\n", DEVNAME(sc), - name, size); - } - - return (0); -unwind: - printf("%s: invalid device: %s\n", DEVNAME(sc), name ? name : "nodev"); - return (1); -} - -int -sr_read_meta(struct sr_discipline *sd) -{ - struct sr_softc *sc = sd->sd_sc; - struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; - struct sr_metadata *sm = sd->sd_meta, *m; - struct sr_chunk *ch_entry; - struct buf b; - struct sr_vol_meta *mv; - struct sr_chunk_meta *mc; - struct sr_opt_meta *mo; - size_t sz = SR_META_SIZE * 512; - int no_chunk = 0; - u_int32_t volid, ondisk = 0, cid; - - DNPRINTF(SR_D_META, "%s: sr_read_meta\n", DEVNAME(sc)); - - m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO); - - SLIST_FOREACH(ch_entry, cl, src_link) { - bzero(&b, sizeof(b)); - - b.b_flags = B_READ; - b.b_blkno = SR_META_OFFSET; - b.b_bcount = sz; - b.b_bufsize = sz; - b.b_resid = sz; - b.b_data = (void *)m; - b.b_error = 0; - b.b_proc = curproc; - b.b_dev = ch_entry->src_dev_mm; - b.b_vp = NULL; - b.b_iodone = NULL; - LIST_INIT(&b.b_dep); - bdevsw_lookup(b.b_dev)->d_strategy(&b); - biowait(&b); - - /* XXX mark chunk offline and restart metadata write */ - if (b.b_flags & B_ERROR) { - printf("%s: %s i/o error on block %lld while reading " - "metadata %d\n", DEVNAME(sc), - ch_entry->src_devname, b.b_blkno, b.b_error); - continue; - } - - if (m->ssd_magic != SR_MAGIC) - continue; - - /* validate metadata */ - if (sr_validate_metadata(sc, ch_entry->src_dev_mm, m)) { - printf("%s: invalid metadata\n", DEVNAME(sc)); - no_chunk = -1; - goto bad; - } - - mv = (struct sr_vol_meta *)(m + 1); - mc = (struct sr_chunk_meta *)(mv + 1); - - /* we asssume that the first chunk has the 
initial metadata */ - if (no_chunk++ == 0) { - bcopy(m, sm, sz); - bcopy(m, sd->sd_meta, sizeof(*sd->sd_meta)); - bcopy(mv, &sd->sd_vol.sv_meta, - sizeof(sd->sd_vol.sv_meta)); - - volid = m->ssd_vd_volid; - sd->sd_meta_flags = sm->ssd_flags; - } - - if (bcmp(&sm->ssd_uuid, &sd->sd_vol.sv_meta.svm_uuid, - sizeof(struct sr_uuid))) { - printf("%s: %s invalid chunk uuid ", - DEVNAME(sc), ch_entry->src_devname); - sr_print_uuid(&sm->ssd_uuid, 0); - printf(", expected "); - sr_print_uuid(&sd->sd_vol.sv_meta.svm_uuid, 1); - no_chunk = -1; - goto bad; - } - - /* we have meta data on disk */ - ch_entry->src_meta_ondisk = 1; - - /* make sure we are part of this vd */ - if (volid != m->ssd_vd_volid) { - printf("%s: %s invalid volume id %d, expected %d\n", - DEVNAME(sc), ch_entry->src_devname, - volid, m->ssd_vd_volid); - no_chunk = -1; - goto bad; - } - - if (m->ssd_chunk_id > m->ssd_chunk_no) { - printf("%s: %s chunk id out of range %d, expected " - "lower than %d\n", DEVNAME(sc), - ch_entry->src_devname, - m->ssd_chunk_id, m->ssd_chunk_no); - no_chunk = -1; - goto bad; - } - - if (sd->sd_vol.sv_chunks[m->ssd_chunk_id]) { - printf("%s: %s chunk id %d already in use\n", - DEVNAME(sc), ch_entry->src_devname, - m->ssd_chunk_id); - no_chunk = -1; - goto bad; - } - - sd->sd_vol.sv_chunks[m->ssd_chunk_id] = ch_entry; - bcopy(mc + m->ssd_chunk_id, &ch_entry->src_meta, - sizeof(ch_entry->src_meta)); - - if (ondisk == 0) { - ondisk = m->ssd_ondisk; - cid = m->ssd_chunk_id; - } - - if (m->ssd_ondisk != ondisk) { - printf("%s: %s chunk id %d contains stale metadata\n", - DEVNAME(sc), ch_entry->src_devname, - m->ssd_ondisk < ondisk ? 
m->ssd_chunk_id : cid); - no_chunk = -1; - goto bad; - } - - /* XXX fix this check, sd_type isnt filled in yet */ - if (mv->svm_level == 'C') { - mo = (struct sr_opt_meta *)(mc + mv->svm_no_chunk); - if (m->ssd_chunk_id > 1) { - no_chunk = -1; - goto bad; - } - bcopy(&mo->som_meta, - &sd->mds.mdd_crypto.scr_meta, - sizeof(sd->mds.mdd_crypto.scr_meta) - ); - } - } - - if (no_chunk != m->ssd_chunk_no) { - DNPRINTF(SR_D_META, "%s: not enough chunks supplied\n", - DEVNAME(sc)); - no_chunk = -1; - goto bad; - } - - DNPRINTF(SR_D_META, "%s: sr_read_meta: found %d elements\n", - DEVNAME(sc), no_chunk); - - sr_print_metadata(m); - -bad: - /* return nr of chunks that contain metadata */ - free(m, M_DEVBUF); - return (no_chunk); -} - -int -sr_create_chunk_meta(struct sr_softc *sc, struct sr_chunk_head *cl) -{ - struct sr_chunk *ch_entry; - struct sr_uuid uuid; - int rv = 1, cid = 0; - char *name; - u_int64_t max_chunk_sz = 0, min_chunk_sz; - - DNPRINTF(SR_D_IOCTL, "%s: sr_create_chunk_meta\n", DEVNAME(sc)); - - sr_get_uuid(&uuid); - - /* fill out stuff and get largest chunk size while looping */ - SLIST_FOREACH(ch_entry, cl, src_link) { - name = ch_entry->src_devname; - ch_entry->src_meta.scm_size = ch_entry->src_size; - ch_entry->src_meta.scm_chunk_id = cid++; - ch_entry->src_meta.scm_status = BIOC_SDONLINE; - strlcpy(ch_entry->src_meta.scm_devname, name, - sizeof(ch_entry->src_meta.scm_devname)); - bcopy(&uuid, &ch_entry->src_meta.scm_uuid, - sizeof(ch_entry->src_meta.scm_uuid)); + int i; - if (ch_entry->src_meta.scm_size > max_chunk_sz) - max_chunk_sz = ch_entry->src_meta.scm_size; - } + DNPRINTF(SR_D_IOCTL, "%s: sr_ioctl_deleteraid %s\n", DEVNAME(sc), + dr->bd_dev); - /* get smallest chunk size */ - min_chunk_sz = max_chunk_sz; - SLIST_FOREACH(ch_entry, cl, src_link) - if (ch_entry->src_meta.scm_size < min_chunk_sz) - min_chunk_sz = ch_entry->src_meta.scm_size; + for (i = 0; i < SR_MAXSCSIBUS; i++) + if (sc->sc_dis[i]) { + if 
(!strncmp(sc->sc_dis[i]->sd_meta->ssd_devname, dr->bd_dev, + sizeof(sc->sc_dis[i]->sd_meta->ssd_devname))) { + sd = sc->sc_dis[i]; + break; + } + } - /* equalize all sizes */ - SLIST_FOREACH(ch_entry, cl, src_link) - ch_entry->src_meta.scm_coerced_size = min_chunk_sz; + if (sd == NULL) + goto bad; - /* whine if chunks are not the same size */ - if (min_chunk_sz != max_chunk_sz) - printf("%s: chunk sizes are not equal; up to %llu blocks " - "wasted per chunk\n", - DEVNAME(sc), max_chunk_sz - min_chunk_sz); + sd->sd_deleted = 1; + sd->sd_meta->ssdi.ssd_flags = BIOC_SCNOAUTOASSEMBLE; + sr_shutdown(sd); rv = 0; - +bad: return (rv); } void -sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl) +sr_chunks_unwind(struct sr_softc *sc, struct sr_chunk_head *cl) { struct sr_chunk *ch_entry, *ch_next; dev_t dev; - DNPRINTF(SR_D_IOCTL, "%s: sr_unwind_chunks\n", DEVNAME(sc)); + DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind\n", DEVNAME(sc)); if (!cl) return; @@ -1424,7 +2129,8 @@ sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl) ch_next = SLIST_NEXT(ch_entry, src_link); dev = ch_entry->src_dev_mm; - + DNPRINTF(SR_D_IOCTL, "%s: sr_chunks_unwind closing: %s\n", + DEVNAME(sc), ch_entry->src_devname); if (dev != NODEV) bdevsw_lookup(dev)->d_close(dev, FWRITE, S_IFBLK, curproc); @@ -1435,7 +2141,7 @@ sr_unwind_chunks(struct sr_softc *sc, struct sr_chunk_head *cl) } void -sr_free_discipline(struct sr_discipline *sd) +sr_discipline_free(struct sr_discipline *sd) { struct sr_softc *sc = sd->sd_sc; int i; @@ -1443,24 +2149,29 @@ sr_free_discipline(struct sr_discipline *sd) if (!sd) return; - DNPRINTF(SR_D_DIS, "%s: sr_free_discipline %s\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); - + DNPRINTF(SR_D_DIS, "%s: sr_discipline_free %s\n", + DEVNAME(sc), + sd->sd_meta ? 
sd->sd_meta->ssd_devname : "nodev"); if (sd->sd_free_resources) sd->sd_free_resources(sd); if (sd->sd_vol.sv_chunks) free(sd->sd_vol.sv_chunks, M_DEVBUF); - free(sd, M_DEVBUF); + if (sd->sd_meta) + free(sd->sd_meta, M_DEVBUF); + if (sd->sd_meta_foreign) + free(sd->sd_meta_foreign, M_DEVBUF); for (i = 0; i < SR_MAXSCSIBUS; i++) if (sc->sc_dis[i] == sd) { sc->sc_dis[i] = NULL; break; } + + free(sd, M_DEVBUF); } void -sr_shutdown_discipline(struct sr_discipline *sd) +sr_discipline_shutdown(struct sr_discipline *sd) { struct sr_softc *sc = sd->sd_sc; int s; @@ -1468,8 +2179,8 @@ sr_shutdown_discipline(struct sr_discipline *sd) if (!sd || !sc) return; - DNPRINTF(SR_D_DIS, "%s: sr_shutdown_discipline %s\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); + DNPRINTF(SR_D_DIS, "%s: sr_discipline_shutdown %s\n", DEVNAME(sc), + sd->sd_meta ? sd->sd_meta->ssd_devname : "nodev"); s = splbio(); @@ -1484,16 +2195,16 @@ sr_shutdown_discipline(struct sr_discipline *sd) break; #ifndef SMALL_KERNEL - sr_delete_sensors(sd); + sr_sensors_delete(sd); #endif /* SMALL_KERNEL */ if (sd->sd_scsibus_dev) config_detach(sd->sd_scsibus_dev, DETACH_FORCE); - sr_unwind_chunks(sc, &sd->sd_vol.sv_chunk_list); + sr_chunks_unwind(sc, &sd->sd_vol.sv_chunk_list); if (sd) - sr_free_discipline(sd); + sr_discipline_free(sd); splx(s); } @@ -1513,11 +2224,11 @@ sr_raid_inquiry(struct sr_workunit *wu) inq.version = 2; inq.response_format = 2; inq.additional_length = 32; - strlcpy(inq.vendor, sd->sd_vol.sv_meta.svm_vendor, + strlcpy(inq.vendor, sd->sd_meta->ssdi.ssd_vendor, sizeof(inq.vendor)); - strlcpy(inq.product, sd->sd_vol.sv_meta.svm_product, + strlcpy(inq.product, sd->sd_meta->ssdi.ssd_product, sizeof(inq.product)); - strlcpy(inq.revision, sd->sd_vol.sv_meta.svm_revision, + strlcpy(inq.revision, sd->sd_meta->ssdi.ssd_revision, sizeof(inq.revision)); sr_copy_internal_data(xs, &inq, sizeof(inq)); @@ -1537,16 +2248,16 @@ sr_raid_read_cap(struct sr_workunit *wu) if (xs->cmd->opcode == READ_CAPACITY) { 
bzero(&rcd, sizeof(rcd)); - if (sd->sd_vol.sv_meta.svm_size > 0xffffffffllu) + if (sd->sd_meta->ssdi.ssd_size > 0xffffffffllu) _lto4b(0xffffffff, rcd.addr); else - _lto4b(sd->sd_vol.sv_meta.svm_size, rcd.addr); + _lto4b(sd->sd_meta->ssdi.ssd_size, rcd.addr); _lto4b(512, rcd.length); sr_copy_internal_data(xs, &rcd, sizeof(rcd)); rv = 0; } else if (xs->cmd->opcode == READ_CAPACITY_16) { bzero(&rcd16, sizeof(rcd16)); - _lto8b(sd->sd_vol.sv_meta.svm_size, rcd16.addr); + _lto8b(sd->sd_meta->ssdi.ssd_size, rcd16.addr); _lto4b(512, rcd16.length); sr_copy_internal_data(xs, &rcd16, sizeof(rcd16)); rv = 0; @@ -1562,737 +2273,187 @@ sr_raid_tur(struct sr_workunit *wu) DNPRINTF(SR_D_DIS, "%s: sr_raid_tur\n", DEVNAME(sd->sd_sc)); - if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) { + if (sd->sd_vol_status == BIOC_SVOFFLINE) { sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; sd->sd_scsi_sense.flags = SKEY_NOT_READY; sd->sd_scsi_sense.add_sense_code = 0x04; sd->sd_scsi_sense.add_sense_code_qual = 0x11; sd->sd_scsi_sense.extra_len = 4; return (1); - } else if (sd->sd_vol.sv_meta.svm_status == BIOC_SVINVALID) { + } else if (sd->sd_vol_status == BIOC_SVINVALID) { sd->sd_scsi_sense.error_code = SSD_ERRCODE_CURRENT; sd->sd_scsi_sense.flags = SKEY_HARDWARE_ERROR; sd->sd_scsi_sense.add_sense_code = 0x05; sd->sd_scsi_sense.add_sense_code_qual = 0x00; sd->sd_scsi_sense.extra_len = 4; - return (1); - } - - return (0); -} - -int -sr_raid_request_sense(struct sr_workunit *wu) -{ - struct sr_discipline *sd = wu->swu_dis; - struct scsi_xfer *xs = wu->swu_xs; - - DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", - DEVNAME(sd->sd_sc)); - - /* use latest sense data */ - bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); - - /* clear sense data */ - bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); - - return (0); -} - -int -sr_raid_start_stop(struct sr_workunit *wu) -{ - struct sr_discipline *sd = wu->swu_dis; - struct scsi_xfer *xs = wu->swu_xs; - struct scsi_start_stop *ss = 
(struct scsi_start_stop *)xs->cmd; - int rv = 1; - - DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", - DEVNAME(sd->sd_sc)); - - if (!ss) - return (rv); - - if (ss->byte2 == 0x00) { - /* START */ - if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) { - /* bring volume online */ - /* XXX check to see if volume can be brought online */ - sd->sd_vol.sv_meta.svm_status = BIOC_SVONLINE; - } - rv = 0; - } else /* XXX is this the check? if (byte == 0x01) */ { - /* STOP */ - if (sd->sd_vol.sv_meta.svm_status == BIOC_SVONLINE) { - /* bring volume offline */ - sd->sd_vol.sv_meta.svm_status = BIOC_SVOFFLINE; - } - rv = 0; - } - - return (rv); -} - -int -sr_raid_sync(struct sr_workunit *wu) -{ - struct sr_discipline *sd = wu->swu_dis; - int s, rv = 0, ios; - - DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); - - /* when doing a fake sync don't coun't the wu */ - ios = wu->swu_fake ? 0 : 1; - - s = splbio(); - sd->sd_sync = 1; - - while (sd->sd_wu_pending > ios) - if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { - DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", - DEVNAME(sd->sd_sc)); - rv = 1; - break; - } - - sd->sd_sync = 0; - splx(s); - - wakeup(&sd->sd_sync); - - return (rv); -} - -void -sr_raid_startwu(struct sr_workunit *wu) -{ - struct sr_discipline *sd = wu->swu_dis; - struct sr_ccb *ccb; - - splassert(IPL_BIO); - - if (wu->swu_state == SR_WU_RESTART) - /* - * no need to put the wu on the pending queue since we - * are restarting the io - */ - ; - else - /* move wu to pending queue */ - TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); - - /* start all individual ios */ - TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { - bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf); - } -} - -u_int32_t -sr_checksum(char *s, u_int32_t *p, u_int32_t size) -{ - u_int32_t chk = 0; - int i; - - DNPRINTF(SR_D_MISC, "%s: sr_checksum %p %d\n", s, p, size); - - if (size % sizeof(u_int32_t)) - return (0); /* 0 is failure */ - - for (i = 0; i < size / 
sizeof(u_int32_t); i++) - chk ^= p[i]; - - return (chk); -} - -void -sr_get_uuid(struct sr_uuid *uuid) -{ - arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); -} - -void -sr_print_uuid(struct sr_uuid *uuid, int cr) -{ - int i; - - for (i = 0; i < SR_UUID_MAX; i++) - printf("%x%s", uuid->sui_id[i], - i < SR_UUID_MAX - 1 ? ":" : ""); - - if (cr) - printf("\n"); -} - -int -sr_clear_metadata(struct sr_discipline *sd) -{ - struct sr_softc *sc = sd->sd_sc; - struct sr_chunk_head *cl = &sd->sd_vol.sv_chunk_list; - struct sr_chunk *ch_entry; - struct buf b; - size_t sz = SR_META_SIZE * 512; - void *m; - int rv = 0; - - DNPRINTF(SR_D_META, "%s: sr_clear_metadata\n", DEVNAME(sc)); - - m = malloc(sz , M_DEVBUF, M_WAITOK | M_ZERO); - - SLIST_FOREACH(ch_entry, cl, src_link) { - bzero(&b, sizeof(b)); - - b.b_flags = B_WRITE; - b.b_blkno = SR_META_OFFSET; - b.b_bcount = sz; - b.b_bufsize = sz; - b.b_resid = sz; - b.b_data = (void *)m; - b.b_error = 0; - b.b_proc = curproc; - b.b_dev = ch_entry->src_dev_mm; - b.b_vp = NULL; - b.b_iodone = NULL; - LIST_INIT(&b.b_dep); - bdevsw_lookup(b.b_dev)->d_strategy(&b); - biowait(&b); - - if (b.b_flags & B_ERROR) { - printf("%s: %s i/o error on block %lld while clearing " - "metadata %d\n", DEVNAME(sc), - ch_entry->src_devname, b.b_blkno, b.b_error); - rv++; - continue; - } - } - - free(m, M_DEVBUF); - return (rv); -} - -int -sr_already_assembled(struct sr_discipline *sd) -{ - struct sr_softc *sc = sd->sd_sc; - int i; - - for (i = 0; i < SR_MAXSCSIBUS; i++) - if (sc->sc_dis[i]) - if (!bcmp(&sd->sd_meta->ssd_uuid, - &sc->sc_dis[i]->sd_meta->ssd_uuid, - sizeof(sd->sd_meta->ssd_uuid))) - return (1); - - return (0); -} - -void -sr_save_metadata_callback(void *arg1, void *arg2) -{ - struct sr_discipline *sd = arg1; - int s; - - s = splbio(); - - if (sr_save_metadata(arg1, SR_VOL_DIRTY)) - printf("%s: save metadata failed\n", - DEVNAME(sd->sd_sc)); - - sd->sd_must_flush = 0; - splx(s); -} - -int -sr_save_metadata(struct sr_discipline *sd, 
u_int32_t flags) -{ - struct sr_softc *sc = sd->sd_sc; - struct sr_metadata *sm = sd->sd_meta; - struct sr_vol_meta *sv = &sd->sd_vol.sv_meta, *im_sv; - struct sr_chunk_meta *im_sc; - struct sr_opt_meta *im_so; - struct sr_chunk *src; - struct buf b; - struct sr_workunit wu; - int i, rv = 1, ch = 0, no_chunk, sz_opt; - size_t sz = SR_META_SIZE * 512; - - DNPRINTF(SR_D_META, "%s: sr_save_metadata %s\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); - - if (!sm) { - printf("%s: no in memory copy of metadata\n", DEVNAME(sc)); - goto bad; - } - - im_sv = (struct sr_vol_meta *)(sm + 1); - im_sc = (struct sr_chunk_meta *)(im_sv + 1); - no_chunk = sd->sd_vol.sv_meta.svm_no_chunk; - im_so = (struct sr_opt_meta *)(im_sc + no_chunk); - - /* XXX this is a temporary hack until meta is properly redone */ - if (sd->sd_type == SR_MD_CRYPTO) - sz_opt = sizeof(struct sr_opt_meta); - else - sz_opt = 0; - - if (sizeof(struct sr_metadata) + sizeof(struct sr_vol_meta) + - (sizeof(struct sr_chunk_meta) * no_chunk) + - sz_opt > sz) { - printf("%s: too much metadata; metadata NOT written\n", - DEVNAME(sc)); - goto bad; - } - - if (sm->ssd_magic == 0) { - /* initial metadata */ - sm->ssd_magic = SR_MAGIC; - sm->ssd_version = SR_META_VERSION; - sm->ssd_size = sizeof(struct sr_metadata); - sm->ssd_ondisk = 0; - sm->ssd_flags = sd->sd_meta_flags; - /* get uuid from chunk 0 */ - bcopy(&sd->sd_vol.sv_chunks[0]->src_meta.scm_uuid, - &sm->ssd_uuid, - sizeof(struct sr_uuid)); - - /* volume */ - bcopy(sv, im_sv, sizeof(struct sr_vol_meta)); - bcopy(&sm->ssd_uuid, &im_sv->svm_uuid, - sizeof(im_sv->svm_uuid)); - sm->ssd_vd_ver = SR_VOL_VERSION; - sm->ssd_vd_size = sizeof(struct sr_vol_meta); - - /* chunk */ - for (i = 0; i < no_chunk; i++) - bcopy(sd->sd_vol.sv_chunks[i], &im_sc[i], - sizeof(struct sr_chunk_meta)); - - sm->ssd_chunk_ver = SR_CHUNK_VERSION; - sm->ssd_chunk_size = sizeof(struct sr_chunk_meta); - sm->ssd_chunk_no = no_chunk; - - /* optional */ - sm->ssd_opt_ver = SR_OPT_VERSION; - if 
(sd->sd_type == SR_MD_CRYPTO) { - bzero(im_so, sizeof(*im_so)); - sm->ssd_opt_size = sizeof(struct sr_opt_meta); - sm->ssd_opt_no = 1; - } else { - sm->ssd_opt_size = 0; - sm->ssd_opt_no = 0; - } - } - - /* from here on out metadata is updated */ - sm->ssd_ondisk++; - im_sv->svm_flags |= flags; - sm->ssd_vd_chk = sr_checksum(DEVNAME(sc), - (u_int32_t *)im_sv, sm->ssd_vd_size); - - sm->ssd_chunk_chk = 0; - for (ch = 0; ch < sm->ssd_chunk_no; ch++) - sm->ssd_chunk_chk ^= sr_checksum(DEVNAME(sc), - (u_int32_t *)&im_sc[ch], sm->ssd_chunk_size); - - /* XXX do checksum on optional meta too */ - - sr_print_metadata(sm); - - for (i = 0; i < sm->ssd_chunk_no; i++) { - memset(&b, 0, sizeof(b)); - - src = sd->sd_vol.sv_chunks[i]; - - /* skip disks that are offline */ - if (src->src_meta.scm_status == BIOC_SDOFFLINE) - continue; - - /* copy encrypted key / passphrase into optinal metadata area */ - if (sd->sd_type == SR_MD_CRYPTO && i < 2) { - im_so->som_type = SR_OPT_CRYPTO; - bcopy(&sd->mds.mdd_crypto.scr_meta, - &im_so->som_meta.smm_crypto, - sizeof(im_so->som_meta.smm_crypto)); - } - - /* calculate metdata checksum and ids */ - sm->ssd_vd_volid = im_sv->svm_volid; - sm->ssd_chunk_id = i; - sm->ssd_checksum = sr_checksum(DEVNAME(sc), - (u_int32_t *)sm, sm->ssd_size); - - DNPRINTF(SR_D_META, "%s: sr_save_metadata %s: volid: %d " - "chunkid: %d checksum: 0x%x\n", - DEVNAME(sc), src->src_meta.scm_devname, - sm->ssd_vd_volid, sm->ssd_chunk_id, - sm->ssd_checksum); - - b.b_flags = B_WRITE; - b.b_blkno = SR_META_OFFSET; - b.b_bcount = sz; - b.b_bufsize = sz; - b.b_resid = sz; - b.b_data = (void *)sm; - b.b_error = 0; - b.b_proc = curproc; - b.b_dev = src->src_dev_mm; - b.b_vp = NULL; - b.b_iodone = NULL; - LIST_INIT(&b.b_dep); - bdevsw_lookup(b.b_dev)->d_strategy(&b); - - biowait(&b); - - /* make sure in memory copy is clean */ - if (sd->sd_type == SR_MD_CRYPTO) - bzero(im_so, sizeof(*im_so)); - sm->ssd_vd_volid = 0; - sm->ssd_chunk_id = 0; - sm->ssd_checksum = 0; - - /* XXX do 
something smart here */ - /* mark chunk offline and restart metadata write */ - if (b.b_flags & B_ERROR) { - printf("%s: %s i/o error on block %lld while writing " - "metadata %d\n", DEVNAME(sc), - src->src_meta.scm_devname, b.b_blkno, b.b_error); - goto bad; - } - - DNPRINTF(SR_D_META, "%s: sr_save_metadata written to %s\n", - DEVNAME(sc), src->src_meta.scm_devname); + return (1); } - bzero(&wu, sizeof(wu)); - wu.swu_fake = 1; - wu.swu_dis = sd; - sd->sd_scsi_sync(&wu); - - rv = 0; -bad: - return (rv); + return (0); } int -sr_boot_assembly(struct sr_softc *sc) +sr_raid_request_sense(struct sr_workunit *wu) { - struct device *dv; - struct buf *bp; - struct bdevsw *bdsw; - struct disklabel label; - struct sr_metadata *sm; - struct sr_metadata_list_head mlh; - struct sr_metadata_list *mle, *mle2; - struct sr_vol_meta *vm; - struct bioc_createraid bc; - dev_t dev, devr, *dt = NULL; - int error, majdev, i, no_dev, rv = 0; - size_t sz = SR_META_SIZE * 512; - - DNPRINTF(SR_D_META, "%s: sr_boot_assembly\n", DEVNAME(sc)); + struct sr_discipline *sd = wu->swu_dis; + struct scsi_xfer *xs = wu->swu_xs; - SLIST_INIT(&mlh); - bp = geteblk(sz); - if (!bp) - return (ENOMEM); + DNPRINTF(SR_D_DIS, "%s: sr_raid_request_sense\n", + DEVNAME(sd->sd_sc)); - TAILQ_FOREACH(dv, &alldevs, dv_list) { - if (dv->dv_class != DV_DISK) - continue; + /* use latest sense data */ + bcopy(&sd->sd_scsi_sense, &xs->sense, sizeof(xs->sense)); - majdev = findblkmajor(dv); - if (majdev == -1) - continue; + /* clear sense data */ + bzero(&sd->sd_scsi_sense, sizeof(sd->sd_scsi_sense)); - bp->b_dev = dev = MAKEDISKDEV(majdev, dv->dv_unit, RAW_PART); - bdsw = &bdevsw[majdev]; + return (0); +} - /* XXX is there a better way of excluding some devices? 
*/ - if (!strncmp(dv->dv_xname, "fd", 2) || - !strncmp(dv->dv_xname, "cd", 2) || - !strncmp(dv->dv_xname, "rx", 2)) - continue; - /* - * The devices are being opened with S_IFCHR instead of - * S_IFBLK so that the SCSI mid-layer does not whine when - * media is not inserted in certain devices like zip drives - * and such. - */ +int +sr_raid_start_stop(struct sr_workunit *wu) +{ + struct sr_discipline *sd = wu->swu_dis; + struct scsi_xfer *xs = wu->swu_xs; + struct scsi_start_stop *ss = (struct scsi_start_stop *)xs->cmd; + int rv = 1; - /* open device */ - error = (*bdsw->d_open)(dev, FREAD, S_IFCHR, curproc); - if (error) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly open failed" - "\n", DEVNAME(sc)); - continue; - } + DNPRINTF(SR_D_DIS, "%s: sr_raid_start_stop\n", + DEVNAME(sd->sd_sc)); - /* get disklabel */ - error = (*bdsw->d_ioctl)(dev, DIOCGDINFO, (void *)&label, - FREAD, curproc); - if (error) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly ioctl " - "failed\n", DEVNAME(sc)); - error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc); - continue; - } + if (!ss) + return (rv); - /* we are done, close device */ - error = (*bdsw->d_close)(dev, FREAD, S_IFCHR, curproc); - if (error) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly close " - "failed\n", DEVNAME(sc)); - continue; + if (ss->byte2 == 0x00) { + /* START */ + if (sd->sd_vol_status == BIOC_SVOFFLINE) { + /* bring volume online */ + /* XXX check to see if volume can be brought online */ + sd->sd_vol_status = BIOC_SVONLINE; } - - /* are we a softraid partition? 
*/ - for (i = 0; i < MAXPARTITIONS; i++) { - if (label.d_partitions[i].p_fstype != FS_RAID) - continue; - - /* open device */ - bp->b_dev = devr = MAKEDISKDEV(majdev, dv->dv_unit, i); - error = (*bdsw->d_open)(devr, FREAD, S_IFCHR, curproc); - if (error) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly " - "open failed, partition %d\n", - DEVNAME(sc), i); - continue; - } - /* read metadat */ - bp->b_flags = B_BUSY | B_READ; - bp->b_blkno = SR_META_OFFSET; - bp->b_cylinder = 0; - bp->b_bcount = sz; - bp->b_bufsize = sz; - bp->b_resid = sz; - (*bdsw->d_strategy)(bp); - if ((error = biowait(bp))) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly " - "strategy failed, partition %d\n", - DEVNAME(sc)); - error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, - curproc); - continue; - } - - sm = (struct sr_metadata *)bp->b_data; - if (!sr_validate_metadata(sc, devr, sm)) { - /* we got one; save it off */ - mle = malloc(sizeof(*mle), M_DEVBUF, - M_WAITOK | M_ZERO); - mle->sml_metadata = malloc(sz, M_DEVBUF, - M_WAITOK | M_ZERO); - bcopy(sm, mle->sml_metadata, sz); - mle->sml_mm = devr; - SLIST_INSERT_HEAD(&mlh, mle, sml_link); - } - - /* we are done, close device */ - error = (*bdsw->d_close)(devr, FREAD, S_IFCHR, - curproc); - if (error) { - DNPRINTF(SR_D_META, "%s: sr_boot_assembly " - "close failed\n", DEVNAME(sc)); - continue; - } + rv = 0; + } else /* XXX is this the check? if (byte == 0x01) */ { + /* STOP */ + if (sd->sd_vol_status == BIOC_SVONLINE) { + /* bring volume offline */ + sd->sd_vol_status = BIOC_SVOFFLINE; } + rv = 0; } - /* - * XXX poor mans hack that doesn't keep disks in order and does not - * roam disks correctly. replace this with something smarter that - * orders disks by volid, chunkid and uuid. - */ - dt = malloc(BIOC_CRMAXLEN, M_DEVBUF, M_WAITOK); - SLIST_FOREACH(mle, &mlh, sml_link) { - /* chunk used already? */ - if (mle->sml_used) - continue; - - no_dev = 0; - bzero(dt, BIOC_CRMAXLEN); - SLIST_FOREACH(mle2, &mlh, sml_link) { - /* chunk used already? 
*/ - if (mle2->sml_used) - continue; + return (rv); +} - /* are we the same volume? */ - if (mle->sml_metadata->ssd_vd_volid != - mle2->sml_metadata->ssd_vd_volid) - continue; +int +sr_raid_sync(struct sr_workunit *wu) +{ + struct sr_discipline *sd = wu->swu_dis; + int s, rv = 0, ios; - /* same uuid? */ - if (bcmp(&mle->sml_metadata->ssd_uuid, - &mle2->sml_metadata->ssd_uuid, - sizeof(mle->sml_metadata->ssd_uuid))) - continue; + DNPRINTF(SR_D_DIS, "%s: sr_raid_sync\n", DEVNAME(sd->sd_sc)); - /* sanity */ - if (dt[mle2->sml_metadata->ssd_chunk_id]) { - printf("%s: chunk id already in use; can not " - "assemble volume\n", DEVNAME(sc)); - goto unwind; - } - dt[mle2->sml_metadata->ssd_chunk_id] = mle2->sml_mm; - no_dev++; - mle2->sml_used = 1; - } - if (mle->sml_metadata->ssd_chunk_no != no_dev) { - printf("%s: not assembling partial disk that used to " - "be volume %d\n", DEVNAME(sc), - mle->sml_metadata->ssd_vd_volid); - continue; - } + /* when doing a fake sync don't coun't the wu */ + ios = wu->swu_fake ? 
0 : 1; - bzero(&bc, sizeof(bc)); - vm = (struct sr_vol_meta *)(mle->sml_metadata + 1); - bc.bc_level = vm->svm_level; - bc.bc_dev_list_len = no_dev * sizeof(dev_t); - bc.bc_dev_list = dt; - bc.bc_flags = BIOC_SCDEVT; - sr_ioctl_createraid(sc, &bc, 0); - rv++; - } + s = splbio(); + sd->sd_sync = 1; -unwind: - if (dt) - free(dt, M_DEVBUF); + while (sd->sd_wu_pending > ios) + if (tsleep(sd, PRIBIO, "sr_sync", 15 * hz) == EWOULDBLOCK) { + DNPRINTF(SR_D_DIS, "%s: sr_raid_sync timeout\n", + DEVNAME(sd->sd_sc)); + rv = 1; + break; + } - for (mle = SLIST_FIRST(&mlh); mle != SLIST_END(&mlh); mle = mle2) { - mle2 = SLIST_NEXT(mle, sml_link); + sd->sd_sync = 0; + splx(s); - free(mle->sml_metadata, M_DEVBUF); - free(mle, M_DEVBUF); - } - SLIST_INIT(&mlh); + wakeup(&sd->sd_sync); return (rv); } -int -sr_validate_metadata(struct sr_softc *sc, dev_t dev, struct sr_metadata *sm) +void +sr_raid_startwu(struct sr_workunit *wu) { - struct sr_vol_meta *mv; - struct sr_chunk_meta *mc; - char *name, devname[32]; - int maj, part, unit; - u_int32_t chk; + struct sr_discipline *sd = wu->swu_dis; + struct sr_ccb *ccb; - DNPRINTF(SR_D_META, "%s: sr_validate_metadata(0x%x)\n", - DEVNAME(sc), dev); + splassert(IPL_BIO); - bzero(devname, sizeof(devname)); + if (wu->swu_state == SR_WU_RESTART) + /* + * no need to put the wu on the pending queue since we + * are restarting the io + */ + ; + else + /* move wu to pending queue */ + TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link); - if (sm->ssd_magic != SR_MAGIC) - goto bad; + /* start all individual ios */ + TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link) { + bdevsw_lookup(ccb->ccb_buf.b_dev)->d_strategy(&ccb->ccb_buf); + } +} - maj = major(dev); - part = DISKPART(dev); - unit = DISKUNIT(dev); +void +sr_checksum_print(u_int8_t *md5) +{ + int i; - name = findblkname(maj); - if (name == NULL) - goto bad; + for (i = 0; i < MD5_DIGEST_LENGTH; i++) + printf("%02x", md5[i]); +} - snprintf(devname, sizeof(devname), - "%s%d%c", name, unit, part + 'a'); - name 
= devname; +void +sr_checksum(struct sr_softc *sc, void *src, void *md5, u_int32_t len) +{ + MD5_CTX ctx; - /* validate metadata */ - if (sm->ssd_version != SR_META_VERSION) { - printf("%s: %s can not read metadata version %d, " - "expected %d\n", DEVNAME(sc), - devname, sm->ssd_version, - SR_META_VERSION); - goto bad; - } - if (sm->ssd_size != sizeof(struct sr_metadata)) { - printf("%s: %s invalid metadata size %d, " - "expected %d\n", DEVNAME(sc), - devname, sm->ssd_size, - sizeof(struct sr_metadata)); - goto bad; - } - chk = sr_checksum(DEVNAME(sc), (u_int32_t *)sm, sm->ssd_size); - /* - * since the checksum value is part of the checksum a good - * result equals 0 - */ - if (chk != 0) { - printf("%s: %s invalid metadata checksum 0x%x, " - "expected 0x%x\n", DEVNAME(sc), - devname, sm->ssd_checksum, chk); - goto bad; - } + DNPRINTF(SR_D_MISC, "%s: sr_checksum(%p %p %d)\n", DEVNAME(sc), src, + md5, len); - /* validate volume metadata */ - if (sm->ssd_vd_ver != SR_VOL_VERSION) { - printf("%s: %s can not read volume metadata version " - "%d, expected %d\n", DEVNAME(sc), - devname, sm->ssd_vd_ver, - SR_VOL_VERSION); - goto bad; - } - if (sm->ssd_vd_size != sizeof(struct sr_vol_meta)) { - printf("%s: %s invalid volume metadata size %d, " - "expected %d\n", DEVNAME(sc), - devname, sm->ssd_vd_size, - sizeof(struct sr_vol_meta)); - goto bad; - } - mv = (struct sr_vol_meta *)(sm + 1); - chk = sr_checksum(DEVNAME(sc), (u_int32_t *)mv, sm->ssd_vd_size); - if (chk != sm->ssd_vd_chk) { - printf("%s: %s invalid volume metadata checksum 0x%x, " - "expected 0x%x\n", DEVNAME(sc), - devname, sm->ssd_vd_chk, chk); - goto bad; - } + MD5Init(&ctx); + MD5Update(&ctx, src, len); + MD5Final(md5, &ctx); +} - /* validate chunk metadata */ - if (sm->ssd_chunk_ver != SR_CHUNK_VERSION) { - printf("%s: %s can not read chunk metadata version " - "%d, expected %d\n", DEVNAME(sc), - devname, sm->ssd_chunk_ver, - SR_CHUNK_VERSION); - goto bad; - } - if (sm->ssd_chunk_size != sizeof(struct 
sr_chunk_meta)) { - printf("%s: %s invalid chunk metadata size %d, " - "expected %d\n", DEVNAME(sc), - devname, sm->ssd_chunk_size, - sizeof(struct sr_chunk_meta)); - goto bad; - } +void +sr_uuid_get(struct sr_uuid *uuid) +{ + /* XXX replace with idgen32 */ + arc4random_buf(uuid->sui_id, sizeof(uuid->sui_id)); +} - mc = (struct sr_chunk_meta *)(mv + 1); - /* checksum is calculated over ALL chunks */ - chk = sr_checksum(DEVNAME(sc), (u_int32_t *)(mc), - sm->ssd_chunk_size * sm->ssd_chunk_no); +void +sr_uuid_print(struct sr_uuid *uuid, int cr) +{ + int i; - if (chk != sm->ssd_chunk_chk) { - printf("%s: %s invalid chunk metadata checksum 0x%x, " - "expected 0x%x\n", DEVNAME(sc), - devname, sm->ssd_chunk_chk, chk); - goto bad; - } + for (i = 0; i < SR_UUID_MAX; i++) + printf("%x%s", uuid->sui_id[i], + i < SR_UUID_MAX - 1 ? ":" : ""); - /* warn if disk changed order */ - if (strncmp(mc[sm->ssd_chunk_id].scm_devname, name, - sizeof(mc[sm->ssd_chunk_id].scm_devname))) - printf("%s: roaming device %s -> %s\n", DEVNAME(sc), - mc[sm->ssd_chunk_id].scm_devname, name); + if (cr) + printf("\n"); +} - /* we have meta data on disk */ - DNPRINTF(SR_D_META, "%s: sr_validate_metadata valid metadata %s\n", - DEVNAME(sc), devname); +int +sr_already_assembled(struct sr_discipline *sd) +{ + struct sr_softc *sc = sd->sd_sc; + int i; - return (0); -bad: - DNPRINTF(SR_D_META, "%s: sr_validate_metadata invalid metadata %s\n", - DEVNAME(sc), devname); + for (i = 0; i < SR_MAXSCSIBUS; i++) + if (sc->sc_dis[i]) + if (!bcmp(&sd->sd_meta->ssdi.ssd_uuid, + &sc->sc_dis[i]->sd_meta->ssdi.ssd_uuid, + sizeof(sd->sd_meta->ssdi.ssd_uuid))) + return (1); - return (1); + return (0); } int32_t @@ -2324,11 +2485,11 @@ sr_shutdown(void *arg) struct sr_softc *sc = sd->sd_sc; #endif DNPRINTF(SR_D_DIS, "%s: sr_shutdown %s\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sc), sd->sd_meta->ssd_devname); - sr_save_metadata(sd, 0); + sr_meta_save(sd, 0); - sr_shutdown_discipline(sd); + 
sr_discipline_shutdown(sd); } int @@ -2341,7 +2502,7 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) DNPRINTF(SR_D_DIS, "%s: %s 0x%02x\n", DEVNAME(sd->sd_sc), func, xs->cmd->opcode); - if (sd->sd_vol.sv_meta.svm_status == BIOC_SVOFFLINE) { + if (sd->sd_vol_status == BIOC_SVOFFLINE) { DNPRINTF(SR_D_DIS, "%s: %s device offline\n", DEVNAME(sd->sd_sc)); goto bad; @@ -2349,7 +2510,7 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) if (xs->datalen == 0) { printf("%s: %s: illegal block count\n", - DEVNAME(sd->sd_sc), func, sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sd->sd_sc), func, sd->sd_meta->ssd_devname); goto bad; } @@ -2361,14 +2522,14 @@ sr_validate_io(struct sr_workunit *wu, daddr64_t *blk, char *func) *blk = _3btol(((struct scsi_rw *)xs->cmd)->addr); else { printf("%s: %s: illegal cmdlen\n", DEVNAME(sd->sd_sc), func, - sd->sd_vol.sv_meta.svm_devname); + sd->sd_meta->ssd_devname); goto bad; } wu->swu_blk_start = *blk; wu->swu_blk_end = *blk + (xs->datalen >> DEV_BSHIFT) - 1; - if (wu->swu_blk_end > sd->sd_vol.sv_meta.svm_size) { + if (wu->swu_blk_end > sd->sd_meta->ssdi.ssd_size) { DNPRINTF(SR_D_DIS, "%s: %s out of bounds start: %lld " "end: %lld length: %d\n", DEVNAME(sd->sd_sc), func, wu->swu_blk_start, @@ -2422,26 +2583,26 @@ queued: #ifndef SMALL_KERNEL int -sr_create_sensors(struct sr_discipline *sd) +sr_sensors_create(struct sr_discipline *sd) { struct sr_softc *sc = sd->sd_sc; int rv = 1; - DNPRINTF(SR_D_STATE, "%s: %s: sr_create_sensors\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); + DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_create\n", + DEVNAME(sc), sd->sd_meta->ssd_devname); strlcpy(sd->sd_vol.sv_sensordev.xname, DEVNAME(sc), sizeof(sd->sd_vol.sv_sensordev.xname)); sd->sd_vol.sv_sensor.type = SENSOR_DRIVE; sd->sd_vol.sv_sensor.status = SENSOR_S_UNKNOWN; - strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_vol.sv_meta.svm_devname, + strlcpy(sd->sd_vol.sv_sensor.desc, sd->sd_meta->ssd_devname, 
sizeof(sd->sd_vol.sv_sensor.desc)); sensor_attach(&sd->sd_vol.sv_sensordev, &sd->sd_vol.sv_sensor); if (sc->sc_sensors_running == 0) { - if (sensor_task_register(sc, sr_refresh_sensors, 10) == NULL) + if (sensor_task_register(sc, sr_sensors_refresh, 10) == NULL) goto bad; sc->sc_sensors_running = 1; } @@ -2453,35 +2614,37 @@ bad: } void -sr_delete_sensors(struct sr_discipline *sd) +sr_sensors_delete(struct sr_discipline *sd) { #ifdef SR_DEBUG struct sr_softc *sc = sd->sd_sc; #endif - DNPRINTF(SR_D_STATE, "%s: %s: sr_delete_sensors\n", - DEVNAME(sc), sd->sd_vol.sv_meta.svm_devname); + DNPRINTF(SR_D_STATE, "%s: %s: sr_sensors_delete\n", + DEVNAME(sc), sd->sd_meta->ssd_devname); if (sd->sd_vol.sv_sensor_valid) sensordev_deinstall(&sd->sd_vol.sv_sensordev); } void -sr_refresh_sensors(void *arg) +sr_sensors_refresh(void *arg) { struct sr_softc *sc = arg; - int i, vol; struct sr_volume *sv; + struct sr_discipline *sd; + int i, vol; - DNPRINTF(SR_D_STATE, "%s: sr_refresh_sensors\n", DEVNAME(sc)); + DNPRINTF(SR_D_STATE, "%s: sr_sensors_refresh\n", DEVNAME(sc)); for (i = 0, vol = -1; i < SR_MAXSCSIBUS; i++) { /* XXX this will not work when we stagger disciplines */ if (!sc->sc_dis[i]) continue; - sv = &sc->sc_dis[i]->sd_vol; + sd = sc->sc_dis[i]; + sv = &sd->sd_vol; - switch(sv->sv_meta.svm_status) { + switch(sd->sd_vol_status) { case BIOC_SVOFFLINE: sv->sv_sensor.value = SENSOR_DRIVE_FAIL; sv->sv_sensor.status = SENSOR_S_CRIT; @@ -2535,7 +2698,7 @@ sr_print_stats(void) sd = sc->sc_dis[i]; printf("%s: ios pending: %d collisions %llu\n", - sd->sd_vol.sv_meta.svm_devname, + sd->sd_meta->ssd_devname, sd->sd_wu_pending, sd->sd_wu_collisions); } @@ -2544,73 +2707,58 @@ sr_print_stats(void) #ifdef SR_DEBUG void -sr_print_metadata(struct sr_metadata *sm) +sr_meta_print(struct sr_metadata *m) { - struct sr_vol_meta *im_sv; - struct sr_chunk_meta *im_sc; - struct sr_opt_meta *im_so; - int ch; + int i; + struct sr_meta_chunk *mc; + struct sr_meta_opt *mo; if (!(sr_debug & SR_D_META)) 
return; - im_sv = (struct sr_vol_meta *)(sm + 1); - im_sc = (struct sr_chunk_meta *)(im_sv + 1); - im_so = (struct sr_opt_meta *)(im_sc + im_sv->svm_no_chunk); - - DNPRINTF(SR_D_META, "\tmeta magic 0x%llx\n", sm->ssd_magic); - DNPRINTF(SR_D_META, "\tmeta version %d\n", sm->ssd_version); - DNPRINTF(SR_D_META, "\tmeta checksum 0x%x\n", sm->ssd_checksum); - DNPRINTF(SR_D_META, "\tmeta size %d\n", sm->ssd_size); - DNPRINTF(SR_D_META, "\tmeta on disk version %u\n", sm->ssd_ondisk); - DNPRINTF(SR_D_META, "\tmeta uuid "); - sr_print_uuid(&sm->ssd_uuid, 1); - DNPRINTF(SR_D_META, "\tvd version %d\n", sm->ssd_vd_ver); - DNPRINTF(SR_D_META, "\tvd size %lu\n", sm->ssd_vd_size); - DNPRINTF(SR_D_META, "\tvd id %u\n", sm->ssd_vd_volid); - DNPRINTF(SR_D_META, "\tvd checksum 0x%x\n", sm->ssd_vd_chk); - DNPRINTF(SR_D_META, "\tchunk version %d\n", sm->ssd_chunk_ver); - DNPRINTF(SR_D_META, "\tchunks %d\n", sm->ssd_chunk_no); - DNPRINTF(SR_D_META, "\tchunk size %u\n", sm->ssd_chunk_size); - DNPRINTF(SR_D_META, "\tchunk id %u\n", sm->ssd_chunk_id); - DNPRINTF(SR_D_META, "\tchunk checksum 0x%x\n", sm->ssd_chunk_chk); - if (sm->ssd_opt_no) { - DNPRINTF(SR_D_META, "\topt version %d\n", sm->ssd_opt_ver); - DNPRINTF(SR_D_META, "\topt items %d\n", sm->ssd_opt_no); - DNPRINTF(SR_D_META, "\topt size %d\n", sm->ssd_opt_size); - DNPRINTF(SR_D_META, "\topt chk 0x%x\n", sm->ssd_opt_chk); - } - - - DNPRINTF(SR_D_META, "\t\tvol id %d\n", im_sv->svm_volid); - DNPRINTF(SR_D_META, "\t\tvol status %d\n", im_sv->svm_status); - DNPRINTF(SR_D_META, "\t\tvol flags 0x%x\n", im_sv->svm_flags); - DNPRINTF(SR_D_META, "\t\tvol level %d\n", im_sv->svm_level); - DNPRINTF(SR_D_META, "\t\tvol size %lld\n", im_sv->svm_size); - DNPRINTF(SR_D_META, "\t\tvol name %s\n", im_sv->svm_devname); - DNPRINTF(SR_D_META, "\t\tvol vendor %s\n", im_sv->svm_vendor); - DNPRINTF(SR_D_META, "\t\tvol prod %s\n", im_sv->svm_product); - DNPRINTF(SR_D_META, "\t\tvol rev %s\n", im_sv->svm_revision); - DNPRINTF(SR_D_META, "\t\tvol no chunks 
%d\n", im_sv->svm_no_chunk); - DNPRINTF(SR_D_META, "\t\tvol uuid "); - sr_print_uuid(& im_sv->svm_uuid, 1); - DNPRINTF(SR_D_META, "\t\tvol stripsize %d\n", im_sv->svm_strip_size); - - for (ch = 0; ch < im_sv->svm_no_chunk; ch++) { - DNPRINTF(SR_D_META, "\t\t\tchunk vol id %d\n", - im_sc[ch].scm_volid); - DNPRINTF(SR_D_META, "\t\t\tchunk id %d\n", - im_sc[ch].scm_chunk_id); - DNPRINTF(SR_D_META, "\t\t\tchunk status %d\n", - im_sc[ch].scm_status); - DNPRINTF(SR_D_META, "\t\t\tchunk name %s\n", - im_sc[ch].scm_devname); - DNPRINTF(SR_D_META, "\t\t\tchunk size %lld\n", - im_sc[ch].scm_size); - DNPRINTF(SR_D_META, "\t\t\tchunk coerced size %lld\n", - im_sc[ch].scm_coerced_size); - DNPRINTF(SR_D_META, "\t\t\tchunk uuid "); - sr_print_uuid(&im_sc[ch].scm_uuid, 1); + printf("\tssd_magic 0x%llx\n", m->ssdi.ssd_magic); + printf("\tssd_version %d\n", m->ssdi.ssd_version); + printf("\tssd_flags 0x%x\n", m->ssdi.ssd_flags); + printf("\tssd_uuid "); + sr_uuid_print(&m->ssdi.ssd_uuid, 1); + printf("\tssd_chunk_no %d\n", m->ssdi.ssd_chunk_no); + printf("\tssd_chunk_id %d\n", m->ssdi.ssd_chunk_id); + printf("\tssd_opt_no %d\n", m->ssdi.ssd_opt_no); + printf("\tssd_volid %d\n", m->ssdi.ssd_volid); + printf("\tssd_level %d\n", m->ssdi.ssd_level); + printf("\tssd_level %lld\n", m->ssdi.ssd_size); + printf("\tssd_devname %s\n", m->ssd_devname); + printf("\tssd_vendor %s\n", m->ssdi.ssd_vendor); + printf("\tssd_product %s\n", m->ssdi.ssd_product); + printf("\tssd_revision %s\n", m->ssdi.ssd_revision); + printf("\tssd_strip_size %d\n", m->ssdi.ssd_strip_size); + printf("\tssd_checksum "); + sr_checksum_print(m->ssd_checksum); + printf("\n"); + printf("\tssd_meta_flags 0x%x\n", m->ssd_meta_flags); + printf("\tssd_ondisk %llu\n", m->ssd_ondisk); + + mc = (struct sr_meta_chunk *)(m + 1); + for (i = 0; i < m->ssdi.ssd_chunk_no; i++, mc++) { + printf("\t\tscm_volid %d\n", mc->scmi.scm_volid); + printf("\t\tscm_chunk_id %d\n", mc->scmi.scm_chunk_id); + printf("\t\tscm_devname %s\n", 
mc->scmi.scm_devname); + printf("\t\tscm_size %lld\n", mc->scmi.scm_size); + printf("\t\tscm_coerced_size %lld\n",mc->scmi.scm_coerced_size); + printf("\t\tscm_uuid "); + sr_uuid_print(&mc->scmi.scm_uuid, 1); + printf("\t\tscm_checksum "); + sr_checksum_print(mc->scm_checksum); + printf("\n"); + printf("\t\tscm_status %d\n", mc->scm_status); + } + + mo = (struct sr_meta_opt *)(mc); + for (i = 0; i < m->ssdi.ssd_opt_no; i++, mo++) { + printf("\t\t\tsom_type %d\n", mo->somi.som_type); + printf("\t\t\tsom_checksum "); + sr_checksum_print(mo->som_checksum); + printf("\n"); } } diff --git a/sys/dev/softraid_crypto.c b/sys/dev/softraid_crypto.c index 5d955d561f7..804e60f30ac 100644 --- a/sys/dev/softraid_crypto.c +++ b/sys/dev/softraid_crypto.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid_crypto.c,v 1.28 2008/06/25 17:43:09 thib Exp $ */ +/* $OpenBSD: softraid_crypto.c,v 1.29 2008/07/19 22:41:58 marco Exp $ */ /* * Copyright (c) 2007 Marco Peereboom * Copyright (c) 2008 Hans-Joerg Hoexer @@ -100,16 +100,23 @@ sr_crypto_getcryptop(struct sr_workunit *wu, int encrypt) struct sr_discipline *sd = wu->swu_dis; struct cryptop *crp; struct cryptodesc *crd; - struct uio *uio; - int flags, i, n; - daddr64_t blk = 0; - u_int keyndx; + struct uio *uio = NULL; + int flags, i, n, s; + daddr64_t blk = 0; + u_int keyndx; DNPRINTF(SR_D_DIS, "%s: sr_crypto_getcryptop wu: %p encrypt: %d\n", DEVNAME(sd->sd_sc), wu, encrypt); - uio = pool_get(&sr_uiopl, PR_WAITOK|PR_ZERO); - uio->uio_iov = pool_get(&sr_iovpl, PR_WAITOK); + s = splbio(); + uio = pool_get(&sd->mds.mdd_crypto.sr_uiopl, PR_ZERO); + if (uio == NULL) + goto unwind; + uio->uio_iov = pool_get(&sd->mds.mdd_crypto.sr_iovpl, 0); + if (uio->uio_iov == NULL) + goto unwind; + splx(s); + uio->uio_iovcnt = 1; uio->uio_iov->iov_len = xs->datalen; if (xs->flags & SCSI_DATA_OUT) { @@ -172,8 +179,14 @@ unwind: crypto_freereq(crp); if (wu->swu_xs->flags & SCSI_DATA_OUT) free(uio->uio_iov->iov_base, M_DEVBUF); - pool_put(&sr_iovpl, uio->uio_iov); - 
pool_put(&sr_uiopl, uio); + + s = splbio(); + if (uio && uio->uio_iov) + pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov); + if (uio) + pool_put(&sd->mds.mdd_crypto.sr_uiopl, uio); + splx(s); + return (NULL); } @@ -182,14 +195,18 @@ sr_crypto_putcryptop(struct cryptop *crp) { struct uio *uio = crp->crp_buf; struct sr_workunit *wu = crp->crp_opaque; + struct sr_discipline *sd = wu->swu_dis; + int s; DNPRINTF(SR_D_DIS, "%s: sr_crypto_putcryptop crp: %p\n", DEVNAME(wu->swu_dis->sd_sc), crp); if (wu->swu_xs->flags & SCSI_DATA_OUT) free(uio->uio_iov->iov_base, M_DEVBUF); - pool_put(&sr_iovpl, uio->uio_iov); - pool_put(&sr_uiopl, uio); + s = splbio(); + pool_put(&sd->mds.mdd_crypto.sr_iovpl, uio->uio_iov); + pool_put(&sd->mds.mdd_crypto.sr_uiopl, uio); + splx(s); crypto_freereq(crp); return (wu); @@ -434,12 +451,17 @@ sr_crypto_alloc_resources(struct sr_discipline *sd) DNPRINTF(SR_D_DIS, "%s: sr_crypto_alloc_resources\n", DEVNAME(sd->sd_sc)); + pool_init(&sd->mds.mdd_crypto.sr_uiopl, sizeof(struct uio), 0, 0, 0, + "sr_uiopl", NULL); + pool_init(&sd->mds.mdd_crypto.sr_iovpl, sizeof(struct iovec), 0, 0, 0, + "sr_iovpl", NULL); + for (i = 0; i < SR_CRYPTO_MAXKEYS; i++) sd->mds.mdd_crypto.scr_sid[i] = (u_int64_t)-1; - if (sr_alloc_wu(sd)) + if (sr_wu_alloc(sd)) return (ENOMEM); - if (sr_alloc_ccb(sd)) + if (sr_ccb_alloc(sd)) return (ENOMEM); if (sr_crypto_decrypt_key(sd)) return (EPERM); @@ -458,7 +480,7 @@ sr_crypto_alloc_resources(struct sr_discipline *sd) } /* Allocate a session for every 2^SR_CRYPTO_KEY_BLKSHIFT blocks */ - num_keys = sd->sd_vol.sv_meta.svm_size >> SR_CRYPTO_KEY_BLKSHIFT; + num_keys = sd->sd_meta->ssdi.ssd_size >> SR_CRYPTO_KEY_BLKSHIFT; if (num_keys >= SR_CRYPTO_MAXKEYS) return (EFBIG); for (i = 0; i <= num_keys; i++) { @@ -497,11 +519,11 @@ sr_crypto_free_resources(struct sr_discipline *sd) sd->mds.mdd_crypto.scr_sid[i] = (u_int64_t)-1; } - sr_free_wu(sd); - sr_free_ccb(sd); + sr_wu_free(sd); + sr_ccb_free(sd); - if (sd->sd_meta) - 
free(sd->sd_meta, M_DEVBUF); + pool_destroy(&sd->mds.mdd_crypto.sr_uiopl); + pool_destroy(&sd->mds.mdd_crypto.sr_iovpl); rv = 0; return (rv); @@ -570,11 +592,11 @@ sr_crypto_rw2(struct sr_workunit *wu, struct cryptop *crp) wu->swu_io_count = 1; - ccb = sr_get_ccb(sd); + ccb = sr_ccb_get(sd); if (!ccb) { /* should never happen but handle more gracefully */ printf("%s: %s: too many ccbs queued\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); goto bad; } @@ -608,7 +630,7 @@ sr_crypto_rw2(struct sr_workunit *wu, struct cryptop *crp) DNPRINTF(SR_D_DIS, "%s: %s: sr_crypto_rw2: b_bcount: %d " "b_blkno: %x b_flags 0x%0x b_data %p\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); @@ -623,7 +645,7 @@ queued: splx(s); return (0); bad: - /* wu is unwound by sr_put_wu */ + /* wu is unwound by sr_wu_put */ if (crp) crp->crp_etype = EINVAL; return (1); @@ -740,7 +762,7 @@ sr_crypto_finish_io(struct sr_workunit *wu) } /* do not change the order of these 2 functions */ - sr_put_wu(wu); + sr_wu_put(wu); scsi_done(xs); if (sd->sd_sync && sd->sd_wu_pending == 0) diff --git a/sys/dev/softraid_raid0.c b/sys/dev/softraid_raid0.c index d8994687878..9fa1fed1e69 100644 --- a/sys/dev/softraid_raid0.c +++ b/sys/dev/softraid_raid0.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid_raid0.c,v 1.8 2008/02/05 16:49:25 marco Exp $ */ +/* $OpenBSD: softraid_raid0.c,v 1.9 2008/07/19 22:41:58 marco Exp $ */ /* * Copyright (c) 2008 Marco Peereboom * @@ -55,14 +55,14 @@ sr_raid0_alloc_resources(struct sr_discipline *sd) DNPRINTF(SR_D_DIS, "%s: sr_raid0_alloc_resources\n", DEVNAME(sd->sd_sc)); - if (sr_alloc_wu(sd)) + if (sr_wu_alloc(sd)) goto bad; - if (sr_alloc_ccb(sd)) + if (sr_ccb_alloc(sd)) goto bad; /* setup runtime values */ sd->mds.mdd_raid0.sr0_strip_bits = - 
sr_validate_stripsize(sd->sd_vol.sv_meta.svm_strip_size); + sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size); if (sd->mds.mdd_raid0.sr0_strip_bits == -1) goto bad; @@ -82,11 +82,8 @@ sr_raid0_free_resources(struct sr_discipline *sd) DNPRINTF(SR_D_DIS, "%s: sr_raid0_free_resources\n", DEVNAME(sd->sd_sc)); - sr_free_wu(sd); - sr_free_ccb(sd); - - if (sd->sd_meta) - free(sd->sd_meta, M_DEVBUF); + sr_wu_free(sd); + sr_ccb_free(sd); rv = 0; return (rv); @@ -98,8 +95,8 @@ sr_raid0_set_chunk_state(struct sr_discipline *sd, int c, int new_state) int old_state, s; DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state); + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); /* ok to go to splbio since this only happens in error path */ s = splbio(); @@ -125,8 +122,8 @@ die: splx(s); /* XXX */ panic("%s: %s: %s: invalid chunk state transition " "%d -> %d\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, + sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, old_state, new_state); /* NOTREACHED */ } @@ -135,7 +132,7 @@ die: sd->sd_set_vol_state(sd); sd->sd_must_flush = 1; - workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL); + workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); done: splx(s); } @@ -145,12 +142,12 @@ sr_raid0_set_vol_state(struct sr_discipline *sd) { int states[SR_MAX_STATES]; int new_state, i, s, nd; - int old_state = sd->sd_vol.sv_meta.svm_status; + int old_state = sd->sd_vol_status; DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); - nd = sd->sd_vol.sv_meta.svm_no_chunk; + nd = sd->sd_meta->ssdi.ssd_chunk_no; for (i = 0; i < SR_MAX_STATES; i++) states[i] 
= 0; @@ -160,8 +157,8 @@ sr_raid0_set_vol_state(struct sr_discipline *sd) if (s > SR_MAX_STATES) panic("%s: %s: %s: invalid chunk state", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[i]->src_meta.scm_devname); + sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); states[s]++; } @@ -171,7 +168,7 @@ sr_raid0_set_vol_state(struct sr_discipline *sd) new_state = BIOC_SVOFFLINE; DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, old_state, new_state); switch (old_state) { @@ -190,12 +187,12 @@ sr_raid0_set_vol_state(struct sr_discipline *sd) die: panic("%s: %s: invalid volume state transition " "%d -> %d\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, + sd->sd_meta->ssd_devname, old_state, new_state); /* NOTREACHED */ } - sd->sd_vol.sv_meta.svm_status = new_state; + sd->sd_vol_status = new_state; } int @@ -215,12 +212,12 @@ sr_raid0_rw(struct sr_workunit *wu) if (sr_validate_io(wu, &blk, "sr_raid0_rw")) goto bad; - strip_size = sd->sd_vol.sv_meta.svm_strip_size; + strip_size = sd->sd_meta->ssdi.ssd_strip_size; strip_bits = sd->mds.mdd_raid0.sr0_strip_bits; - no_chunk = sd->sd_vol.sv_meta.svm_no_chunk; + no_chunk = sd->sd_meta->ssdi.ssd_chunk_no; DNPRINTF(SR_D_DIS, "%s: %s: front end io: lba %lld size %d\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, blk, xs->datalen); /* all offs are in bytes */ @@ -238,16 +235,16 @@ sr_raid0_rw(struct sr_workunit *wu) /* make sure chunk is online */ scp = sd->sd_vol.sv_chunks[chunk]; if (scp->src_meta.scm_status != BIOC_SDONLINE) { - sr_put_ccb(ccb); + sr_ccb_put(ccb); goto bad; } - ccb = sr_get_ccb(sd); + ccb = sr_ccb_get(sd); if (!ccb) { /* should never happen but handle more gracefully */ printf("%s: %s: too many ccbs queued\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname); + 
sd->sd_meta->ssd_devname); goto bad; } @@ -255,7 +252,7 @@ sr_raid0_rw(struct sr_workunit *wu) "strip_no: %lld chunk: %lld stripoffs: %lld " "chunkoffs: %lld physoffs: %lld length: %lld " "leftover: %lld data: %p\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, lbaoffs, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, lbaoffs, strip_no, chunk, stripoffs, chunkoffs, physoffs, length, leftover, data); @@ -279,7 +276,7 @@ sr_raid0_rw(struct sr_workunit *wu) DNPRINTF(SR_D_DIS, "%s: %s: sr_raid0: b_bcount: %d " "b_blkno: %lld b_flags 0x%0x b_data %p\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); @@ -306,7 +303,7 @@ queued: splx(s); return (0); bad: - /* wu is unwound by sr_put_wu */ + /* wu is unwound by sr_wu_put */ return (1); } @@ -385,7 +382,7 @@ sr_raid0_intr(struct buf *bp) DEVNAME(sc), wu); /* do not change the order of these 2 functions */ - sr_put_wu(wu); + sr_wu_put(wu); scsi_done(xs); if (sd->sd_sync && sd->sd_wu_pending == 0) @@ -397,7 +394,7 @@ sr_raid0_intr(struct buf *bp) bad: xs->error = XS_DRIVER_STUFFUP; xs->flags |= ITSDONE; - sr_put_wu(wu); + sr_wu_put(wu); scsi_done(xs); splx(s); } diff --git a/sys/dev/softraid_raid1.c b/sys/dev/softraid_raid1.c index 85a789ffc0c..a89294ce1c3 100644 --- a/sys/dev/softraid_raid1.c +++ b/sys/dev/softraid_raid1.c @@ -1,4 +1,4 @@ -/* $OpenBSD: softraid_raid1.c,v 1.5 2008/02/05 16:49:25 marco Exp $ */ +/* $OpenBSD: softraid_raid1.c,v 1.6 2008/07/19 22:41:58 marco Exp $ */ /* * Copyright (c) 2007 Marco Peereboom * @@ -55,9 +55,9 @@ sr_raid1_alloc_resources(struct sr_discipline *sd) DNPRINTF(SR_D_DIS, "%s: sr_raid1_alloc_resources\n", DEVNAME(sd->sd_sc)); - if (sr_alloc_wu(sd)) + if (sr_wu_alloc(sd)) goto bad; - if (sr_alloc_ccb(sd)) + if (sr_ccb_alloc(sd)) goto bad; rv = 0; @@ -76,11 +76,8 @@ sr_raid1_free_resources(struct sr_discipline *sd) DNPRINTF(SR_D_DIS, "%s: 
sr_raid1_free_resources\n", DEVNAME(sd->sd_sc)); - sr_free_wu(sd); - sr_free_ccb(sd); - - if (sd->sd_meta) - free(sd->sd_meta, M_DEVBUF); + sr_wu_free(sd); + sr_ccb_free(sd); rv = 0; return (rv); @@ -92,8 +89,8 @@ sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state) int old_state, s; DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, c, new_state); + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state); /* ok to go to splbio since this only happens in error path */ s = splbio(); @@ -148,8 +145,8 @@ die: splx(s); /* XXX */ panic("%s: %s: %s: invalid chunk state transition " "%d -> %d\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[c]->src_meta.scm_devname, + sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, old_state, new_state); /* NOTREACHED */ } @@ -158,7 +155,7 @@ die: sd->sd_set_vol_state(sd); sd->sd_must_flush = 1; - workq_add_task(NULL, 0, sr_save_metadata_callback, sd, NULL); + workq_add_task(NULL, 0, sr_meta_save_callback, sd, NULL); done: splx(s); } @@ -168,12 +165,12 @@ sr_raid1_set_vol_state(struct sr_discipline *sd) { int states[SR_MAX_STATES]; int new_state, i, s, nd; - int old_state = sd->sd_vol.sv_meta.svm_status; + int old_state = sd->sd_vol_status; DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname); + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname); - nd = sd->sd_vol.sv_meta.svm_no_chunk; + nd = sd->sd_meta->ssdi.ssd_chunk_no; for (i = 0; i < SR_MAX_STATES; i++) states[i] = 0; @@ -183,8 +180,8 @@ sr_raid1_set_vol_state(struct sr_discipline *sd) if (s > SR_MAX_STATES) panic("%s: %s: %s: invalid chunk state", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, - sd->sd_vol.sv_chunks[i]->src_meta.scm_devname); + 
sd->sd_meta->ssd_devname, + sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname); states[s]++; } @@ -207,7 +204,7 @@ sr_raid1_set_vol_state(struct sr_discipline *sd) } DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, old_state, new_state); switch (old_state) { @@ -274,12 +271,12 @@ sr_raid1_set_vol_state(struct sr_discipline *sd) die: panic("%s: %s: invalid volume state transition " "%d -> %d\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname, + sd->sd_meta->ssd_devname, old_state, new_state); /* NOTREACHED */ } - sd->sd_vol.sv_meta.svm_status = new_state; + sd->sd_vol_status = new_state; } int @@ -302,16 +299,16 @@ sr_raid1_rw(struct sr_workunit *wu) if (xs->flags & SCSI_DATA_IN) ios = 1; else - ios = sd->sd_vol.sv_meta.svm_no_chunk; + ios = sd->sd_meta->ssdi.ssd_chunk_no; wu->swu_io_count = ios; for (i = 0; i < ios; i++) { - ccb = sr_get_ccb(sd); + ccb = sr_ccb_get(sd); if (!ccb) { /* should never happen but handle more gracefully */ printf("%s: %s: too many ccbs queued\n", DEVNAME(sd->sd_sc), - sd->sd_vol.sv_meta.svm_devname); + sd->sd_meta->ssd_devname); goto bad; } @@ -337,7 +334,7 @@ sr_raid1_rw(struct sr_workunit *wu) ragain: /* interleave reads */ x = sd->mds.mdd_raid1.sr1_counter++ % - sd->sd_vol.sv_meta.svm_no_chunk; + sd->sd_meta->ssdi.ssd_chunk_no; scp = sd->sd_vol.sv_chunks[x]; switch (scp->src_meta.scm_status) { case BIOC_SDONLINE: @@ -348,7 +345,7 @@ ragain: case BIOC_SDOFFLINE: case BIOC_SDREBUILD: case BIOC_SDHOTSPARE: - if (rt++ < sd->sd_vol.sv_meta.svm_no_chunk) + if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no) goto ragain; /* FALLTHROUGH */ @@ -356,7 +353,7 @@ ragain: /* volume offline */ printf("%s: is offline, can't read\n", DEVNAME(sd->sd_sc)); - sr_put_ccb(ccb); + sr_ccb_put(ccb); goto bad; } } else { @@ -373,7 +370,7 @@ ragain: case BIOC_SDHOTSPARE: /* should never happen */ case BIOC_SDOFFLINE: wu->swu_io_count--; - 
sr_put_ccb(ccb); + sr_ccb_put(ccb); continue; default: @@ -391,7 +388,7 @@ ragain: DNPRINTF(SR_D_DIS, "%s: %s: sr_raid1: b_bcount: %d " "b_blkno: %x b_flags 0x%0x b_data %p\n", - DEVNAME(sd->sd_sc), sd->sd_vol.sv_meta.svm_devname, + DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, ccb->ccb_buf.b_bcount, ccb->ccb_buf.b_blkno, ccb->ccb_buf.b_flags, ccb->ccb_buf.b_data); } @@ -415,7 +412,7 @@ queued: splx(s); return (0); bad: - /* wu is unwound by sr_put_wu */ + /* wu is unwound by sr_wu_put */ return (1); } @@ -465,7 +462,7 @@ sr_raid1_intr(struct buf *bp) if (xs->flags & SCSI_DATA_IN) { printf("%s: retrying read on block %lld\n", DEVNAME(sc), ccb->ccb_buf.b_blkno); - sr_put_ccb(ccb); + sr_ccb_put(ccb); TAILQ_INIT(&wu->swu_ccb); wu->swu_state = SR_WU_RESTART; if (sd->sd_scsi_rw(wu)) @@ -513,7 +510,7 @@ sr_raid1_intr(struct buf *bp) DEVNAME(sc), wu); /* do not change the order of these 2 functions */ - sr_put_wu(wu); + sr_wu_put(wu); scsi_done(xs); if (sd->sd_sync && sd->sd_wu_pending == 0) @@ -526,7 +523,7 @@ retry: bad: xs->error = XS_DRIVER_STUFFUP; xs->flags |= ITSDONE; - sr_put_wu(wu); + sr_wu_put(wu); scsi_done(xs); splx(s); } @@ -544,7 +541,7 @@ sr_raid1_recreate_wu(struct sr_workunit *wu) /* toss all ccbs */ while ((ccb = TAILQ_FIRST(&wup->swu_ccb)) != NULL) { TAILQ_REMOVE(&wup->swu_ccb, ccb, ccb_link); - sr_put_ccb(ccb); + sr_ccb_put(ccb); } TAILQ_INIT(&wup->swu_ccb); diff --git a/sys/dev/softraidvar.h b/sys/dev/softraidvar.h index 0b40b5ce047..1b87e3e6a77 100644 --- a/sys/dev/softraidvar.h +++ b/sys/dev/softraidvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: softraidvar.h,v 1.60 2008/06/25 17:43:09 thib Exp $ */ +/* $OpenBSD: softraidvar.h,v 1.61 2008/07/19 22:41:58 marco Exp $ */ /* * Copyright (c) 2006 Marco Peereboom * Copyright (c) 2008 Chris Kuethe @@ -19,89 +19,129 @@ #ifndef SOFTRAIDVAR_H #define SOFTRAIDVAR_H +#include + #define SR_UUID_MAX 4 struct sr_uuid { u_int32_t sui_id[SR_UUID_MAX]; } __packed; -#define SR_META_SIZE 32 /* save space at chunk beginning */ 
+#define SR_META_SIZE 64 /* save space at chunk beginning */ #define SR_META_OFFSET 16 /* skip 8192 bytes at chunk beginning */ -#define SR_META_VERSION 1 /* bump when sr_metadata changes */ +#define SR_META_VERSION 2 /* bump when sr_metadata changes */ struct sr_metadata { - /* do not change order of ssd_magic, ssd_version & ssd_checksum */ - u_int64_t ssd_magic; /* magic id */ + struct sr_meta_invariant { + /* do not change order of ssd_magic, ssd_version */ + u_int64_t ssd_magic; /* magic id */ #define SR_MAGIC 0x4d4152436372616dLLU - u_int8_t ssd_version; /* meta data version */ - u_int8_t ssd_pad1[3]; - u_int32_t ssd_flags; /* flags */ - - /* meta-data */ - u_int32_t ssd_checksum; /* xor of the structure */ - u_int32_t ssd_size; /* sizeof(sr_metadata) */ - u_int32_t ssd_ondisk; /* on disk version counter */ - u_int32_t ssd_pad2; - struct sr_uuid ssd_uuid; /* unique identifier */ - - /* virtual disk data */ - u_int32_t ssd_vd_ver; /* vd structure version */ - u_int32_t ssd_vd_size; /* vd structure size */ - u_int32_t ssd_vd_volid; /* volume id */ - u_int32_t ssd_vd_chk; /* vd structure xor */ - - /* chunk data */ - u_int32_t ssd_chunk_ver; /* chunk structure version */ - u_int32_t ssd_chunk_no; /* number of chunks */ - u_int32_t ssd_chunk_size; /* chunk structure size */ - u_int32_t ssd_chunk_id; /* chunk identifier */ - u_int32_t ssd_chunk_chk; /* chunk structure xor */ - u_int32_t ssd_pad3; - - /* optional metadata */ - u_int32_t ssd_opt_ver; /* optinal meta version */ - u_int32_t ssd_opt_no; /* nr of optional md elements */ - u_int32_t ssd_opt_size; /* sizeof optional metadata */ - u_int32_t ssd_opt_chk; /* optional metadata xor */ -} __packed; - -#define SR_VOL_VERSION 2 /* bump when sr_vol_meta changes */ -struct sr_vol_meta { - u_int32_t svm_volid; /* volume id */ - u_int32_t svm_status; /* use bioc_vol status */ - u_int32_t svm_flags; /* flags */ -#define SR_VOL_DIRTY 0x01 - u_int32_t svm_level; /* raid level */ - int64_t svm_size; /* virt disk size in 
blocks */ - char svm_devname[32];/* /dev/XXXXX */ - char svm_vendor[8]; /* scsi vendor */ - char svm_product[16];/* scsi product */ - char svm_revision[4];/* scsi revision */ - u_int32_t svm_no_chunk; /* number of chunks */ - struct sr_uuid svm_uuid; /* volume unique identifier */ - - /* optional members */ - u_int32_t svm_strip_size; /* strip size */ + u_int32_t ssd_version; /* meta data version */ + u_int32_t ssd_flags; + struct sr_uuid ssd_uuid; /* unique identifier */ + + /* chunks */ + u_int32_t ssd_chunk_no; /* number of chunks */ + u_int32_t ssd_chunk_id; /* chunk identifier */ + + /* optional */ + u_int32_t ssd_opt_no; /* nr of optional md * elements */ + u_int32_t ssd_pad; + + /* volume metadata */ + u_int32_t ssd_volid; /* volume id */ + u_int32_t ssd_level; /* raid level */ + int64_t ssd_size; /* virt disk size in blocks */ + char ssd_vendor[8]; /* scsi vendor */ + char ssd_product[16];/* scsi product */ + char ssd_revision[4];/* scsi revision */ + /* optional volume members */ + u_int32_t ssd_strip_size; /* strip size */ + } _sdd_invariant; +#define ssdi _sdd_invariant + /* MD5 of invariant metadata */ + u_int8_t ssd_checksum[MD5_DIGEST_LENGTH]; + char ssd_devname[32];/* /dev/XXXXX */ + u_int32_t ssd_meta_flags; +#define SR_META_DIRTY 0x1 + u_int32_t ssd_pad; + u_int64_t ssd_ondisk; /* on disk version counter */ } __packed; -#define SR_CHUNK_VERSION 1 /* bump when sr_chunk_meta changes */ -struct sr_chunk_meta { - u_int32_t scm_volid; /* vd we belong to */ - u_int32_t scm_chunk_id; /* chunk id */ +struct sr_meta_chunk { + struct sr_meta_chunk_invariant { + u_int32_t scm_volid; /* vd we belong to */ + u_int32_t scm_chunk_id; /* chunk id */ + char scm_devname[32];/* /dev/XXXXX */ + int64_t scm_size; /* size of partition in blocks*/ + int64_t scm_coerced_size; /* coerced sz of part in blk*/ + struct sr_uuid scm_uuid; /* unique identifier */ + } _scm_invariant; +#define scmi _scm_invariant + /* MD5 of invariant chunk metadata */ + u_int8_t 
scm_checksum[MD5_DIGEST_LENGTH]; u_int32_t scm_status; /* use bio bioc_disk status */ - u_int32_t scm_pad1; - char scm_devname[32];/* /dev/XXXXX */ - int64_t scm_size; /* size of partition in blocks*/ - int64_t scm_coerced_size; /* coerced sz of part in blk*/ - struct sr_uuid scm_uuid; /* unique identifier */ } __packed; -#define SR_CRYPTO_MAXKEYBYTES 32 -#define SR_CRYPTO_MAXKEYS 32 +#define SR_CRYPTO_MAXKEYBYTES 32 /* max bytes in a key (AES-XTS-256) */ +#define SR_CRYPTO_MAXKEYS 32 /* max keys per volume */ #define SR_CRYPTO_KEYBITS 512 /* AES-XTS with 2 * 256 bit keys */ #define SR_CRYPTO_KEYBYTES (SR_CRYPTO_KEYBITS >> 3) -#define SR_CRYPTO_KDFHINTBYTES 256 -#define SR_CRYPTO_CHECKBYTES 64 +#define SR_CRYPTO_KDFHINTBYTES 256 /* size of opaque KDF hint */ +#define SR_CRYPTO_CHECKBYTES 64 /* size of generic key chksum struct */ #define SR_CRYPTO_KEY_BLKSHIFT 30 /* 0.5TB per key */ +/* + * Check that HMAC-SHA1_k(decrypted scm_key) == sch_mac, where + * k = SHA1(masking key) + */ +struct sr_crypto_chk_hmac_sha1 { + u_int8_t sch_mac[20]; +} __packed; + +struct sr_meta_crypto { + u_int32_t scm_alg; /* vol crypto algorithm */ +#define SR_CRYPTOA_AES_XTS_128 1 +#define SR_CRYPTOA_AES_XTS_256 2 + u_int32_t scm_flags; /* key & kdfhint valid */ +#define SR_CRYPTOF_INVALID (0) +#define SR_CRYPTOF_KEY (1<<0) +#define SR_CRYPTOF_KDFHINT (1<<1) + u_int32_t scm_mask_alg; /* disk key masking crypt alg */ +#define SR_CRYPTOM_AES_ECB_256 1 + u_int32_t scm_pad1; + u_int8_t scm_reserved[64]; + + /* symmetric keys used for disk encryption */ + u_int8_t scm_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES]; + /* hint to kdf algorithm (opaque to kernel) */ + u_int8_t scm_kdfhint[SR_CRYPTO_KDFHINTBYTES]; + + u_int32_t scm_check_alg; /* key chksum algorithm */ +#define SR_CRYPTOC_HMAC_SHA1 1 + u_int32_t scm_pad2; + union { + struct sr_crypto_chk_hmac_sha1 chk_hmac_sha1; + u_int8_t chk_reserved2[64]; + } _scm_chk; +#define chk_hmac_sha1 _scm_chk.chk_hmac_sha1 +} __packed; + +struct sr_meta_opt 
{ + struct sr_meta_opt_invariant { + u_int32_t som_type; /* optional type */ +#define SR_OPT_INVALID 0x00 +#define SR_OPT_CRYPTO 0x01 + u_int32_t som_pad; + union { + struct sr_meta_crypto smm_crypto; + } som_meta; + } _som_invariant; +#define somi _som_invariant +#define somi_crypto _som_invariant.smm_crypto + /* MD5 of invariant optional metadata */ + u_int8_t som_checksum[MD5_DIGEST_LENGTH]; +} __packed; + +/* this is a generic hint for KDF done in userland, not interpreted by the kernel. */ struct sr_crypto_genkdf { u_int32_t len; u_int32_t type; @@ -109,6 +149,7 @@ struct sr_crypto_genkdf { #define SR_CRYPTOKDFT_PBKDF2 (1<<0) }; +/* this is a hint for KDF using PKCS#5. Not interpreted by the kernel */ struct sr_crypto_kdf_pbkdf2 { u_int32_t len; u_int32_t type; @@ -117,13 +158,9 @@ struct sr_crypto_kdf_pbkdf2 { }; /* - * Check that HMAC-SHA1_k(decrypted scm_key) == sch_mac, where - * k = SHA1(masking key) + * this structure is used to copy masking keys and KDF hints from/to userland. + * the embedded hint structures are not interpreted by the kernel. 
*/ -struct sr_crypto_chk_hmac_sha1 { - u_int8_t sch_mac[20]; -}; - struct sr_crypto_kdfinfo { u_int32_t len; u_int32_t flags; @@ -139,46 +176,11 @@ struct sr_crypto_kdfinfo { #define pbkdf2 _kdfhint.pbkdf2 }; -struct sr_crypto_metadata { - u_int32_t scm_alg; -#define SR_CRYPTOA_AES_XTS_128 1 -#define SR_CRYPTOA_AES_XTS_256 2 - u_int32_t scm_flags; -#define SR_CRYPTOF_INVALID (0) -#define SR_CRYPTOF_KEY (1<<0) -#define SR_CRYPTOF_KDFHINT (1<<1) - u_int32_t scm_mask_alg; -#define SR_CRYPTOM_AES_ECB_256 1 - u_int8_t scm_reserved[64]; - - u_int8_t scm_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES]; - u_int8_t scm_kdfhint[SR_CRYPTO_KDFHINTBYTES]; - - u_int32_t scm_check_alg; -#define SR_CRYPTOC_HMAC_SHA1 1 - union { - struct sr_crypto_chk_hmac_sha1 chk_hmac_sha1; - u_int8_t chk_reserved2[64]; - } _scm_chk; -#define chk_hmac_sha1 _scm_chk.chk_hmac_sha1 -}; - -#define SR_OPT_VERSION 1 /* bump when sr_opt_meta changes */ -struct sr_opt_meta { - u_int32_t som_type; -#define SR_OPT_INVALID 0x00 -#define SR_OPT_CRYPTO 0x01 - u_int32_t som_pad; - union { - struct sr_crypto_metadata smm_crypto; - } som_meta; -}; - #ifdef _KERNEL - #include #include +#include #include #include @@ -211,6 +213,7 @@ extern u_int32_t sr_debug; #define SR_MAX_LD 1 #define SR_MAX_CMDS 16 #define SR_MAX_STATES 7 +#define SR_VM_IGNORE_DIRTY 1 /* forward define to prevent dependency goo */ struct sr_softc; @@ -289,15 +292,19 @@ struct sr_raid1 { /* CRYPTO */ #define SR_CRYPTO_NOWU 16 struct sr_crypto { - struct sr_crypto_metadata scr_meta; + struct sr_meta_crypto scr_meta; + struct pool sr_uiopl; + struct pool sr_iovpl; + + /* XXX only keep scr_sid over time */ u_int8_t scr_key[SR_CRYPTO_MAXKEYS][SR_CRYPTO_KEYBYTES]; u_int8_t scr_maskkey[SR_CRYPTO_MAXKEYBYTES]; u_int64_t scr_sid[SR_CRYPTO_MAXKEYS]; }; struct sr_metadata_list { - struct sr_metadata *sml_metadata; + u_int8_t sml_metadata[SR_META_SIZE * 512]; dev_t sml_mm; int sml_used; @@ -307,7 +314,8 @@ struct sr_metadata_list { 
SLIST_HEAD(sr_metadata_list_head, sr_metadata_list); struct sr_chunk { - struct sr_chunk_meta src_meta; /* chunk meta data */ + struct sr_meta_chunk src_meta; /* chunk meta data */ + struct sr_meta_opt src_opt; /* optional metadata */ /* runtime data */ dev_t src_dev_mm; /* major/minor */ @@ -323,8 +331,6 @@ struct sr_chunk { SLIST_HEAD(sr_chunk_head, sr_chunk); struct sr_volume { - struct sr_vol_meta sv_meta; /* meta data */ - /* runtime data */ struct sr_chunk_head sv_chunk_list; /* linked list of all chunks */ struct sr_chunk **sv_chunks; /* array to same chunks */ @@ -356,7 +362,9 @@ struct sr_discipline { /* discipline metadata */ struct sr_metadata *sd_meta; /* in memory copy of metadata */ + void *sd_meta_foreign; /* non native metadata */ u_int32_t sd_meta_flags; + int sd_meta_type; /* metadata functions */ int sd_sync; int sd_must_flush; @@ -368,7 +376,7 @@ struct sr_discipline { /* discipline volume */ struct sr_volume sd_vol; /* volume associated */ - + int sd_vol_status; /* runtime vol status */ /* discipline resources */ struct sr_ccb *sd_ccb; struct sr_ccb_list sd_ccb_freeq; @@ -429,23 +437,19 @@ struct sr_softc { struct sr_discipline *sc_dis[SR_MAXSCSIBUS]; /* scsibus is u_int8_t */ }; -struct pool; -extern struct pool sr_uiopl; -extern struct pool sr_iovpl; - /* work units & ccbs */ -int sr_alloc_ccb(struct sr_discipline *); -void sr_free_ccb(struct sr_discipline *); -struct sr_ccb *sr_get_ccb(struct sr_discipline *); -void sr_put_ccb(struct sr_ccb *); -int sr_alloc_wu(struct sr_discipline *); -void sr_free_wu(struct sr_discipline *); -struct sr_workunit *sr_get_wu(struct sr_discipline *); -void sr_put_wu(struct sr_workunit *); +int sr_ccb_alloc(struct sr_discipline *); +void sr_ccb_free(struct sr_discipline *); +struct sr_ccb *sr_ccb_get(struct sr_discipline *); +void sr_ccb_put(struct sr_ccb *); +int sr_wu_alloc(struct sr_discipline *); +void sr_wu_free(struct sr_discipline *); +struct sr_workunit *sr_wu_get(struct sr_discipline *); +void 
sr_wu_put(struct sr_workunit *); /* misc functions */ int32_t sr_validate_stripsize(u_int32_t); -void sr_save_metadata_callback(void *, void *); +void sr_meta_save_callback(void *, void *); int sr_validate_io(struct sr_workunit *, daddr64_t *, char *); int sr_check_io_collision(struct sr_workunit *); -- 2.20.1