From 2bd648c0c791046970147cb6db731832ad511740 Mon Sep 17 00:00:00 2001 From: mpi Date: Mon, 20 Aug 2018 16:00:22 +0000 Subject: [PATCH] Reorder checks in the read/write(2) family of syscalls to prepare making file operations mp-safe. This change makes it clear that `f_offset' is only accessed in vn_read() and vn_write(), which will help taking it out of the KERNEL_LOCK(). This refactoring uncovered a race in vn_read() which is now documented and will be addressed in a later diff. ok visa@ --- sys/dev/pci/drm/drm_linux.c | 6 +- sys/kern/kern_event.c | 13 +- sys/kern/sys_generic.c | 314 +++++++++++++++++++----------------- sys/kern/sys_pipe.c | 10 +- sys/kern/sys_socket.c | 6 +- sys/kern/vfs_syscalls.c | 136 ++++++---------- sys/kern/vfs_vnops.c | 47 ++++-- sys/sys/file.h | 13 +- sys/sys/socketvar.h | 17 +- sys/sys/uio.h | 11 +- 10 files changed, 276 insertions(+), 297 deletions(-) diff --git a/sys/dev/pci/drm/drm_linux.c b/sys/dev/pci/drm/drm_linux.c index 18ae95f3c18..a414a2623a7 100644 --- a/sys/dev/pci/drm/drm_linux.c +++ b/sys/dev/pci/drm/drm_linux.c @@ -1,4 +1,4 @@ -/* $OpenBSD: drm_linux.c,v 1.29 2018/08/20 14:59:02 visa Exp $ */ +/* $OpenBSD: drm_linux.c,v 1.30 2018/08/20 16:00:22 mpi Exp $ */ /* * Copyright (c) 2013 Jonathan Gray * Copyright (c) 2015, 2016 Mark Kettenis @@ -809,13 +809,13 @@ fence_context_alloc(unsigned int num) } int -dmabuf_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +dmabuf_read(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } int -dmabuf_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +dmabuf_write(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 4556569063f..81b591837c7 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_event.c,v 1.97 2018/08/15 13:19:06 visa Exp $ */ +/* $OpenBSD: kern_event.c,v 1.98 2018/08/20 16:00:22 mpi Exp $ */ /*- * Copyright (c) 1999,2000,2001 Jonathan Lemon @@ -58,10 +58,8 @@ int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp, const struct timespec *timeout, struct proc *p, int *retval); -int kqueue_read(struct file *fp, off_t *poff, struct uio *uio, - struct ucred *cred); -int kqueue_write(struct file *fp, off_t *poff, struct uio *uio, - struct ucred *cred); +int kqueue_read(struct file *, struct uio *, int); +int kqueue_write(struct file *, struct uio *, int); int kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct proc *p); int kqueue_poll(struct file *fp, int events, struct proc *p); @@ -850,14 +848,13 @@ done: * This could be expanded to call kqueue_scan, if desired. */ int -kqueue_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +kqueue_read(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } int -kqueue_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) - +kqueue_write(struct file *fp, struct uio *uio, int fflags) { return (ENXIO); } diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 0411e1c009e..df0ec40c11d 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.121 2018/07/14 10:21:48 jsg Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.122 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,62 @@ int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, int doppoll(struct proc *, struct pollfd *, u_int, const struct timespec *, const sigset_t *, register_t *); +int +iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov, + unsigned int iovcnt, size_t *residp) +{ +#ifdef KTRACE + struct proc *p = curproc; +#endif + struct iovec *iov; + int error, i; + size_t resid = 0; + + if (iovcnt > UIO_SMALLIOV) { + if (iovcnt > IOV_MAX) + return (EINVAL); + iov = mallocarray(iovcnt, sizeof(*iov), M_IOV, M_WAITOK); + } else if (iovcnt > 0) { + iov = aiov; + } else { + return (EINVAL); + } + *iovp = iov; + + if ((error = copyin(uiov, iov, iovcnt * sizeof(*iov)))) + return (error); + +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) + ktriovec(p, iov, iovcnt); +#endif + + for (i = 0; i < iovcnt; i++) { + resid += iov->iov_len; + /* + * Writes return ssize_t because -1 is returned on error. + * Therefore we must restrict the length to SSIZE_MAX to + * avoid garbage return values. Note that the addition is + * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. + */ + if (iov->iov_len > SSIZE_MAX || resid > SSIZE_MAX) + return (EINVAL); + iov++; + } + + if (residp != NULL) + *residp = resid; + + return (0); +} + +void +iovec_free(struct iovec *iov, unsigned int iovcnt) +{ + if (iovcnt > UIO_SMALLIOV) + free(iov, M_IOV, iovcnt * sizeof(*iov)); +} + /* * Read system call. */ @@ -84,18 +141,18 @@ sys_read(struct proc *p, void *v, register_t *retval) syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; - int fd = SCARG(uap, fd); - struct file *fp; - struct filedesc *fdp = p->p_fd; - - if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) - return (EBADF); + struct uio auio; iov.iov_base = SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); + if (iov.iov_len > SSIZE_MAX) + return (EINVAL); + + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_resid = iov.iov_len; - /* dofilereadv() will FRELE the descriptor for us */ - return (dofilereadv(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); + return (dofilereadv(p, SCARG(uap, fd), &auio, 0, retval)); } /* @@ -109,99 +166,79 @@ sys_readv(struct proc *p, void *v, register_t *retval) syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; - int fd = SCARG(uap, fd); - struct file *fp; - struct filedesc *fdp = p->p_fd; + struct iovec aiov[UIO_SMALLIOV], *iov = NULL; + int error, iovcnt = SCARG(uap, iovcnt); + struct uio auio; + size_t resid; - if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) - return (EBADF); + error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); + if (error) + goto done; - /* dofilereadv() will FRELE the descriptor for us */ - return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, - &fp->f_offset, retval)); + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = resid; + + error = dofilereadv(p, SCARG(uap, fd), &auio, 0, retval); + done: + iovec_free(iov, iovcnt); + return (error); } int -dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, - int iovcnt, int userspace, off_t *offset, register_t *retval) +dofilereadv(struct proc *p, int fd, struct uio *uio, int flags, + register_t *retval) { - struct iovec aiov[UIO_SMALLIOV]; - struct uio auio; - struct iovec *iov; - struct iovec *needfree = NULL; - long i, cnt, error = 0; + struct filedesc *fdp = p->p_fd; + struct file *fp; + long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif - /* note: can't use iovlen until iovcnt is validated */ - iovlen = iovcnt * sizeof(struct iovec); + KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); + iovlen = uio->uio_iovcnt * sizeof(struct iovec); - /* - * If the iovec array exists in userspace, it needs to be copied in; - * otherwise, it can be used directly. - */ - if (userspace) { - if ((u_int)iovcnt > UIO_SMALLIOV) { - if ((u_int)iovcnt > IOV_MAX) { - error = EINVAL; - goto out; - } - iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); - } else if ((u_int)iovcnt > 0) { - iov = aiov; - needfree = NULL; - } else { - error = EINVAL; - goto out; - } - if ((error = copyin(iovp, iov, iovlen))) + if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) + return (EBADF); + + /* Checks for positioned read. */ + if (flags & FO_POSITION) { + struct vnode *vp = fp->f_data; + + if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || + (vp->v_flag & VISTTY)) { + error = ESPIPE; goto done; -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktriovec(p, iov, iovcnt); -#endif - } else { - iov = (struct iovec *)iovp; /* de-constify */ - } + } - auio.uio_iov = iov; - auio.uio_iovcnt = iovcnt; - auio.uio_rw = UIO_READ; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = 0; - for (i = 0; i < iovcnt; i++) { - auio.uio_resid += iov->iov_len; - /* - * Reads return ssize_t because -1 is returned on error. - * Therefore we must restrict the length to SSIZE_MAX to - * avoid garbage return values. Note that the addition is - * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. - */ - if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { + if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } - iov++; } + + uio->uio_rw = UIO_READ; + uio->uio_segflg = UIO_USERSPACE; + uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); - memcpy(ktriov, auio.uio_iov, iovlen); + memcpy(ktriov, uio->uio_iov, iovlen); } #endif - cnt = auio.uio_resid; - error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred); - if (error) - if (auio.uio_resid != cnt && (error == ERESTART || + cnt = uio->uio_resid; + error = (*fp->f_ops->fo_read)(fp, uio, flags); + if (error) { + if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; - cnt -= auio.uio_resid; + } + cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_rxfer++; @@ -216,9 +253,6 @@ dofilereadv(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, #endif *retval = cnt; done: - if (needfree) - free(needfree, M_IOV, iovlen); - out: FRELE(fp, p); return (error); } @@ -235,18 +269,18 @@ sys_write(struct proc *p, void *v, register_t *retval) syscallarg(size_t) nbyte; } */ *uap = v; struct iovec iov; - int fd = SCARG(uap, fd); - struct file *fp; - struct filedesc *fdp = p->p_fd; - - if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) - return (EBADF); + struct uio auio; iov.iov_base = (void *)SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); + if (iov.iov_len > SSIZE_MAX) + return (EINVAL); + + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_resid = iov.iov_len; - /* dofilewritev() will FRELE the descriptor for us */ - return (dofilewritev(p, fd, fp, &iov, 1, 0, &fp->f_offset, retval)); + return (dofilewritev(p, SCARG(uap, fd), &auio, 0, retval)); } /* @@ -260,102 +294,81 @@ sys_writev(struct proc *p, void *v, register_t *retval) syscallarg(const struct iovec *) iovp; syscallarg(int) iovcnt; } */ *uap = v; - int fd = SCARG(uap, fd); - struct file *fp; - struct filedesc *fdp = p->p_fd; + struct iovec aiov[UIO_SMALLIOV], *iov = NULL; + int error, iovcnt = SCARG(uap, iovcnt); + struct uio auio; + size_t resid; - if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) - return (EBADF); + error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); + if (error) + goto done; + + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = resid; - /* dofilewritev() will FRELE the descriptor for us */ - return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, - &fp->f_offset, retval)); + error = dofilewritev(p, SCARG(uap, fd), &auio, 0, retval); + done: + iovec_free(iov, iovcnt); + return (error); } int -dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, - int iovcnt, int userspace, off_t *offset, register_t *retval) +dofilewritev(struct proc *p, int fd, struct uio *uio, int flags, + register_t *retval) { - struct iovec aiov[UIO_SMALLIOV]; - struct uio auio; - struct iovec *iov; - struct iovec *needfree = NULL; - long i, cnt, error = 0; + struct filedesc *fdp = p->p_fd; + struct file *fp; + long cnt, error = 0; u_int iovlen; #ifdef KTRACE struct iovec *ktriov = NULL; #endif - /* note: can't use iovlen until iovcnt is validated */ - iovlen = iovcnt * sizeof(struct iovec); + KASSERT(uio->uio_iov != NULL && uio->uio_iovcnt > 0); + iovlen = uio->uio_iovcnt * sizeof(struct iovec); - /* - * If the iovec array exists in userspace, it needs to be copied in; - * otherwise, it can be used directly. - */ - if (userspace) { - if ((u_int)iovcnt > UIO_SMALLIOV) { - if ((u_int)iovcnt > IOV_MAX) { - error = EINVAL; - goto out; - } - iov = needfree = malloc(iovlen, M_IOV, M_WAITOK); - } else if ((u_int)iovcnt > 0) { - iov = aiov; - needfree = NULL; - } else { - error = EINVAL; - goto out; - } - if ((error = copyin(iovp, iov, iovlen))) + if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) + return (EBADF); + + /* Checks for positioned write. */ + if (flags & FO_POSITION) { + struct vnode *vp = fp->f_data; + + if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || + (vp->v_flag & VISTTY)) { + error = ESPIPE; goto done; -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktriovec(p, iov, iovcnt); -#endif - } else { - iov = (struct iovec *)iovp; /* de-constify */ - } + } - auio.uio_iov = iov; - auio.uio_iovcnt = iovcnt; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_USERSPACE; - auio.uio_procp = p; - auio.uio_resid = 0; - for (i = 0; i < iovcnt; i++) { - auio.uio_resid += iov->iov_len; - /* - * Writes return ssize_t because -1 is returned on error. - * Therefore we must restrict the length to SSIZE_MAX to - * avoid garbage return values. Note that the addition is - * guaranteed to not wrap because SSIZE_MAX * 2 < SIZE_MAX. - */ - if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) { + if (uio->uio_offset < 0 && vp->v_type != VCHR) { error = EINVAL; goto done; } - iov++; } + + uio->uio_rw = UIO_WRITE; + uio->uio_segflg = UIO_USERSPACE; + uio->uio_procp = p; #ifdef KTRACE /* * if tracing, save a copy of iovec */ if (KTRPOINT(p, KTR_GENIO)) { ktriov = malloc(iovlen, M_TEMP, M_WAITOK); - memcpy(ktriov, auio.uio_iov, iovlen); + memcpy(ktriov, uio->uio_iov, iovlen); } #endif - cnt = auio.uio_resid; - error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred); + cnt = uio->uio_resid; + error = (*fp->f_ops->fo_write)(fp, uio, flags); if (error) { - if (auio.uio_resid != cnt && (error == ERESTART || + if (uio->uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) ptsignal(p, SIGPIPE, STHREAD); } - cnt -= auio.uio_resid; + cnt -= uio->uio_resid; mtx_enter(&fp->f_mtx); fp->f_wxfer++; @@ -370,9 +383,6 @@ dofilewritev(struct proc *p, int fd, struct file *fp, const struct iovec *iovp, #endif *retval = cnt; done: - if (needfree) - free(needfree, M_IOV, iovlen); - out: FRELE(fp, p); return (error); } diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 7c8378602a8..7709a64cf4e 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_pipe.c,v 1.84 2018/08/15 13:19:06 visa Exp $ */ +/* $OpenBSD: sys_pipe.c,v 1.85 2018/08/20 16:00:22 mpi Exp $ */ /* * Copyright (c) 1996 John S. Dyson @@ -52,8 +52,8 @@ /* * interfaces to the outside world */ -int pipe_read(struct file *, off_t *, struct uio *, struct ucred *); -int pipe_write(struct file *, off_t *, struct uio *, struct ucred *); +int pipe_read(struct file *, struct uio *, int); +int pipe_write(struct file *, struct uio *, int); int pipe_close(struct file *, struct proc *); int pipe_poll(struct file *, int events, struct proc *); int pipe_kqfilter(struct file *fp, struct knote *kn); @@ -308,7 +308,7 @@ pipeselwakeup(struct pipe *cpipe) } int -pipe_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +pipe_read(struct file *fp, struct uio *uio, int fflags) { struct pipe *rpipe = fp->f_data; int error; @@ -424,7 +424,7 @@ unlocked_error: } int -pipe_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +pipe_write(struct file *fp, struct uio *uio, int fflags) { int error = 0; size_t orig_resid; diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 57a24d8f67d..22b3e266654 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_socket.c,v 1.40 2018/07/30 12:22:14 mpi Exp $ */ +/* $OpenBSD: sys_socket.c,v 1.41 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: sys_socket.c,v 1.13 1995/08/12 23:59:09 mycroft Exp $ */ /* @@ -59,7 +59,7 @@ struct fileops socketops = { }; int -soo_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +soo_read(struct file *fp, struct uio *uio, int fflags) { struct socket *so = (struct socket *)fp->f_data; int flags = 0; @@ -71,7 +71,7 @@ soo_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) } int -soo_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +soo_write(struct file *fp, struct uio *uio, int fflags) { struct socket *so = (struct socket *)fp->f_data; int flags = 0; diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 266b68ed6a3..767ec035e08 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_syscalls.c,v 1.303 2018/08/13 20:36:35 deraadt Exp $ */ +/* $OpenBSD: vfs_syscalls.c,v 1.304 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */ /* @@ -3038,33 +3038,19 @@ sys_pread(struct proc *p, void *v, register_t *retval) syscallarg(off_t) offset; } */ *uap = v; struct iovec iov; - struct filedesc *fdp = p->p_fd; - struct file *fp; - struct vnode *vp; - off_t offset; - int fd = SCARG(uap, fd); + struct uio auio; iov.iov_base = SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); - - if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) - return (EBADF); - - vp = fp->f_data; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || - (vp->v_flag & VISTTY)) { - FRELE(fp, p); - return (ESPIPE); - } - - offset = SCARG(uap, offset); - if (offset < 0 && vp->v_type != VCHR) { - FRELE(fp, p); + if (iov.iov_len > SSIZE_MAX) return (EINVAL); - } - /* dofilereadv() will FRELE the descriptor for us */ - return (dofilereadv(p, fd, fp, &iov, 1, 0, &offset, retval)); + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_resid = iov.iov_len; + auio.uio_offset = SCARG(uap, offset); + + return (dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval)); } /* @@ -3080,31 +3066,24 @@ sys_preadv(struct proc *p, void *v, register_t *retval) syscallarg(int) pad; syscallarg(off_t) offset; } */ *uap = v; - struct filedesc *fdp = p->p_fd; - struct file *fp; - struct vnode *vp; - off_t offset; - int fd = SCARG(uap, fd); - - if ((fp = fd_getfile_mode(fdp, fd, FREAD)) == NULL) - return (EBADF); + struct iovec aiov[UIO_SMALLIOV], *iov = NULL; + int error, iovcnt = SCARG(uap, iovcnt); + struct uio auio; + size_t resid; - vp = fp->f_data; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || - (vp->v_flag & VISTTY)) { - FRELE(fp, p); - return (ESPIPE); - } + error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); + if (error) + goto done; - offset = SCARG(uap, offset); - if (offset < 0 && vp->v_type != VCHR) { - FRELE(fp, p); - return (EINVAL); - } + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = resid; + auio.uio_offset = SCARG(uap, offset); - /* dofilereadv() will FRELE the descriptor for us */ - return (dofilereadv(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), 1, - &offset, retval)); + error = dofilereadv(p, SCARG(uap, fd), &auio, FO_POSITION, retval); + done: + iovec_free(iov, iovcnt); + return (error); } /* @@ -3121,33 +3100,19 @@ sys_pwrite(struct proc *p, void *v, register_t *retval) syscallarg(off_t) offset; } */ *uap = v; struct iovec iov; - struct filedesc *fdp = p->p_fd; - struct file *fp; - struct vnode *vp; - off_t offset; - int fd = SCARG(uap, fd); + struct uio auio; iov.iov_base = (void *)SCARG(uap, buf); iov.iov_len = SCARG(uap, nbyte); - - if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) - return (EBADF); - - vp = fp->f_data; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || - (vp->v_flag & VISTTY)) { - FRELE(fp, p); - return (ESPIPE); - } - - offset = SCARG(uap, offset); - if (offset < 0 && vp->v_type != VCHR) { - FRELE(fp, p); + if (iov.iov_len > SSIZE_MAX) return (EINVAL); - } - /* dofilewritev() will FRELE the descriptor for us */ - return (dofilewritev(p, fd, fp, &iov, 1, 0, &offset, retval)); + auio.uio_iov = &iov; + auio.uio_iovcnt = 1; + auio.uio_resid = iov.iov_len; + auio.uio_offset = SCARG(uap, offset); + + return (dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval)); } /* @@ -3163,29 +3128,22 @@ sys_pwritev(struct proc *p, void *v, register_t *retval) syscallarg(int) pad; syscallarg(off_t) offset; } */ *uap = v; - struct filedesc *fdp = p->p_fd; - struct file *fp; - struct vnode *vp; - off_t offset; - int fd = SCARG(uap, fd); - - if ((fp = fd_getfile_mode(fdp, fd, FWRITE)) == NULL) - return (EBADF); + struct iovec aiov[UIO_SMALLIOV], *iov = NULL; + int error, iovcnt = SCARG(uap, iovcnt); + struct uio auio; + size_t resid; - vp = fp->f_data; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO || - (vp->v_flag & VISTTY)) { - FRELE(fp, p); - return (ESPIPE); - } + error = iovec_copyin(SCARG(uap, iovp), &iov, aiov, iovcnt, &resid); + if (error) + goto done; - offset = SCARG(uap, offset); - if (offset < 0 && vp->v_type != VCHR) { - FRELE(fp, p); - return (EINVAL); - } + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_resid = resid; + auio.uio_offset = SCARG(uap, offset); - /* dofilewritev() will FRELE the descriptor for us */ - return (dofilewritev(p, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt), - 1, &offset, retval)); + error = dofilewritev(p, SCARG(uap, fd), &auio, FO_POSITION, retval); + done: + iovec_free(iov, iovcnt); + return (error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index bc7d9ec3d03..2d5d00e5eff 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_vnops.c,v 1.96 2018/08/15 13:19:06 visa Exp $ */ +/* $OpenBSD: vfs_vnops.c,v 1.97 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */ /* @@ -59,8 +59,8 @@ #include #include -int vn_read(struct file *, off_t *, struct uio *, struct ucred *); -int vn_write(struct file *, off_t *, struct uio *, struct ucred *); +int vn_read(struct file *, struct uio *, int); +int vn_write(struct file *, struct uio *, int); int vn_poll(struct file *, int, struct proc *); int vn_kqfilter(struct file *, struct knote *); int vn_closefile(struct file *, struct proc *); @@ -335,24 +335,37 @@ vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, * File table vnode read routine. */ int -vn_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +vn_read(struct file *fp, struct uio *uio, int fflags) { struct vnode *vp = fp->f_data; - int error; + struct ucred *cred = fp->f_cred; size_t count = uio->uio_resid; + off_t offset; + int error; + + /* + * Check below can race. We can block on the vnode lock + * and resume with a different `fp->f_offset' value. + */ + if ((fflags & FO_POSITION) == 0) + offset = fp->f_offset; + else + offset = uio->uio_offset; /* no wrap around of offsets except on character devices */ - if (vp->v_type != VCHR && count > LLONG_MAX - *poff) + if (vp->v_type != VCHR && count > LLONG_MAX - offset) return (EINVAL); if (vp->v_type == VDIR) return (EISDIR); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - uio->uio_offset = *poff; + if ((fflags & FO_POSITION) == 0) + uio->uio_offset = fp->f_offset; error = VOP_READ(vp, uio, (fp->f_flag & FNONBLOCK) ? IO_NDELAY : 0, cred); - *poff += count - uio->uio_resid; + if ((fflags & FO_POSITION) == 0) + fp->f_offset += count - uio->uio_resid; VOP_UNLOCK(vp); return (error); } @@ -361,15 +374,16 @@ vn_read(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) * File table vnode write routine. */ int -vn_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) +vn_write(struct file *fp, struct uio *uio, int fflags) { struct vnode *vp = fp->f_data; + struct ucred *cred = fp->f_cred; int error, ioflag = IO_UNIT; size_t count; /* note: pwrite/pwritev are unaffected by O_APPEND */ if (vp->v_type == VREG && (fp->f_flag & O_APPEND) && - poff == &fp->f_offset) + (fflags & FO_POSITION) == 0) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; @@ -377,13 +391,16 @@ vn_write(struct file *fp, off_t *poff, struct uio *uio, struct ucred *cred) (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - uio->uio_offset = *poff; + if ((fflags & FO_POSITION) == 0) + uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); - if (ioflag & IO_APPEND) - *poff = uio->uio_offset; - else - *poff += count - uio->uio_resid; + if ((fflags & FO_POSITION) == 0) { + if (ioflag & IO_APPEND) + fp->f_offset = uio->uio_offset; + else + fp->f_offset += count - uio->uio_resid; + } VOP_UNLOCK(vp); return (error); } diff --git a/sys/sys/file.h b/sys/sys/file.h index ef543367479..b2a1f95e898 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -1,4 +1,4 @@ -/* $OpenBSD: file.h,v 1.52 2018/07/03 20:40:25 kettenis Exp $ */ +/* $OpenBSD: file.h,v 1.53 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: file.h,v 1.11 1995/03/26 20:24:13 jtc Exp $ */ /* @@ -47,18 +47,17 @@ struct file; struct ucred; struct fileops { - int (*fo_read)(struct file *, off_t *, struct uio *, - struct ucred *); - int (*fo_write)(struct file *, off_t *, struct uio *, - struct ucred *); - int (*fo_ioctl)(struct file *, u_long, caddr_t, - struct proc *); + int (*fo_read)(struct file *, struct uio *, int); + int (*fo_write)(struct file *, struct uio *, int); + int (*fo_ioctl)(struct file *, u_long, caddr_t, struct proc *); int (*fo_poll)(struct file *, int, struct proc *); int (*fo_kqfilter)(struct file *, struct knote *); int (*fo_stat)(struct file *, struct stat *, struct proc *); int (*fo_close)(struct file *, struct proc *); int (*fo_seek)(struct file *, off_t *, int, struct proc *); }; +#define FO_POSITION 0x01 /* positioned read/write */ + /* * Kernel descriptor table. diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index c3b2a362096..3c42c697e2d 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.86 2018/07/30 12:22:14 mpi Exp $ */ +/* $OpenBSD: socketvar.h,v 1.87 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -275,15 +275,12 @@ struct knote; /* * File operations on sockets. */ -int soo_read(struct file *fp, off_t *, struct uio *uio, - struct ucred *cred); -int soo_write(struct file *fp, off_t *, struct uio *uio, - struct ucred *cred); -int soo_ioctl(struct file *fp, u_long cmd, caddr_t data, - struct proc *p); -int soo_poll(struct file *fp, int events, struct proc *p); -int soo_kqfilter(struct file *fp, struct knote *kn); -int soo_close(struct file *fp, struct proc *p); +int soo_read(struct file *, struct uio *, int); +int soo_write(struct file *, struct uio *, int); +int soo_ioctl(struct file *, u_long, caddr_t, struct proc *); +int soo_poll(struct file *, int events, struct proc *); +int soo_kqfilter(struct file *, struct knote *); +int soo_close(struct file *, struct proc *); int soo_stat(struct file *, struct stat *, struct proc *); void sbappend(struct socket *, struct sockbuf *, struct mbuf *); void sbappendstream(struct socket *, struct sockbuf *, struct mbuf *); diff --git a/sys/sys/uio.h b/sys/sys/uio.h index c7282f2e768..fdc46903e0e 100644 --- a/sys/sys/uio.h +++ b/sys/sys/uio.h @@ -1,4 +1,4 @@ -/* $OpenBSD: uio.h,v 1.18 2015/01/18 20:35:44 guenther Exp $ */ +/* $OpenBSD: uio.h,v 1.19 2018/08/20 16:00:22 mpi Exp $ */ /* $NetBSD: uio.h,v 1.12 1996/02/09 18:25:45 christos Exp $ */ /* @@ -97,10 +97,11 @@ __END_DECLS int ureadc(int c, struct uio *); struct file; -int dofilereadv(struct proc *, int, struct file *, - const struct iovec *, int, int, off_t *, register_t *); -int dofilewritev(struct proc *, int, struct file *, - const struct iovec *, int, int, off_t *, register_t *); +int iovec_copyin(const struct iovec *, struct iovec **, struct iovec *, + unsigned int, size_t *); +void iovec_free(struct iovec *, unsigned int ); +int dofilereadv(struct proc *, int, struct uio *, int, register_t *); +int dofilewritev(struct proc *, int, struct uio *, int, register_t *); #endif /* !_KERNEL */ -- 2.20.1