From: mpi Date: Fri, 15 Oct 2021 06:59:57 +0000 (+0000) Subject: Revert "Implement select(2) and pselect(2) on top of kqueue." X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=a45acc462adf5a8f72b828495cf35c3f8b0ed0d0;p=openbsd Revert "Implement select(2) and pselect(2) on top of kqueue." It introduced a regression exposed by the ssh tests. Reported by anton@ --- diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 5b5a7df6c2c..1ab1c808b9e 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sys_generic.c,v 1.136 2021/10/14 08:46:01 mpi Exp $ */ +/* $OpenBSD: sys_generic.c,v 1.137 2021/10/15 06:59:57 mpi Exp $ */ /* $NetBSD: sys_generic.c,v 1.24 1996/03/29 00:25:32 cgd Exp $ */ /* @@ -55,7 +55,6 @@ #include #include #include -#include #ifdef KTRACE #include #endif @@ -67,21 +66,8 @@ #include -/* - * Debug values: - * 1 - print implementation errors, things that should not happen. - * 2 - print ppoll(2) information, somewhat verbose - * 3 - print pselect(2) and ppoll(2) information, very verbose - */ -int kqpoll_debug = 0; -#define DPRINTFN(v, x...) if (kqpoll_debug > v) { \ - printf("%s(%d): ", curproc->p_p->ps_comm, curproc->p_tid); \ - printf(x); \ -} - -int pselregister(struct proc *, fd_set *[], int, int *); -int pselcollect(struct proc *, struct kevent *, fd_set *[], int *); - +int selscan(struct proc *, fd_set *, fd_set *, int, int, register_t *); +void pollscan(struct proc *, struct pollfd *, u_int, register_t *); int pollout(struct pollfd *, struct pollfd *, u_int); int dopselect(struct proc *, int, fd_set *, fd_set *, fd_set *, struct timespec *, const sigset_t *, register_t *); @@ -598,10 +584,11 @@ int dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, struct timespec *timeout, const sigset_t *sigmask, register_t *retval) { - struct kqueue_scan_state scan; fd_mask bits[6]; fd_set *pibits[3], *pobits[3]; - int error, ncollected = 0, nevents = 0; + struct timespec elapsed, start, stop; + uint64_t nsecs; + int s, ncoll, error = 0; u_int ni; if (nd < 0) @@ -631,8 +618,6 @@ dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, pobits[2] = (fd_set *)&bits[5]; } - kqpoll_init(); - #define getbits(name, x) \ if (name && (error = copyin(name, pibits[x], ni))) \ goto done; @@ -651,61 +636,43 @@ dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, if (sigmask) dosigsuspend(p, *sigmask &~ sigcantmask); - /* Register kqueue events */ - error = pselregister(p, pibits, nd, &nevents); - if (error != 0) +retry: + ncoll = nselcoll; + atomic_setbits_int(&p->p_flag, P_SELECT); + error = selscan(p, pibits[0], pobits[0], nd, ni, retval); + if (error || *retval) goto done; - - /* - * The poll/select family of syscalls has been designed to - * block when file descriptors are not available, even if - * there's nothing to wait for. - */ - if (nevents == 0) { - uint64_t nsecs = INFSLP; - + if (timeout == NULL || timespecisset(timeout)) { if (timeout != NULL) { - if (!timespecisset(timeout)) - goto done; - nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP)); + getnanouptime(&start); + nsecs = MIN(TIMESPEC_TO_NSEC(timeout), MAXTSLP); + } else + nsecs = INFSLP; + s = splhigh(); + if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { + splx(s); + goto retry; } - error = tsleep_nsec(&p->p_kq, PSOCK | PCATCH, "kqsel", nsecs); - /* select is not restarted after signals... */ - if (error == ERESTART) - error = EINTR; - if (error == EWOULDBLOCK) - error = 0; - goto done; - } - - /* Collect at most `nevents' possibly waiting in kqueue_scan() */ - kqueue_scan_setup(&scan, p->p_kq); - while (nevents > 0) { - struct kevent kev[KQ_NEVENTS]; - int i, ready, count; - - /* Maximum number of events per iteration */ - count = MIN(nitems(kev), nevents); - ready = kqueue_scan(&scan, count, kev, timeout, p, &error); -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktrevent(p, kev, ready); -#endif - /* Convert back events that are ready. */ - for (i = 0; i < ready && error == 0; i++) - error = pselcollect(p, &kev[i], pobits, &ncollected); - /* - * Stop if there was an error or if we had enough - * space to collect all events that were ready. - */ - if (error || ready < count) - break; - - nevents -= ready; + atomic_clearbits_int(&p->p_flag, P_SELECT); + error = tsleep_nsec(&selwait, PSOCK | PCATCH, "select", nsecs); + splx(s); + if (timeout != NULL) { + getnanouptime(&stop); + timespecsub(&stop, &start, &elapsed); + timespecsub(timeout, &elapsed, timeout); + if (timeout->tv_sec < 0) + timespecclear(timeout); + } + if (error == 0 || error == EWOULDBLOCK) + goto retry; } - kqueue_scan_finish(&scan); - *retval = ncollected; - done: +done: + atomic_clearbits_int(&p->p_flag, P_SELECT); + /* select is not restarted after signals... */ + if (error == ERESTART) + error = EINTR; + if (error == EWOULDBLOCK) + error = 0; #define putbits(name, x) \ if (name && (error2 = copyout(pobits[x], name, ni))) \ error = error2; @@ -727,104 +694,40 @@ dopselect(struct proc *p, int nd, fd_set *in, fd_set *ou, fd_set *ex, if (pibits[0] != (fd_set *)&bits[0]) free(pibits[0], M_TEMP, 6 * ni); - - kqueue_purge(p, p->p_kq); - p->p_kq_serial += nd; - return (error); } -/* - * Convert fd_set into kqueue events and register them on the - * per-thread queue. - */ int -pselregister(struct proc *p, fd_set *pibits[3], int nfd, int *nregistered) +selscan(struct proc *p, fd_set *ibits, fd_set *obits, int nfd, int ni, + register_t *retval) { - static const int evf[] = { EVFILT_READ, EVFILT_WRITE, EVFILT_EXCEPT }; - static const int evff[] = { 0, 0, NOTE_OOB }; - int msk, i, j, fd, nevents = 0, error = 0; - struct kevent kev; + caddr_t cibits = (caddr_t)ibits, cobits = (caddr_t)obits; + struct filedesc *fdp = p->p_fd; + int msk, i, j, fd; fd_mask bits; + struct file *fp; + int n = 0; + static const int flag[3] = { POLLIN, POLLOUT|POLL_NOHUP, POLLPRI }; for (msk = 0; msk < 3; msk++) { + fd_set *pibits = (fd_set *)&cibits[msk*ni]; + fd_set *pobits = (fd_set *)&cobits[msk*ni]; + for (i = 0; i < nfd; i += NFDBITS) { - bits = pibits[msk]->fds_bits[i / NFDBITS]; + bits = pibits->fds_bits[i/NFDBITS]; while ((j = ffs(bits)) && (fd = i + --j) < nfd) { bits &= ~(1 << j); - - DPRINTFN(2, "select fd %d mask %d serial %lu\n", - fd, msk, p->p_kq_serial); - EV_SET(&kev, fd, evf[msk], - EV_ADD|EV_ENABLE|EV_ONESHOT|__EV_POLL, - evff[msk], 0, (void *)(p->p_kq_serial)); -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktrevent(p, &kev, 1); -#endif - error = kqueue_register(p->p_kq, &kev, p); - switch (error) { - case 0: - nevents++; - /* FALLTHROUGH */ - case EOPNOTSUPP:/* No underlying kqfilter */ - case EINVAL: /* Unimplemented filter */ - error = 0; - break; - case ENXIO: /* Device has been detached */ - default: - goto bad; + if ((fp = fd_getfile(fdp, fd)) == NULL) + return (EBADF); + if ((*fp->f_ops->fo_poll)(fp, flag[msk], p)) { + FD_SET(fd, pobits); + n++; } + FRELE(fp, p); } } } - - *nregistered = nevents; - return (0); -bad: - DPRINTFN(0, "select fd %u filt %d error %d\n", (int)kev.ident, - kev.filter, error); - return (error); -} - -/* - * Convert given kqueue event into corresponding select(2) bit. - */ -int -pselcollect(struct proc *p, struct kevent *kevp, fd_set *pobits[3], - int *ncollected) -{ - /* Filter out and lazily delete spurious events */ - if ((unsigned long)kevp->udata != p->p_kq_serial) { - DPRINTFN(0, "select fd %u mismatched serial %lu\n", - (int)kevp->ident, p->p_kq_serial); - kevp->flags = EV_DISABLE|EV_DELETE; - kqueue_register(p->p_kq, kevp, p); - return (0); - } - - if (kevp->flags & EV_ERROR) { - DPRINTFN(2, "select fd %d filt %d error %d\n", - (int)kevp->ident, kevp->filter, (int)kevp->data); - return (kevp->data); - } - - switch (kevp->filter) { - case EVFILT_READ: - FD_SET(kevp->ident, pobits[0]); - break; - case EVFILT_WRITE: - FD_SET(kevp->ident, pobits[1]); - break; - case EVFILT_EXCEPT: - FD_SET(kevp->ident, pobits[2]); - break; - default: - KASSERT(0); - } - (*ncollected)++; - - DPRINTFN(2, "select fd %d filt %d\n", (int)kevp->ident, kevp->filter); + *retval = n; return (0); }