Extend the connection and session FSMs so that connection failure is
author claudio <claudio@openbsd.org>
Sat, 10 May 2014 11:30:47 +0000 (11:30 +0000)
committer claudio <claudio@openbsd.org>
Sat, 10 May 2014 11:30:47 +0000 (11:30 +0000)
handled more gracefully. Losing the TCP connection no longer results
in an unrecoverable stop requiring a restart of iscsid.

usr.sbin/iscsid/connection.c
usr.sbin/iscsid/initiator.c
usr.sbin/iscsid/iscsid.c
usr.sbin/iscsid/iscsid.h
usr.sbin/iscsid/session.c

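diff --git a/usr.sbin/iscsid/connection.c b/usr.sbin/iscsid/connection.c
index 35304f9..044abba 100644
--- a/usr.sbin/iscsid/connection.c
+++ b/usr.sbin/iscsid/connection.c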
@@ -1,4 +1,4 @@
-/*     $OpenBSD: connection.c,v 1.18 2014/04/21 18:59:05 claudio Exp $ */
+/*     $OpenBSD: connection.c,v 1.19 2014/05/10 11:30:47 claudio Exp $ */
 
 /*
  * Copyright (c) 2009 Claudio Jeker <claudio@openbsd.org>
@@ -42,9 +42,11 @@ void conn_write_dispatch(int, short, void *);
 int    c_do_connect(struct connection *, enum c_event);
 int    c_do_login(struct connection *, enum c_event);
 int    c_do_loggedin(struct connection *, enum c_event);
+int    c_do_req_logout(struct connection *, enum c_event);
 int    c_do_logout(struct connection *, enum c_event);
 int    c_do_loggedout(struct connection *, enum c_event);
 int    c_do_fail(struct connection *, enum c_event);
+int    c_do_cleanup(struct connection *, enum c_event);
 
 const char *conn_state(int);
 const char *conn_event(enum c_event);
@@ -99,7 +101,6 @@ conn_new(struct session *s, struct connection_config *cc)
 
        event_set(&c->ev, c->fd, EV_READ|EV_PERSIST, conn_dispatch, c);
        event_set(&c->wev, c->fd, EV_WRITE, conn_write_dispatch, c);
-       event_add(&c->ev, NULL);
 
        conn_fsm(c, CONN_EV_CONNECT);
 }
@@ -107,12 +108,15 @@ conn_new(struct session *s, struct connection_config *cc)
 void
 conn_free(struct connection *c)
 {
+       log_debug("conn_free");
+
        pdu_readbuf_free(&c->prbuf);
        pdu_free_queue(&c->pdu_w);
 
        event_del(&c->ev);
        event_del(&c->wev);
-       close(c->fd);
+       if (c->fd != -1)
+               close(c->fd);
 
        taskq_cleanup(&c->tasks);
 
@@ -164,7 +168,7 @@ conn_write_dispatch(int fd, short event, void *arg)
                len = sizeof(error);
                if (getsockopt(c->fd, SOL_SOCKET, SO_ERROR,
                    &error, &len) == -1 || (errno = error)) {
-                       log_warn("cwd connect(%s)",
+                       log_warn("connect to %s failed",
                            log_sockaddr(&c->config.TargetAddr));
                        conn_fsm(c, CONN_EV_FAIL);
                        return;
@@ -383,8 +387,17 @@ struct {
        { CONN_XPT_WAIT, CONN_EV_CONNECTED, c_do_login },       /* T4 */
        { CONN_IN_LOGIN, CONN_EV_LOGGED_IN, c_do_loggedin },    /* T5 */
        { CONN_LOGGED_IN, CONN_EV_LOGOUT, c_do_logout },        /* T9 */
+       { CONN_LOGGED_IN, CONN_EV_REQ_LOGOUT, c_do_req_logout },/* T11 */
        { CONN_LOGOUT_REQ, CONN_EV_LOGOUT, c_do_logout },       /* T10 */
+       { CONN_LOGOUT_REQ, CONN_EV_REQ_LOGOUT, c_do_req_logout},/* T12 */
+       { CONN_LOGOUT_REQ, CONN_EV_LOGGED_OUT, c_do_loggedout },/* T18 */
        { CONN_IN_LOGOUT, CONN_EV_LOGGED_OUT, c_do_loggedout }, /* T13 */
+       { CONN_IN_LOGOUT, CONN_EV_REQ_LOGOUT, c_do_req_logout },/* T14 */
+       { CONN_CLEANUP_WAIT, CONN_EV_CLEANING_UP, c_do_cleanup},/* M2 */
+       { CONN_CLEANUP_WAIT, CONN_EV_FREE, c_do_loggedout },    /* M1 */
+       { CONN_IN_CLEANUP, CONN_EV_FREE, c_do_loggedout },      /* M4 */
+       { CONN_IN_CLEANUP, CONN_EV_CLEANING_UP, c_do_cleanup},
+       /* either one of T2, T7, T15, T16, T17, M3 */
        { CONN_ANYSTATE, CONN_EV_CLOSED, c_do_fail },
        { CONN_ANYSTATE, CONN_EV_FAIL, c_do_fail },
        { CONN_ANYSTATE, CONN_EV_FREE, c_do_fail },
@@ -423,7 +436,7 @@ c_do_connect(struct connection *c, enum c_event ev)
        if (c->fd == -1) {
                log_warnx("connect(%s), lost socket",
                    log_sockaddr(&c->config.TargetAddr));
-               session_fsm(c->session, SESS_EV_CONN_FAIL, c);
+               session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
                return CONN_FREE;
        }
        if (c->config.LocalAddr.ss_len != 0) {
@@ -431,7 +444,7 @@ c_do_connect(struct connection *c, enum c_event ev)
                    c->config.LocalAddr.ss_len) == -1) {
                        log_warn("bind(%s)",
                            log_sockaddr(&c->config.LocalAddr));
-                       session_fsm(c->session, SESS_EV_CONN_FAIL, c);
+                       session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
                        return CONN_FREE;
                }
        }
@@ -439,14 +452,16 @@ c_do_connect(struct connection *c, enum c_event ev)
            c->config.TargetAddr.ss_len) == -1) {
                if (errno == EINPROGRESS) {
                        event_add(&c->wev, NULL);
+                       event_add(&c->ev, NULL);
                        return CONN_XPT_WAIT;
                } else {
                        log_warn("connect(%s)",
                            log_sockaddr(&c->config.TargetAddr));
-                       session_fsm(c->session, SESS_EV_CONN_FAIL, c);
+                       session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
                        return CONN_FREE;
                }
        }
+       event_add(&c->ev, NULL);
        /* move forward */
        return c_do_login(c, CONN_EV_CONNECTED);
 }
@@ -463,11 +478,22 @@ int
 c_do_loggedin(struct connection *c, enum c_event ev)
 {
        iscsi_merge_conn_params(&c->active, &c->mine, &c->his);
-       session_fsm(c->session, SESS_EV_CONN_LOGGED_IN, c);
+       session_fsm(c->session, SESS_EV_CONN_LOGGED_IN, c, 0);
 
        return CONN_LOGGED_IN;
 }
 
+int
+c_do_req_logout(struct connection *c, enum c_event ev)
+{
+       /* target requested logout. XXX implement async handler */
+
+       if (c->state & CONN_IN_LOGOUT)
+               return CONN_IN_LOGOUT;
+       else
+               return CONN_LOGOUT_REQ;
+}
+
 int
 c_do_logout(struct connection *c, enum c_event ev)
 {
@@ -478,30 +504,42 @@ c_do_logout(struct connection *c, enum c_event ev)
 int
 c_do_loggedout(struct connection *c, enum c_event ev)
 {
-       /* close TCP session and cleanup */
-       event_del(&c->ev);
-       event_del(&c->wev);
-       close(c->fd);
-
-       /* session is informed by the logout handler */
+       /*
+        * Called by the session fsm before calling conn_free.
+        * Doing this so the state transition is logged.
+        */
        return CONN_FREE;
 }
 
 int
 c_do_fail(struct connection *c, enum c_event ev)
 {
+       log_debug("c_do_fail");
+
        /* cleanup events so that the connection does not retrigger */
        event_del(&c->ev);
        event_del(&c->wev);
        close(c->fd);
+       c->fd = -1;     /* make sure this fd is not closed again */
+
+       /* all pending task have failed so clean them up */
+       taskq_cleanup(&c->tasks);
 
-       session_fsm(c->session, SESS_EV_CONN_FAIL, c);
+       /* session will take care of cleaning up the mess */
+       session_fsm(c->session, SESS_EV_CONN_FAIL, c, 0);
 
        if (ev == CONN_EV_FREE || c->state & CONN_NEVER_LOGGED_IN)
                return CONN_FREE;
        return CONN_CLEANUP_WAIT;
 }
 
+int
+c_do_cleanup(struct connection *c, enum c_event ev)
+{
+       /* nothing to do here just adjust state */
+       return CONN_IN_CLEANUP;
+}
+
 const char *
 conn_state(int s)
 {
@@ -547,10 +585,14 @@ conn_event(enum c_event e)
                return "connected";
        case CONN_EV_LOGGED_IN:
                return "logged in";
+       case CONN_EV_REQ_LOGOUT:
+               return "logout requested";
        case CONN_EV_LOGOUT:
                return "logout";
        case CONN_EV_LOGGED_OUT:
                return "logged out";
+       case CONN_EV_CLEANING_UP:
+               return "cleaning up";
        case CONN_EV_CLOSED:
                return "closed";
        case CONN_EV_FREE:
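diff --git a/usr.sbin/iscsid/initiator.c b/usr.sbin/iscsid/initiator.c
index a8b794d..a9903b8 100644
--- a/usr.sbin/iscsid/initiator.c
+++ b/usr.sbin/iscsid/initiator.c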
@@ -1,4 +1,4 @@
-/*     $OpenBSD: initiator.c,v 1.12 2014/04/20 16:49:56 claudio Exp $ */
+/*     $OpenBSD: initiator.c,v 1.13 2014/05/10 11:30:47 claudio Exp $ */
 
 /*
  * Copyright (c) 2009 Claudio Jeker <claudio@openbsd.org>
@@ -503,16 +503,17 @@ initiator_logout_cb(struct connection *c, void *arg, struct pdu *p)
        loresp = pdu_getbuf(p, NULL, PDU_HEADER);
        log_debug("initiator_logout_cb: "
            "response %d, Time2Wait %d, Time2Retain %d",
-           loresp->response, loresp->time2wait, loresp->time2retain);
+           loresp->response, ntohs(loresp->time2wait),
+           ntohs(loresp->time2retain));
 
        switch (loresp->response) {
        case ISCSI_LOGOUT_RESP_SUCCESS:
                if (tl->reason == ISCSI_LOGOUT_CLOSE_SESS) {
                        conn_fsm(c, CONN_EV_LOGGED_OUT);
-                       session_fsm(c->session, SESS_EV_CLOSED, NULL);
+                       session_fsm(c->session, SESS_EV_CLOSED, NULL, 0);
                } else {
                        conn_fsm(tl->c, CONN_EV_LOGGED_OUT);
-                       session_fsm(c->session, SESS_EV_CONN_CLOSED, tl->c);
+                       session_fsm(c->session, SESS_EV_CONN_CLOSED, tl->c, 0);
                }
                break;
        case ISCSI_LOGOUT_RESP_UNKN_CID:
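diff --git a/usr.sbin/iscsid/iscsid.c b/usr.sbin/iscsid/iscsid.c
index 0faa435..05621df 100644
--- a/usr.sbin/iscsid/iscsid.c
+++ b/usr.sbin/iscsid/iscsid.c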
@@ -1,4 +1,4 @@
-/*     $OpenBSD: iscsid.c,v 1.14 2014/04/21 18:00:23 claudio Exp $ */
+/*     $OpenBSD: iscsid.c,v 1.15 2014/05/10 11:30:47 claudio Exp $ */
 
 /*
  * Copyright (c) 2009 Claudio Jeker <claudio@openbsd.org>
@@ -252,7 +252,7 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
 
                session_config(s, sc);
                if (s->state == SESS_INIT)
-                       session_fsm(s, SESS_EV_START, NULL);
+                       session_fsm(s, SESS_EV_START, NULL, 0);
 
                control_compose(ch, CTRL_SUCCESS, NULL, 0);
                break;
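diff --git a/usr.sbin/iscsid/iscsid.h b/usr.sbin/iscsid/iscsid.h
index e47d622..af5a57c 100644
--- a/usr.sbin/iscsid/iscsid.h
+++ b/usr.sbin/iscsid/iscsid.h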
@@ -1,4 +1,4 @@
-/*     $OpenBSD: iscsid.h,v 1.13 2014/04/21 17:41:52 claudio Exp $ */
+/*     $OpenBSD: iscsid.h,v 1.14 2014/05/10 11:30:47 claudio Exp $ */
 
 /*
  * Copyright (c) 2009 Claudio Jeker <claudio@openbsd.org>
@@ -23,6 +23,7 @@
 
 #define ISCSID_BASE_NAME       "iqn.1995-11.org.openbsd.iscsid"
 #define ISCSID_DEF_CONNS       8
+#define ISCSID_HOLD_TIME_MAX   128
 
 #define PDU_READ_SIZE          (256 * 1024)
 #define CONTROL_READ_SIZE      8192
@@ -79,19 +80,18 @@ TAILQ_HEAD(taskq, task);
 #define SESS_FREE              0x0002
 #define SESS_LOGGED_IN         0x0004
 #define SESS_FAILED            0x0008
-#define SESS_DOWN              0x0010
 #define SESS_ANYSTATE          0xffff
 #define SESS_RUNNING           (SESS_FREE | SESS_LOGGED_IN | SESS_FAILED)
 
-#define CONN_FREE              0x0001
-#define CONN_XPT_WAIT          0x0002
-#define CONN_XPT_UP            0x0004
-#define CONN_IN_LOGIN          0x0008
-#define CONN_LOGGED_IN         0x0010
-#define CONN_IN_LOGOUT         0x0020
-#define CONN_LOGOUT_REQ                0x0040
-#define CONN_CLEANUP_WAIT      0x0080
-#define CONN_IN_CLEANUP                0x0100
+#define CONN_FREE              0x0001  /* S1 = R3 */
+#define CONN_XPT_WAIT          0x0002  /* S2 */
+#define CONN_XPT_UP            0x0004  /* S3 */
+#define CONN_IN_LOGIN          0x0008  /* S4 */
+#define CONN_LOGGED_IN         0x0010  /* S5 */
+#define CONN_IN_LOGOUT         0x0020  /* S6 */
+#define CONN_LOGOUT_REQ                0x0040  /* S7 */
+#define CONN_CLEANUP_WAIT      0x0080  /* S8 = R1 */
+#define CONN_IN_CLEANUP                0x0100  /* R2 */
 #define CONN_ANYSTATE          0xffff
 #define CONN_RUNNING           (CONN_LOGGED_IN | CONN_LOGOUT_REQ)
 #define CONN_FAILED            (CONN_CLEANUP_WAIT | CONN_IN_CLEANUP)
@@ -104,18 +104,24 @@ enum c_event {
        CONN_EV_CONNECT,
        CONN_EV_CONNECTED,
        CONN_EV_LOGGED_IN,
+       CONN_EV_REQ_LOGOUT,
        CONN_EV_LOGOUT,
        CONN_EV_LOGGED_OUT,
-       CONN_EV_CLOSED
+       CONN_EV_CLOSED,
+       CONN_EV_CLEANING_UP
 };
 
 enum s_event {
        SESS_EV_START,
+       SESS_EV_STOP,
        SESS_EV_CONN_LOGGED_IN,
        SESS_EV_CONN_FAIL,
        SESS_EV_CONN_CLOSED,
+       SESS_EV_REINSTATEMENT,
+       SESS_EV_TIMEOUT,
        SESS_EV_CLOSED,
-       SESS_EV_FAIL
+       SESS_EV_FAIL,
+       SESS_EV_FREE
 };
 
 #define SESS_ACT_UP            0
@@ -216,7 +222,7 @@ struct initiator {
 };
 
 struct sessev {
-       SIMPLEQ_ENTRY(sessev)    entry;
+       struct session          *sess;
        struct connection       *conn;
        enum s_event             event;
 };
@@ -230,14 +236,13 @@ struct session {
        struct session_params    his;
        struct session_params    active;
        struct initiator        *initiator;
-       struct event             fsm_ev;
-       SIMPLEQ_HEAD(, sessev)   fsmq;
        u_int32_t                cmdseqnum;
        u_int32_t                itt;
        u_int32_t                isid_base;     /* only 24 bits */
        u_int16_t                isid_qual;     /* inherited from initiator */
        u_int16_t                tsih;          /* target session id handle */
        u_int                    target;
+       int                      holdTimer;     /* internal hold timer */
        int                      state;
        int                      action;
 };
@@ -331,7 +336,8 @@ void        session_task_issue(struct session *, struct task *);
 void   session_logout_issue(struct session *, struct task *);
 void   session_schedule(struct session *);
 void   session_task_login(struct connection *);
-void   session_fsm(struct session *, enum s_event, struct connection *);
+void   session_fsm(struct session *, enum s_event, struct connection *,
+           unsigned int);
 
 void   conn_new(struct session *, struct connection_config *);
 void   conn_free(struct connection *);
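diff --git a/usr.sbin/iscsid/session.c b/usr.sbin/iscsid/session.c
index 2ec59b6..ab8c128 100644
--- a/usr.sbin/iscsid/session.c
+++ b/usr.sbin/iscsid/session.c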
@@ -1,4 +1,4 @@
-/*     $OpenBSD: session.c,v 1.6 2014/04/20 20:12:31 claudio Exp $ */
+/*     $OpenBSD: session.c,v 1.7 2014/05/10 11:30:47 claudio Exp $ */
 
 /*
  * Copyright (c) 2011 Claudio Jeker <claudio@openbsd.org>
@@ -40,7 +40,9 @@ int   sess_do_start(struct session *, struct sessev *);
 int    sess_do_conn_loggedin(struct session *, struct sessev *);
 int    sess_do_conn_fail(struct session *, struct sessev *);
 int    sess_do_conn_closed(struct session *, struct sessev *);
-int    sess_do_down(struct session *, struct sessev *);
+int    sess_do_stop(struct session *, struct sessev *);
+int    sess_do_free(struct session *, struct sessev *);
+int    sess_do_reinstatement(struct session *, struct sessev *);
 
 const char *sess_state(int);
 const char *sess_event(enum s_event);
@@ -81,8 +83,6 @@ session_new(struct initiator *i, u_int8_t st)
        TAILQ_INSERT_HEAD(&i->sessions, s, entry);
        TAILQ_INIT(&s->connections);
        TAILQ_INIT(&s->tasks);
-       SIMPLEQ_INIT(&s->fsmq);
-       evtimer_set(&s->fsm_ev, session_fsm_callback, s);
 
        return s;
 }
@@ -108,7 +108,8 @@ session_shutdown(struct session *s)
        log_debug("session[%s] going down", s->config.SessionName);
 
        s->action = SESS_ACT_DOWN;
-       if (s->state & (SESS_INIT | SESS_FREE | SESS_DOWN)) {
+       if (s->state & (SESS_INIT | SESS_FREE)) {
+               /* no active session, so do a quick cleanup */
                struct connection *c;
                while ((c = TAILQ_FIRST(&s->connections)) != NULL)
                        conn_free(c);
@@ -205,19 +206,25 @@ session_schedule(struct session *s)
  * The session FSM runs from a callback so that the connection FSM can finish.
  */
 void
-session_fsm(struct session *s, enum s_event ev, struct connection *c)
+session_fsm(struct session *s, enum s_event ev, struct connection *c,
+    unsigned int timeout)
 {
        struct timeval tv;
        struct sessev *sev;
 
+       log_debug("session_fsm[%s]: %s ev %s timeout %d",
+           s->config.SessionName, sess_state(s->state),
+           sess_event(ev), timeout);
+
        if ((sev = malloc(sizeof(*sev))) == NULL)
                fatal("session_fsm");
        sev->conn = c;
+       sev->sess = s;
        sev->event = ev;
-       SIMPLEQ_INSERT_TAIL(&s->fsmq, sev, entry);
 
        timerclear(&tv);
-       if (evtimer_add(&s->fsm_ev, &tv) == -1)
+       tv.tv_sec = timeout;
+       if (event_once(-1, EV_TIMEOUT, session_fsm_callback, sev, &tv) == -1)
                fatal("session_fsm");
 }
 
@@ -227,11 +234,17 @@ struct {
        int             (*action)(struct session *, struct sessev *);
 } s_fsm[] = {
        { SESS_INIT, SESS_EV_START, sess_do_start },
-       { SESS_FREE, SESS_EV_CONN_LOGGED_IN, sess_do_conn_loggedin },
+       { SESS_FREE, SESS_EV_START, sess_do_start },
+       { SESS_FREE, SESS_EV_CONN_LOGGED_IN, sess_do_conn_loggedin },   /* N1 */
+       { SESS_FREE, SESS_EV_CLOSED, sess_do_stop },
        { SESS_LOGGED_IN, SESS_EV_CONN_LOGGED_IN, sess_do_conn_loggedin },
-       { SESS_RUNNING, SESS_EV_CONN_FAIL, sess_do_conn_fail },
-       { SESS_RUNNING, SESS_EV_CONN_CLOSED, sess_do_conn_closed },
-       { SESS_RUNNING, SESS_EV_CLOSED, sess_do_down },
+       { SESS_RUNNING, SESS_EV_CONN_CLOSED, sess_do_conn_closed },     /* N3 */
+       { SESS_RUNNING, SESS_EV_CONN_FAIL, sess_do_conn_fail },         /* N5 */
+       { SESS_RUNNING, SESS_EV_CLOSED, sess_do_free },         /* XXX */
+       { SESS_FAILED, SESS_EV_START, sess_do_start },
+       { SESS_FAILED, SESS_EV_TIMEOUT, sess_do_free },                 /* N6 */
+       { SESS_FAILED, SESS_EV_FREE, sess_do_free },                    /* N6 */
+       { SESS_FAILED, SESS_EV_CONN_LOGGED_IN, sess_do_reinstatement }, /* N4 */
        { 0, 0, NULL }
 };
 
@@ -239,37 +252,35 @@ struct {
 void
 session_fsm_callback(int fd, short event, void *arg)
 {
-       struct session *s = arg;
-       struct sessev *sev;
+       struct sessev *sev = arg;
+       struct session *s = sev->sess;
        int     i, ns;
 
-       while ((sev = SIMPLEQ_FIRST(&s->fsmq))) {
-               SIMPLEQ_REMOVE_HEAD(&s->fsmq, entry);
-               for (i = 0; s_fsm[i].action != NULL; i++) {
-                       if (s->state & s_fsm[i].state &&
-                           sev->event == s_fsm[i].event) {
-                               log_debug("sess_fsm[%s]: %s ev %s",
-                                   s->config.SessionName, sess_state(s->state),
-                                   sess_event(sev->event));
-                               ns = s_fsm[i].action(s, sev);
-                               if (ns == -1)
-                                       /* XXX better please */
-                                       fatalx("sess_fsm: action failed");
-                               log_debug("sess_fsm[%s]: new state %s",
-                                   s->config.SessionName,
-                                   sess_state(ns));
-                               s->state = ns;
-                               break;
-                       }
-               }
-               if (s_fsm[i].action == NULL) {
-                       log_warnx("sess_fsm[%s]: unhandled state transition "
-                           "[%s, %s]", s->config.SessionName,
-                           sess_state(s->state), sess_event(sev->event));
-                       fatalx("bjork bjork bjork");
+       for (i = 0; s_fsm[i].action != NULL; i++) {
+               if (s->state & s_fsm[i].state &&
+                   sev->event == s_fsm[i].event) {
+                       log_debug("sess_fsm[%s]: %s ev %s",
+                           s->config.SessionName, sess_state(s->state),
+                           sess_event(sev->event));
+                       ns = s_fsm[i].action(s, sev);
+                       if (ns == -1)
+                               /* XXX better please */
+                               fatalx("sess_fsm: action failed");
+                       log_debug("sess_fsm[%s]: new state %s",
+                           s->config.SessionName,
+                           sess_state(ns));
+                       s->state = ns;
+                       break;
                }
-               free(sev);
        }
+       if (s_fsm[i].action == NULL) {
+               log_warnx("sess_fsm[%s]: unhandled state transition "
+                   "[%s, %s]", s->config.SessionName,
+                   sess_state(s->state), sess_event(sev->event));
+               fatalx("bjork bjork bjork");
+       }
+       free(sev);
+log_debug("sess_fsm: done");
 }
 
 int
@@ -283,12 +294,17 @@ sess_do_start(struct session *s, struct sessev *sev)
        s->his = iscsi_sess_defaults;
        s->active = iscsi_sess_defaults;
 
-       if (s->config.MaxConnections)
+       if (s->config.SessionType != SESSION_TYPE_DISCOVERY &&
+           s->config.MaxConnections)
                s->mine.MaxConnections = s->config.MaxConnections;
 
        conn_new(s, &s->config.connection);
 
-       return SESS_FREE;
+       /* XXX kill SESS_FREE it seems to be bad */
+       if (s->state == SESS_INIT)
+               return SESS_FREE;
+       else
+               return s->state;
 }
 
 int
@@ -304,6 +320,7 @@ sess_do_conn_loggedin(struct session *s, struct sessev *sev)
 
        iscsi_merge_sess_params(&s->active, &s->mine, &s->his);
        vscsi_event(VSCSI_REQPROBE, s->target, -1);
+       s->holdTimer = 0;
 
        return SESS_LOGGED_IN;
 }
@@ -324,7 +341,7 @@ sess_do_conn_fail(struct session *s, struct sessev *sev)
         * Connections in state FREE can be removed.
         * Connections in any error state will cause the session to enter
         * the FAILED state. If no sessions are left and the session was
-        * not already FREE then explicit recovery needs to be done.
+        * not already FREE then implicit recovery needs to be done.
         */
 
        switch (c->state) {
@@ -341,11 +358,16 @@ sess_do_conn_fail(struct session *s, struct sessev *sev)
        TAILQ_FOREACH(c, &s->connections, entry) {
                if (c->state & CONN_FAILED) {
                        state = SESS_FAILED;
-                       break;
-               } else if (c->state & CONN_RUNNING)
+                       conn_fsm(c, CONN_EV_CLEANING_UP);
+               } else if (c->state & CONN_RUNNING && state != SESS_FAILED)
                        state = SESS_LOGGED_IN;
        }
 
+       session_fsm(s, SESS_EV_START, NULL, s->holdTimer);
+       /* exponential back-off on constant failure */
+       if (s->holdTimer < ISCSID_HOLD_TIME_MAX)
+               s->holdTimer = s->holdTimer ? s->holdTimer * 2 : 1;
+
        return state;
 }
 
@@ -373,16 +395,50 @@ sess_do_conn_closed(struct session *s, struct sessev *sev)
 }
 
 int
-sess_do_down(struct session *s, struct sessev *sev)
+sess_do_stop(struct session *s, struct sessev *sev)
 {
        struct connection *c;
 
+       /* XXX do graceful closing of session and go to INIT state at the end */
+
        while ((c = TAILQ_FIRST(&s->connections)) != NULL)
                conn_free(c);
 
        /* XXX anything else to reset to initial state? */
+       return SESS_INIT;
+}
+
+int
+sess_do_free(struct session *s, struct sessev *sev)
+{
+       struct connection *c;
+
+       while ((c = TAILQ_FIRST(&s->connections)) != NULL)
+               conn_free(c);
+
+       return SESS_FREE;
+}
 
-       return SESS_DOWN;
+const char *conn_state(int);
+
+
+int
+sess_do_reinstatement(struct session *s, struct sessev *sev)
+{
+       struct connection *c, *nc;
+
+       TAILQ_FOREACH_SAFE(c, &s->connections, entry, nc) {
+               log_debug("sess reinstatement[%s]: %s",
+                   s->config.SessionName, conn_state(c->state));
+
+               if (c->state & CONN_FAILED) {
+                       conn_fsm(c, CONN_EV_FREE);
+                       TAILQ_REMOVE(&s->connections, c, entry);
+                       conn_free(c);
+               }
+       }
+
+       return SESS_LOGGED_IN;
 }
 
 const char *
@@ -399,8 +455,6 @@ sess_state(int s)
                return "LOGGED_IN";
        case SESS_FAILED:
                return "FAILED";
-       case SESS_DOWN:
-               return "DOWN";
        default:
                snprintf(buf, sizeof(buf), "UKNWN %x", s);
                return buf;
@@ -416,14 +470,22 @@ sess_event(enum s_event e)
        switch (e) {
        case SESS_EV_START:
                return "start";
+       case SESS_EV_STOP:
+               return "stop";
        case SESS_EV_CONN_LOGGED_IN:
                return "connection logged in";
        case SESS_EV_CONN_FAIL:
                return "connection fail";
        case SESS_EV_CONN_CLOSED:
                return "connection closed";
+       case SESS_EV_REINSTATEMENT:
+               return "connection reinstated";
        case SESS_EV_CLOSED:
                return "session closed";
+       case SESS_EV_TIMEOUT:
+               return "timeout";
+       case SESS_EV_FREE:
+               return "free";
        case SESS_EV_FAIL:
                return "fail";
        }