// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

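/* wake up a closer waiting in smc_close_stream_wait() */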
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

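/* signal peer_done_writing to the peer via a CDC message (shutdown write) */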
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

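/* send the closing CDC message: abort if data is still pending in the
 * receive buffer, else signal peer_conn_closed
 */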
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

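/* signal an abnormal termination (peer_conn_abort) to the peer */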
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		if (smc->clcsock && smc->clcsock->sk) {
			smc->clcsock->sk->sk_err = ECONNABORTED;
			smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
		}
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			/* just SHUTDOWN_SEND done */
			sk->sk_state = SMC_PEERABORTWAIT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
		sock_put(sk); /* passive closing */
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

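/* has any kind of closing already been sent to the peer? */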
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

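/* close the socket actively: advance the state machine and send the
 * required closing CDC messages to the peer
 */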
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

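/* abort received from peer: move towards SMC_PROCESSABORT or SMC_CLOSED */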
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn))) {
			sk->sk_state = SMC_APPCLOSEWAIT1;
		} else {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
			smc_conn_free(conn);
	}
	release_sock(sk);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}

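/* shutdown write: wait for pending sends, then signal peer_done_writing */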
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}