Module: sip-router Branch: sr_3.0 Commit: a72fd8da7c0b40c37a322f7d69b7d76197ceb9c7 URL: http://git.sip-router.org/cgi-bin/gitweb.cgi/sip-router/?a=commit;h=a72fd8da...
Author: Andrei Pelinescu-Onciul andrei@iptel.org Committer: Andrei Pelinescu-Onciul andrei@iptel.org Date: Wed Jul 7 11:59:30 2010 +0200
tcp: fix dispatching closed connections to tcp readers
Under very heavy load it is possible that send2child() might try to send an already closed connection/fd to a tcp reader. This can happen only if the tcp connection is watched for read (POLLIN) by tcp_main (and not by a tcp reader), the connection becomes available for reading (either new data received, EOF or RST) and tcp_main chooses a specific tcp reader to send the connection to, while at the same time the same tcp reader tries to send on the same connection, fails for some reason (no more space for buffering, EOF, RST and so on) and sends a close command back to tcp_main. Because send2child() first executes any pending commands from the chosen tcp_reader, this might lead to closing the fd before attempting to send it to the tcp_reader. Under normal circumstances the impact is only an extra syscall and some log messages. However, it is possible (but highly unlikely) that after sending the close command the tcp_reader opens a new connection for sending and sends its fd back to tcp_main. This new fd might get the same number as the freshly closed fd, and send2child() might then send the wrong (fd, tcp connection) pair.
(cherry picked from commit d89437a3d7bc25a9c098a04c6ee69fc3848ff0b5)
---
tcp_main.c | 20 ++++++++++++++------ 1 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/tcp_main.c b/tcp_main.c index 6a6c4c0..54334ab 100644 --- a/tcp_main.c +++ b/tcp_main.c @@ -2468,6 +2468,7 @@ close_again: LOG(L_ERR, "ERROR: tcpconn_put_destroy; close() failed: %s (%d)\n", strerror(errno), errno); } + tcpconn->s=-1; }
@@ -3378,10 +3379,20 @@ inline static int send2child(struct tcp_connection* tcpconn) * send a release command, but the master fills its socket buffer * with new connection commands => deadlock) */ /* answer tcp_send requests first */ - while(handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0); + while(unlikely((tcpconn->state != S_CONN_BAD) && + (handle_ser_child(&pt[tcp_children[idx].proc_no], -1)>0))); /* process tcp readers requests */ - while(handle_tcp_child(&tcp_children[idx], -1)>0); - + while(unlikely((tcpconn->state != S_CONN_BAD && + (handle_tcp_child(&tcp_children[idx], -1)>0)))); + + /* the above possible pending requests might have included a + command to close this tcpconn (e.g. CONN_ERROR, CONN_EOF). + In this case the fd is already closed here (and possible + even replaced by another one with the same number) so it + must not be sent to a reader anymore */ + if (unlikely(tcpconn->state == S_CONN_BAD || + (tcpconn->flags & F_CONN_FD_CLOSED))) + return -1; #ifdef SEND_FD_QUEUE /* if queue full, try to queue the io */ if (unlikely(send_fd(tcp_children[idx].unix_sock, &tcpconn, @@ -3501,8 +3512,6 @@ static inline int handle_new_connect(struct socket_info* si) DBG("handle_new_connect: new connection from %s: %p %d flags: %04x\n", su2a(&su, sizeof(su)), tcpconn, tcpconn->s, tcpconn->flags); if(unlikely(send2child(tcpconn)<0)){ - LOG(L_ERR,"ERROR: handle_new_connect: no children " - "available\n"); tcpconn->flags&=~F_CONN_READER; tcpconn_put(tcpconn); tcpconn_try_unhash(tcpconn); @@ -3676,7 +3685,6 @@ send_to_child: tcpconn->flags&=~(F_CONN_MAIN_TIMER|F_CONN_READ_W|F_CONN_WANTS_RD); tcpconn_ref(tcpconn); /* refcnt ++ */ if (unlikely(send2child(tcpconn)<0)){ - LOG(L_ERR,"ERROR: handle_tcpconn_ev: no children available\n"); tcpconn->flags&=~F_CONN_READER; #ifdef TCP_ASYNC if (tcpconn->flags & F_CONN_WRITE_W){