changeset 1200:ce2b00dc86c1

React quickly to head-of-the-line blocking in case of network failure. The delay can be adapted with the MAX_HOTL_BLOCKING_TIME constant in cnxctx.h.
author Sebastien Decugis <sdecugis@freediameter.net>
date Fri, 14 Jun 2013 10:14:11 +0800
parents 2c62bba0ef4b
children d2608e47db28
files libfdcore/cnxctx.c libfdcore/cnxctx.h libfdcore/sctp.c
diffstat 3 files changed, 49 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/libfdcore/cnxctx.c	Fri Jun 14 10:13:22 2013 +0800
+++ b/libfdcore/cnxctx.c	Fri Jun 14 10:14:11 2013 +0800
@@ -624,9 +624,9 @@
 	
 	/* Set a timeout on the socket so that in any case we are not stuck waiting for something */
 	memset(&tv, 0, sizeof(tv));
-	tv.tv_sec = 3;	/* allow 3 seconds timeout for TLS session cleanup */
-	CHECK_SYS_DO( setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), /* best effort only */ );
-	CHECK_SYS_DO( setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), /* Also timeout for sending, to avoid waiting forever */ );
+	tv.tv_usec = 100000L;	/* 100ms, to react quickly to head-of-the-line blocking. */
+	CHECK_SYS_DO( setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),  );
+	CHECK_SYS_DO( setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),  );
 }
 
 
@@ -674,23 +674,30 @@
 	return ret;
 }
 
-/* Send, for older GNUTLS */
-#ifndef GNUTLS_VERSION_212
-static ssize_t fd_cnx_s_send(struct cnxctx * conn, const void *buffer, size_t length)
+/* Send */
+static ssize_t fd_cnx_s_sendv(struct cnxctx * conn, const struct iovec * iov, int iovcnt)
 {
 	ssize_t ret = 0;
-	int timedout = 0;
+	struct timespec ts, now;
+	CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &ts), return -1 );
 again:
-	ret = send(conn->cc_socket, buffer, length, 0);
+	ret = writev(conn->cc_socket, iov, iovcnt);
 	/* Handle special case of timeout */
 	if ((ret < 0) && ((errno == EAGAIN) || (errno == EINTR))) {
+		ret = -errno;
 		pthread_testcancel();
-		if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING ))
+		
+		/* Check how much time we were blocked for this sending. */
+		CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &now), return -1 );
+		if ( ((now.tv_sec - ts.tv_sec) * 1000 + ((now.tv_nsec - ts.tv_nsec) / 1000000L)) > MAX_HOTL_BLOCKING_TIME) {
+			LOG_D("Unable to send any data for %dms, closing the connection", MAX_HOTL_BLOCKING_TIME);
+		} else if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING )) {
 			goto again; /* don't care, just ignore */
-		if (!timedout) {
-			timedout ++; /* allow for one timeout while closing */
-			goto again;
 		}
+		
+		/* propagate the error */
+		errno = -ret;
+		ret = -1;
 		CHECK_SYS_DO(ret, /* continue */);
 	}
 	
@@ -700,33 +707,17 @@
 	
 	return ret;
 }
-#endif /* GNUTLS_VERSION_212 */
 
-/* Send */
-static ssize_t fd_cnx_s_sendv(struct cnxctx * conn, const struct iovec * iov, int iovcnt)
+/* Send, for older GNUTLS */
+#ifndef GNUTLS_VERSION_212
+static ssize_t fd_cnx_s_send(struct cnxctx * conn, const void *buffer, size_t length)
 {
-	ssize_t ret = 0;
-	int timedout = 0;
-again:
-	ret = writev(conn->cc_socket, iov, iovcnt);
-	/* Handle special case of timeout */
-	if ((ret < 0) && ((errno == EAGAIN) || (errno == EINTR))) {
-		pthread_testcancel();
-		if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING ))
-			goto again; /* don't care, just ignore */
-		if (!timedout) {
-			timedout ++; /* allow for one timeout while closing */
-			goto again;
-		}
-		CHECK_SYS_DO(ret, /* continue */);
-	}
-	
-	/* Mark the error */
-	if (ret <= 0)
-		fd_cnx_markerror(conn);
-	
-	return ret;
+	struct iovec iov;
+	iov.iov_base = (void *)buffer;
+	iov.iov_len  = length;
+	return fd_cnx_s_sendv(conn, &iov, 1);
 }
+#endif /* GNUTLS_VERSION_212 */
 
 #define ALIGNOF(t) ((char *)(&((struct { char c; t _h; } *)0)->_h) - (char *)0)  /* Could use __alignof__(t) on some systems but this is more portable probably */
 #define PMDL_PADDED(len) ( ((len) + ALIGNOF(struct fd_msg_pmdl) - 1) & ~(ALIGNOF(struct fd_msg_pmdl) - 1) )
@@ -1007,9 +998,12 @@
 static ssize_t fd_tls_send_handle_error(struct cnxctx * conn, gnutls_session_t session, void * data, size_t sz)
 {
 	ssize_t ret;
+	struct timespec ts, now;
+	CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &ts), return -1 );
 again:	
 	CHECK_GNUTLS_DO( ret = gnutls_record_send(session, data, sz),
 		{
+			pthread_testcancel();
 			switch (ret) {
 				case GNUTLS_E_REHANDSHAKE: 
 					if (!fd_cnx_teststate(conn, CC_STATUS_CLOSING)) {
@@ -1024,9 +1018,12 @@
 
 				case GNUTLS_E_AGAIN:
 				case GNUTLS_E_INTERRUPTED:
-					if (!fd_cnx_teststate(conn, CC_STATUS_CLOSING))
+					CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &now), return -1 );
+					if ( ((now.tv_sec - ts.tv_sec) * 1000 + ((now.tv_nsec - ts.tv_nsec) / 1000000L)) > MAX_HOTL_BLOCKING_TIME) {
+						LOG_D("Unable to send any data for %dms, closing the connection", MAX_HOTL_BLOCKING_TIME);
+					} else if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING )) {
 						goto again;
-					TRACE_DEBUG(INFO, "Connection is closing, so abord gnutls_record_send now.");
+					}
 					break;
 
 				default:
--- a/libfdcore/cnxctx.h	Fri Jun 14 10:13:22 2013 +0800
+++ b/libfdcore/cnxctx.h	Fri Jun 14 10:14:11 2013 +0800
@@ -38,6 +38,9 @@
 #ifndef _CNXCTX_H
 #define _CNXCTX_H
 
+/* Maximum time we allow a connection to be blocked because of head-of-the-line buffers. After this delay, connection is considered in error. */
+#define MAX_HOTL_BLOCKING_TIME	1000	/* ms */
+
 /* The connection context structure */
 struct cnxctx {
 	char		cc_id[60];	/* The name of this connection. the first 5 chars are reserved for flags display (cc_state). */
--- a/libfdcore/sctp.c	Fri Jun 14 10:13:22 2013 +0800
+++ b/libfdcore/sctp.c	Fri Jun 14 10:14:11 2013 +0800
@@ -1083,10 +1083,11 @@
 	uint8_t anci[CMSG_SPACE(sizeof(struct sctp_sndinfo))];	
 #endif /* OLD_SCTP_SOCKET_API */
 	ssize_t ret;
-	int timedout = 0;
+	struct timespec ts, now;
 	
 	TRACE_ENTRY("%p %hu %p %d", conn, strid, iov, iovcnt);
 	CHECK_PARAMS_DO(conn && iov && iovcnt, { errno = EINVAL; return -1; } );
+	CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &ts), return -1 );
 	
 	memset(&mhdr, 0, sizeof(mhdr));
 	memset(&anci, 0, sizeof(anci));
@@ -1120,12 +1121,17 @@
 	/* Handle special case of timeout */
 	if ((ret < 0) && ((errno == EAGAIN) || (errno == EINTR))) {
 		pthread_testcancel();
-		if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING ))
+		/* Check how much time we were blocked for this sending. */
+		CHECK_SYS_DO(  clock_gettime(CLOCK_REALTIME, &now), return -1 );
+		if ( ((now.tv_sec - ts.tv_sec) * 1000 + ((now.tv_nsec - ts.tv_nsec) / 1000000L)) > MAX_HOTL_BLOCKING_TIME) {
+			LOG_D("Unable to send any data for %dms, closing the connection", MAX_HOTL_BLOCKING_TIME);
+		} else if (! fd_cnx_teststate(conn, CC_STATUS_CLOSING )) {
 			goto again; /* don't care, just ignore */
-		if (!timedout) {
-			timedout ++; /* allow for one timeout while closing */
-			goto again;
 		}
+		
+		/* propagate the error */
+		errno = -ret;
+		ret = -1;
 	}
 	
 	CHECK_SYS_DO( ret, ); /* for tracing error only */
"Welcome to our mercurial repository"