Navigation


Changeset 1010:357c2f892d24 in freeDiameter


Ignore:
Timestamp:
Mar 26, 2013, 12:39:32 AM (11 years ago)
Author:
Sebastien Decugis <sdecugis@freediameter.net>
Branch:
default
Phase:
public
Message:

Implement a new counter on pending answers to send back to a peer.
Function fd_peer_get_load_pending updated to retrieve this counter as well.
When a peer has answers pending, the connection is not immediately torn down
upon DPR/DPA exchange, but a GRACE_TIMEOUT delay (default 1 sec) is granted.

Files:
7 edited

Legend:

Unmodified
Added
Removed
  • include/freeDiameter/libfdcore.h

    r1008 r1010  
    390390 * PARAMETERS:
    391391 *  peer        : The peer which load to read
     392 *  to_receive  : (out) number of requests sent to this peer without matching answer yet.
     393 *  to_send     : (out) number of requests received from this peer and not yet answered.
    392394 *
    393395 * DESCRIPTION:
     
    400402 * !0  : An error occurred
    401403 */
    402 int fd_peer_get_load_pending(struct peer_hdr *peer, int * load);
     404int fd_peer_get_load_pending(struct peer_hdr *peer, long * to_receive, long * to_send);
    403405
    404406/*
  • libfdcore/fdcore-internal.h

    r938 r1010  
    7171#endif /* DPR_TIMEOUT */
    7272
     73/* Delai where the connection is maintained opened to allow exchanging remaining pending answers after DPR/DPA */
     74#ifndef GRACE_TIMEOUT
     75#define GRACE_TIMEOUT   1       /* in seconds */
     76#endif /* GRACE_TIMEOUT */
     77
    7378/* The Vendor-Id to advertise in CER/CEA */
    7479#ifndef MY_VENDOR_ID
     
    126131        struct fd_list  srs; /* requests ordered by hop-by-hop id */
    127132        struct fd_list  exp; /* requests that have a timeout set, ordered by timeout */
    128         int             cnt; /* number of requests in the srs list */
     133        long            cnt; /* number of requests in the srs list */
    129134        pthread_mutex_t mtx; /* mutex to protect these lists */
    130135        pthread_cond_t  cnd; /* cond var used by the thread that handles timeouts */
     
    181186        /* Sent requests (for fallback), list of struct sentreq ordered by hbh */
    182187        struct sr_list   p_sr;
     188       
     189        /* Pending received requests not yet answered (count only) */
     190        long             p_reqin_count; /* We use p_state_mtx to protect this value */
    183191       
    184192        /* Data for transitional states before the peer is in OPEN state */
  • libfdcore/p_ce.c

    r975 r1010  
    919919        }
    920920       
     921        /* Update the counter to match with the answer being sent */
     922        CHECK_POSIX( pthread_mutex_lock(&peer->p_state_mtx) );
     923        peer->p_reqin_count++;
     924        CHECK_POSIX( pthread_mutex_unlock(&peer->p_state_mtx) );
     925
    921926        /* Reply a CEA */
    922927        CHECK_FCT( fd_msg_new_answer_from_req ( fd_g_config->cnf_dict, &msg, 0 ) );
  • libfdcore/p_dp.c

    r974 r1010  
    6363int fd_p_dp_handle(struct msg ** msg, int req, struct fd_peer * peer)
    6464{
     65        long to_receive, to_send;
    6566        TRACE_ENTRY("%p %d %p", msg, req, peer);
    6667       
     
    110111                CHECK_FCT( fd_msg_rescode_set( *msg, "DIAMETER_SUCCESS", NULL, NULL, 1 ) );
    111112               
    112                 /* Move to CLOSING state to failover outgoing messages (and avoid failing over the DPA...) */
    113                 CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING) );
    114                
    115                 /* Now send the DPA */
    116                 CHECK_FCT( fd_out_send( msg, NULL, peer, FD_CNX_ORDERED) );
    117                
    118                 if (fd_cnx_isMultichan(peer->p_cnxctx)) {
    119                         /* There is a possibililty that messages are still in the pipe coming here, so let's grace for 1 second */
    120                         CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING_GRACE) );
    121                         fd_psm_next_timeout(peer, 0, 1);
     113                /* Do we have pending exchanges with this peer? */
     114                CHECK_FCT( fd_peer_get_load_pending(&peer->p_hdr, &to_receive, &to_send) );
     115               
     116                if ((to_receive == 0) && (to_send == 1 /* only the DPA */)) {
     117                        /* No pending exchange, move to CLOSING directly */
     118                        CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING) );
     119               
     120                        /* Now send the DPA */
     121                        CHECK_FCT( fd_out_send( msg, NULL, peer, FD_CNX_ORDERED) );
    122122                       
    123                 } else {
    124                         /* Move to CLOSED state */
     123                        /* and move to CLOSED */
    125124                        fd_psm_cleanup(peer, 0);
    126125
    127126                        /* Reset the timer for next connection attempt -- we'll retry sooner or later depending on the disconnection cause */
    128127                        fd_psm_next_timeout(peer, 1, fd_p_dp_newdelay(peer));
     128                } else {
     129                        /* We have pending exchanges, we move to CLOSING_GRACE which allows exchanges of answers but
     130                        not new requests */
     131                        CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING_GRACE) );
     132                        fd_psm_next_timeout(peer, 0, GRACE_TIMEOUT);
     133                       
     134                        /* Now send the DPA */
     135                        CHECK_FCT( fd_out_send( msg, NULL, peer, FD_CNX_ORDERED) );
    129136                }
    130137        } else {
    131138                /* We received a DPA */
    132139                int curstate = fd_peer_getstate(peer);
    133                 if (curstate != STATE_CLOSING) {
     140                if (curstate != STATE_CLOSING_GRACE) {
    134141                        TRACE_DEBUG(INFO, "Ignoring DPA received in state %s", STATE_STR(curstate));
    135142                }
     
    141148                *msg = NULL;
    142149               
    143                 if (fd_cnx_isMultichan(peer->p_cnxctx)) {
    144                         CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING_GRACE) );
    145                         fd_psm_next_timeout(peer, 0, 1);
     150                /* Do we still have pending exchanges with this peer? */
     151                CHECK_FCT( fd_peer_get_load_pending(&peer->p_hdr, &to_receive, &to_send) );
     152                if ((to_receive != 0) || (to_send != 0)) {
     153                        TRACE_DEBUG(INFO, "Received DPA but pending load: [%ld, %ld], giving grace delay before closing", to_receive, to_send);
     154                        fd_psm_next_timeout(peer, 0, GRACE_TIMEOUT);
    146155                        peer->p_flags.pf_localterm = 1;
    147                 }
    148                 /* otherwise, return in CLOSING state, the psm will handle it */
     156                } else {
     157                        /* otherwise, go to CLOSING state, the psm will handle terminating the connection */
     158                        CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING) );
     159                }
    149160        }
    150161       
     
    188199       
    189200        /* Update the peer state and timer */
    190         CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING) );
     201        CHECK_FCT( fd_psm_change_state(peer, STATE_CLOSING_GRACE) );
    191202        fd_psm_next_timeout(peer, 0, DPR_TIMEOUT);
    192203       
  • libfdcore/p_out.c

    r975 r1010  
    173173int fd_out_send(struct msg ** msg, struct cnxctx * cnx, struct fd_peer * peer, uint32_t flags)
    174174{
     175        struct msg_hdr * hdr;
     176       
    175177        TRACE_ENTRY("%p %p %p %x", msg, cnx, peer, flags);
    176178        CHECK_PARAMS( msg && *msg && (cnx || (peer && peer->p_cnxctx)));
     179       
     180        if (peer) {
     181                CHECK_FCT( fd_msg_hdr(*msg, &hdr) );
     182                if (!(hdr->msg_flags & CMD_FLAG_REQUEST)) {
     183                        /* Update the count of pending answers to send */
     184                        CHECK_POSIX( pthread_mutex_lock(&peer->p_state_mtx) );
     185                        peer->p_reqin_count--;
     186                        CHECK_POSIX( pthread_mutex_unlock(&peer->p_state_mtx) );                       
     187                }
     188        }
    177189       
    178190        if (fd_peer_getstate(peer) == STATE_OPEN) {
  • libfdcore/p_psm.c

    r975 r1010  
    4545begining and end of a connection lifetime. It means we need agility to
    4646switch between "ordering enforced" and "ordering not enforced to counter
    47 HotLB" modes of operation.
    48 
    49 The connection state machine represented in RFC3588 (and rfc3588bis) is
     47Head of the Line Blocking" modes of operation.
     48
     49The connection state machine represented in RFC3588 (and RFC6733) is
    5050incomplete, because it lacks the SUSPECT state and the 3 DWR/DWA
    5151exchanges (section 5.1) when the peer recovers from this state.
     
    9191application message is lost.
    9292
    93 This situation is actually quite possible because DPR/DPA messages are
     93This situation is actually happening easily because DPR/DPA messages are
    9494very short, while application messages can be quite large. Therefore,
    9595they require much more time to deliver.
     
    9797I really cannot see a way to counter this effect by using the ordering
    9898of the messages, except by applying a timer (state STATE_CLOSING_GRACE).
     99This timer can be also useful when we detect that some messages has not
     100yet received an answer on this link, to give time to the application to
     101complete the exchange ongoing.
    99102
    100103However, this problem must be balanced with the fact that the message
     
    197200        return 0;
    198201}
    199 static int leave_open_state(struct fd_peer * peer)
     202static int leave_open_state(struct fd_peer * peer, int skip_failover)
    200203{
    201204        /* Remove from active peers list */
     
    208211       
    209212        /* Failover the messages */
    210         fd_peer_failover_msg(peer);
     213        if (!skip_failover) {
     214                fd_peer_failover_msg(peer);
     215        }
    211216       
    212217        return 0;
     
    288293       
    289294        if (old == STATE_OPEN) {
    290                 CHECK_FCT( leave_open_state(peer) );
    291         }
    292        
     295                CHECK_FCT( leave_open_state(peer, new_state == STATE_CLOSING_GRACE) );
     296        }
     297        if (old == STATE_CLOSING_GRACE) {
     298                fd_peer_failover_msg(peer);
     299        }
     300
    293301        if (new_state == STATE_OPEN) {
    294302                CHECK_FCT( enter_open_state(peer) );
     
    298306                /* Purge event list */
    299307                fd_psm_events_free(peer);
     308               
     309                /* Reset the counter of pending anwers to send */
     310                peer->p_reqin_count = 0;
    300311               
    301312                /* If the peer is not persistant, we destroy it */
     
    529540                                fd_msg_log( FD_MSG_LOG_TIMING, msg, "Answer received in %d.%06.6d sec.", delay.tv_sec, delay.tv_nsec / 1000 );
    530541                        }
     542                } else {
     543                        /* Mark the incoming request so that we know we have pending answers for this peer */
     544                        CHECK_POSIX_DO( pthread_mutex_lock(&peer->p_state_mtx), goto psm_end  );
     545                        peer->p_reqin_count++;
     546                        CHECK_POSIX_DO( pthread_mutex_unlock(&peer->p_state_mtx), goto psm_end  );
    531547                }
    532548               
  • libfdcore/peers.c

    r974 r1010  
    258258
    259259/* Return the value of srlist->cnt */
    260 int fd_peer_get_load_pending(struct peer_hdr *peer, int * load)
     260int fd_peer_get_load_pending(struct peer_hdr *peer, long * to_receive, long * to_send)
    261261{
    262262        struct fd_peer * p = (struct fd_peer *)peer;
    263         TRACE_ENTRY("%p %p", peer, load);
    264         CHECK_PARAMS(CHECK_PEER(peer) && load);
    265        
    266         CHECK_POSIX( pthread_mutex_lock(&p->p_sr.mtx) );
    267         *load = p->p_sr.cnt;
    268         CHECK_POSIX( pthread_mutex_unlock(&p->p_sr.mtx) );
     263        TRACE_ENTRY("%p %p %p", peer, to_receive, to_send);
     264        CHECK_PARAMS(CHECK_PEER(peer));
     265       
     266        if (to_receive) {
     267                CHECK_POSIX( pthread_mutex_lock(&p->p_sr.mtx) );
     268                *to_receive = p->p_sr.cnt;
     269                CHECK_POSIX( pthread_mutex_unlock(&p->p_sr.mtx) );
     270        }
     271        if (to_send) {
     272                CHECK_POSIX( pthread_mutex_lock(&p->p_state_mtx) );
     273                *to_send = p->p_reqin_count;
     274                CHECK_POSIX( pthread_mutex_unlock(&p->p_state_mtx) );
     275        }
    269276       
    270277        return 0;
     
    411418        }
    412419
    413         snprintf(buf, sizeof(buf), ">  %s\t%s\t[%dsr]", STATE_STR(fd_peer_getstate(peer)), peer->p_hdr.info.pi_diamid, peer->p_sr.cnt);
     420        snprintf(buf, sizeof(buf), ">  %s\t%s\t[%ldsr,%ldpa]", STATE_STR(fd_peer_getstate(peer)), peer->p_hdr.info.pi_diamid, peer->p_sr.cnt, peer->p_reqin_count);
    414421        if (details > INFO) {
    415422                snprintf(buf+strlen(buf), sizeof(buf)-strlen(buf), "\t(rlm:%s)", peer->p_hdr.info.runtime.pir_realm ?: "<unknown>");
Note: See TracChangeset for help on using the changeset viewer.