Cluster: send a ping to every node we never contacted in timeout/2 seconds.

Usually we try to send just 1 ping every second, however when we detect
we are going to have unreliable failure detection because we can't ping
some node in time, send an additional ping.

This should only happen with very large clusters or when the the node
timeout is set to a very low value.
This commit is contained in:
antirez 2013-03-05 12:13:39 +01:00
parent e7628be2a7
commit d728ec6dee

View File

@ -1279,22 +1279,33 @@ void clusterCron(void) {
di = dictGetIterator(server.cluster->nodes);
while((de = dictNext(di)) != NULL) {
clusterNode *node = dictGetVal(de);
time_t now = time(NULL);
int delay;
if (node->flags &
(REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE))
continue;
/* Check only if we already sent a ping and did not received
* a reply yet. */
if (node->ping_sent == 0 ||
node->ping_sent <= node->pong_received) continue;
/* If our ping is older than half the cluster timeout (may happen
* in a cluster with many nodes), send a new ping. */
if (node->link &&
(now - node->ping_sent) > server.cluster->node_timeout/2)
{
clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
continue;
}
/* If we never received a pong, use the ping time to compute
* the delay. */
if (node->pong_received) {
delay = time(NULL) - node->pong_received;
delay = now - node->pong_received;
} else {
delay = time(NULL) - node->ping_sent;
delay = now - node->ping_sent;
}
if (delay < server.cluster->node_timeout) {