From 0f1f25784f6440f3476609065edfb524e3bbbcf4 Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Mon, 10 Mar 2014 09:57:52 +0100
Subject: [PATCH] Cluster: better timeout and retry time for failover.

When node-timeout is too small, on the order of a few milliseconds,
there is no way the voting process can complete within that time, so we
set a lower limit of two seconds for the failover timeout.

The retry time is set to twice the failover timeout, so it is
at least 4 seconds.
---
 src/cluster.c | 23 ++++++++++++++++-------
 src/cluster.h |  1 -
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/cluster.c b/src/cluster.c
index c99f1fa8..031b61bc 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -2050,6 +2050,18 @@ void clusterHandleSlaveFailover(void) {
     int manual_failover = server.cluster->mf_end != 0 &&
                           server.cluster->mf_can_start;
     int j;
+    mstime_t auth_timeout, auth_retry_time;
+
+    /* Compute the failover timeout (the max time we have to request votes
+     * and wait for replies), and the failover retry time (the time to wait
+     * before starting a new failover attempt).
+     *
+     * Timeout is MAX(NODE_TIMEOUT*2,2000) milliseconds.
+     * Retry is two times the Timeout.
+     */
+    auth_timeout = server.cluster_node_timeout*2;
+    if (auth_timeout < 2000) auth_timeout = 2000;
+    auth_retry_time = auth_timeout*2;
 
     /* Pre conditions to run the function:
      * 1) We are a slave.
@@ -2060,8 +2072,6 @@ void clusterHandleSlaveFailover(void) {
         (!nodeFailed(myself->slaveof) && !manual_failover) ||
         myself->slaveof->numslots == 0) return;
 
-    /* If this is a manual failover, are we ready to start? */
-
     /* Set data_age to the number of seconds we are disconnected from
      * the master. */
     if (server.repl_state == REDIS_REPL_CONNECTED) {
@@ -2084,10 +2094,9 @@ void clusterHandleSlaveFailover(void) {
         (server.cluster_node_timeout * REDIS_CLUSTER_SLAVE_VALIDITY_MULT))
         return;
 
-    /* Compute the time at which we can start an election. */
-    if (auth_age >
-        server.cluster_node_timeout * REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT)
-    {
+    /* If the previous failover attempt timed out and the retry time has
+     * elapsed, we can set up a new one. */
+    if (auth_age > auth_retry_time) {
         server.cluster->failover_auth_time = mstime() +
             500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */
             random() % 500; /* Random delay between 0 and 500 milliseconds. */
@@ -2139,7 +2148,7 @@ void clusterHandleSlaveFailover(void) {
     if (mstime() < server.cluster->failover_auth_time) return;
 
     /* Return ASAP if the election is too old to be valid. */
-    if (auth_age > server.cluster_node_timeout) return;
+    if (auth_age > auth_timeout) return;
 
     /* Ask for votes if needed. */
     if (server.cluster->failover_auth_sent == 0) {
diff --git a/src/cluster.h b/src/cluster.h
index 9581b575..4fb1cfe8 100644
--- a/src/cluster.h
+++ b/src/cluster.h
@@ -18,7 +18,6 @@
 #define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
 #define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
 #define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
-#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
 #define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
 #define REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER 1
 #define REDIS_CLUSTER_MF_TIMEOUT 5000 /* Milliseconds to do a manual failover. */