From 00bab23c4188bea8bfed9b232fc2c771d9734276 Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Thu, 4 Apr 2013 12:02:48 +0200
Subject: [PATCH] Cluster: turn hardcoded node timeout multiplicators into
 defines.

Most Redis Cluster time limits are expressed in terms of the configured
node timeout. Turn them into defines.
---
 src/cluster.c | 22 ++++++++++++++++------
 src/redis.h   |  9 +++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/src/cluster.c b/src/cluster.c
index 1147a742..98d37b28 100644
--- a/src/cluster.c
+++ b/src/cluster.c
@@ -231,7 +231,7 @@ void clusterInit(void) {
     server.cluster->state = REDIS_CLUSTER_FAIL;
     server.cluster->size = 1;
     server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL);
-    server.cluster->node_timeout = 15;
+    server.cluster->node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT;
     server.cluster->failover_auth_time = 0;
     server.cluster->failover_auth_count = 0;
     memset(server.cluster->migrating_slots_to,0,
@@ -405,7 +405,8 @@ void clusterNodeCleanupFailureReports(clusterNode *node) {
     listNode *ln;
     listIter li;
     clusterNodeFailReport *fr;
-    time_t maxtime = server.cluster->node_timeout*2;
+    time_t maxtime = server.cluster->node_timeout *
+                     REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT;
     time_t now = time(NULL);
 
     listRewind(l,&li);
@@ -631,12 +632,17 @@ void clearNodeFailureIfNeeded(clusterNode *node) {
     }
 
     /* If it is a master and...
-     * 1) The FAIL state was set more than 2 times the node timeout + 10 sec.
+     * 1) The FAIL state is old enough: older than the node timeout times a
+     *    multiplier, plus some additional fixed time. The additional time is
+     *    useful when the node timeout is extremely short and the reaction time
+     *    of the cluster may be longer, so always wait at least a few seconds.
      * 2) It is yet serving slots from our point of view (not failed over).
      * Apparently no one is going to fix these slots, clear the FAIL flag. */
     if (node->flags & REDIS_NODE_MASTER &&
         node->numslots > 0 &&
-        (now - node->fail_time) > (server.cluster->node_timeout*2+10))
+        (now - node->fail_time) >
+        (server.cluster->node_timeout * REDIS_CLUSTER_FAIL_UNDO_TIME_MULT +
+                                        REDIS_CLUSTER_FAIL_UNDO_TIME_ADD))
     {
         redisLog(REDIS_NOTICE,
             "Clear FAIL state for node %.40s: is reachable again and nobody is serving its slots after some time.",
@@ -1418,14 +1424,18 @@ void clusterHandleSlaveFailover(void) {
     /* Check if our data is recent enough. For now we just use a fixed
      * constant of ten times the node timeout since the cluster should
      * react much faster to a master down. */
-    if (data_age > server.cluster->node_timeout * 10) return;
+    if (data_age >
+        server.cluster->node_timeout * REDIS_CLUSTER_SLAVE_VALIDITY_MULT)
+        return;
 
     /* TODO: check if we are the first slave as well? Or just rely on the
      * master authorization? */
 
     /* Ask masters if we are authorized to perform the failover. If there
      * is a pending auth request that's too old, reset it. */
-    if (server.cluster->failover_auth_time == 0 || auth_age > 15)
+    if (server.cluster->failover_auth_time == 0 ||
+        auth_age >
+        server.cluster->node_timeout * REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT)
     {
         redisLog(REDIS_WARNING,"Asking masters if I can failover...");
         server.cluster->failover_auth_time = time(NULL);
diff --git a/src/redis.h b/src/redis.h
index 225dd4a1..0d0f9a85 100644
--- a/src/redis.h
+++ b/src/redis.h
@@ -524,6 +524,15 @@ typedef struct redisOpArray {
 #define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */
 #define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
 
+/* The following defines are amounts of time, sometimes expressed as
+ * multipliers of the node timeout value (when ending with MULT). */
+#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15
+#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
+#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
+#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
+#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
+#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 1 /* Auth request retry time. */
+
 struct clusterNode;
 
 /* clusterLink encapsulates everything needed to talk with a remote node. */