From 00bab23c4188bea8bfed9b232fc2c771d9734276 Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 4 Apr 2013 12:02:48 +0200 Subject: [PATCH] Cluster: turn hardcoded node timeout multiplicators into defines. Most Redis Cluster time limits are expressed in terms of the configured node timeout. Turn them into defines. --- src/cluster.c | 22 ++++++++++++++++------ src/redis.h | 9 +++++++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 1147a742..98d37b28 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -231,7 +231,7 @@ void clusterInit(void) { server.cluster->state = REDIS_CLUSTER_FAIL; server.cluster->size = 1; server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL); - server.cluster->node_timeout = 15; + server.cluster->node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT; server.cluster->failover_auth_time = 0; server.cluster->failover_auth_count = 0; memset(server.cluster->migrating_slots_to,0, @@ -405,7 +405,8 @@ void clusterNodeCleanupFailureReports(clusterNode *node) { listNode *ln; listIter li; clusterNodeFailReport *fr; - time_t maxtime = server.cluster->node_timeout*2; + time_t maxtime = server.cluster->node_timeout * + REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT; time_t now = time(NULL); listRewind(l,&li); @@ -631,12 +632,17 @@ void clearNodeFailureIfNeeded(clusterNode *node) { } /* If it is a master and... - * 1) The FAIL state was set more than 2 times the node timeout + 10 sec. + * 1) The FAIL state is old enough. We use our node timeout multiplicator + * plus some additional fixed time. The additional time is useful when + * the node timeout is extremely short and the reaction time of + * the cluster may be longer, so wait at least a few seconds always. * 2) It is yet serving slots from our point of view (not failed over). * Apparently no one is going to fix these slots, clear the FAIL flag. 
*/ if (node->flags & REDIS_NODE_MASTER && node->numslots > 0 && - (now - node->fail_time) > (server.cluster->node_timeout*2+10)) + (now - node->fail_time) > + (server.cluster->node_timeout * REDIS_CLUSTER_FAIL_UNDO_TIME_MULT + + REDIS_CLUSTER_FAIL_UNDO_TIME_ADD)) { redisLog(REDIS_NOTICE, "Clear FAIL state for node %.40s: is reachable again and nobody is serving its slots after some time.", @@ -1418,14 +1424,18 @@ void clusterHandleSlaveFailover(void) { /* Check if our data is recent enough. For now we just use a fixed * constant of ten times the node timeout since the cluster should * react much faster to a master down. */ - if (data_age > server.cluster->node_timeout * 10) return; + if (data_age > + server.cluster->node_timeout * REDIS_CLUSTER_SLAVE_VALIDITY_MULT) + return; /* TODO: check if we are the first slave as well? Or just rely on the * master authorization? */ /* Ask masters if we are authorized to perform the failover. If there * is a pending auth request that's too old, reset it. */ - if (server.cluster->failover_auth_time == 0 || auth_age > 15) + if (server.cluster->failover_auth_time == 0 || + auth_age > + server.cluster->node_timeout * REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT) { redisLog(REDIS_WARNING,"Asking masters if I can failover..."); server.cluster->failover_auth_time = time(NULL); diff --git a/src/redis.h b/src/redis.h index 225dd4a1..0d0f9a85 100644 --- a/src/redis.h +++ b/src/redis.h @@ -524,6 +524,15 @@ typedef struct redisOpArray { #define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */ #define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */ +/* The following defines are amounts of time, sometimes expressed as + * multipliers of the node timeout value (when ending with MULT). */ +#define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15 +#define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */ +#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. 
*/ +#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */ +#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */ +#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 1 /* Auth request retry time. */ + struct clusterNode; /* clusterLink encapsulates everything needed to talk with a remote node. */