diff --git a/src/cluster.c b/src/cluster.c index 7424f2d9..68628f06 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1565,10 +1565,16 @@ void clusterCron(void) { } /* If we are a slave and our master is down, but is serving slots, - * call the function that handles the failover. */ + * call the function that handles the failover. + * This function is called with a small delay in order to let the + * FAIL message propagate after failure detection; this is not + * strictly required but makes 99.99% of failovers mechanically + * simpler. */ if (server.cluster->myself->flags & REDIS_NODE_SLAVE && server.cluster->myself->slaveof && server.cluster->myself->slaveof->flags & REDIS_NODE_FAIL && + (server.unixtime - server.cluster->myself->slaveof->fail_time) > + REDIS_CLUSTER_FAILOVER_DELAY && server.cluster->myself->slaveof->numslots != 0) { clusterHandleSlaveFailover(); diff --git a/src/redis.h b/src/redis.h index 2c662d6d..0eb74675 100644 --- a/src/redis.h +++ b/src/redis.h @@ -521,6 +521,7 @@ typedef struct redisOpArray { #define REDIS_CLUSTER_FAIL 1 /* The cluster can't work */ #define REDIS_CLUSTER_NAMELEN 40 /* sha1 hex length */ #define REDIS_CLUSTER_PORT_INCR 10000 /* Cluster port = baseport + PORT_INCR */ +#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */ struct clusterNode;