From d2bc6dc39a50a412234ed05147b1e92e68fbe8aa Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 12 Nov 2013 17:07:31 +0100 Subject: [PATCH] Sentinel: new failover algo, desync slaves and update config epoch. --- src/sentinel.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/sentinel.c b/src/sentinel.c index 7cf32d7e..5ad7bee3 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1509,6 +1509,12 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { (ri->master->failover_state == SENTINEL_FAILOVER_STATE_WAIT_PROMOTION)) { + /* Now that we are sure the slave was reconfigured as a master + * set the master configuration epoch to the epoch in which we won the + * election to perform this failover. This will force the other + * Sentinels to update their config (assuming there is not + * a newer one already available). */ + ri->master->config_epoch = ri->master->failover_epoch; ri->master->failover_state = SENTINEL_FAILOVER_STATE_RECONF_SLAVES; ri->master->failover_state_change_time = mstime(); sentinelEvent(REDIS_WARNING,"+promoted-slave",ri,"%@"); @@ -2417,6 +2423,13 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char master->leader_epoch = sentinel.current_epoch; sentinelEvent(REDIS_WARNING,"+vote-for-leader",master,"%s %llu", master->leader, (unsigned long long) master->leader_epoch); + /* If we did not vote for ourselves, set the master failover start + * time to now, in order to force a delay before we can start a + * failover for the same master. + * + * The random addition is useful to desynchronize the slaves a bit + * and reduce the chance that no slave gets majority. */ + master->failover_start_time = mstime() + rand() % 2000; } *leader_epoch = master->leader_epoch; @@ -2671,7 +2684,14 @@ void sentinelFailoverWaitStart(sentinelRedisInstance *ri) { sdsfree(leader); /* If I'm not the leader, I can't continue with the failover. 
*/ - if (!isleader) return; + if (!isleader) { + /* Abort the failover if I'm not the leader after some time. */ + if (mstime() - ri->failover_start_time > 10000) { + sentinelEvent(REDIS_WARNING,"-failover-abort-not-elected",ri,"%@"); + sentinelAbortFailover(ri); + } + return; + } sentinelEvent(REDIS_WARNING,"+elected-leader",ri,"%@"); /* Start the failover going to the next state if enough time has