From 18b8bad53cf380471ec28ec097fd38960a246077 Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 18 Feb 2014 08:50:57 +0100 Subject: [PATCH] Sentinel: fix slave promotion timeout. If we can't reconfigure a slave in time during failover, go forward anyway, as the slave will be fixed by Sentinels in the future, once they detect it is misconfigured. Otherwise a failover in progress may never terminate if for some reason the slave is unable to sync with the master while at the same time it is not disconnected. --- src/sentinel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/sentinel.c b/src/sentinel.c index c25e01b1..7713cce7 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -3364,14 +3364,17 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) { /* Skip the promoted slave, and already configured slaves. */ if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue; - /* Clear the SRI_RECONF_SENT flag if too much time elapsed without - * the slave moving forward to the next state. */ + /* If too much time elapsed without the slave moving forward to + * the next state, consider it reconfigured even if it is not. + * Sentinels will detect the slave as misconfigured and fix its + * configuration later. */ if ((slave->flags & SRI_RECONF_SENT) && (mstime() - slave->slave_reconf_sent_time) > SENTINEL_SLAVE_RECONF_RETRY_PERIOD) { sentinelEvent(REDIS_NOTICE,"-slave-reconf-sent-timeout",slave,"%@"); slave->flags &= ~SRI_RECONF_SENT; + slave->flags |= SRI_RECONF_DONE; } /* Nothing to do for instances that are disconnected or already