From 671c1dfb5629e32f3addfe865e5e5a04f35b007d Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Nov 2013 17:23:11 +0100 Subject: [PATCH] Sentinel: always send CONFIG REWRITE when changing instance role. This change makes Sentinel less fragile about a number of failure modes. This commit also fixes a different bug as a side effect, SLAVEOF command was sent multiple times without incrementing the pending commands count. --- src/db.c | 2 +- src/sentinel.c | 64 +++++++++++++++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/db.c b/src/db.c index 8d538662..1bdcf9e8 100644 --- a/src/db.c +++ b/src/db.c @@ -350,7 +350,7 @@ void scanCallback(void *privdata, const dictEntry *de) { if (val) listAddNodeTail(keys, val); } -/* Try to parse a SCAN cursor stored ad object 'o': +/* Try to parse a SCAN cursor stored at object 'o': * if the cursor is valid, store it as unsigned integer into *cursor and * returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the * client. */ diff --git a/src/sentinel.c b/src/sentinel.c index 4bea156d..307a6071 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -1518,8 +1518,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) { (mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2) { int retval; - retval = redisAsyncCommand(ri->cc, - sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %d", + retval = sentinelSendSlaveOf(ri, ri->master->addr->ip, ri->master->addr->port); if (retval == REDIS_OK) @@ -2521,6 +2520,39 @@ char *sentinelGetObjectiveLeader(sentinelRedisInstance *master) { return winner; } +/* Send SLAVEOF to the specified instance, always followed by a + * CONFIG REWRITE command in order to store the new configuration on disk + * when possible (that is, if the Redis instance is recent enough to support + * config rewriting, and if the server was started with a configuration file). + * + * If Host is NULL the function sends "SLAVEOF NO ONE". + * + * The command returns REDIS_OK if the SLAVEOF command was accepted for + * (later) delivery otherwise REDIS_ERR. The command replies are just + * discarded. */ +int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) { + char portstr[32]; + + ll2string(portstr,sizeof(portstr),port); + + if (host == NULL) { + host = "NO"; + memcpy(portstr,"ONE",4); + } + + retval = redisAsyncCommand(ri->cc, + sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr); + if (retval == REDIS_ERR) return retval; + + ri->pending_commands++; + if (redisAsyncCommand(ri->cc, + sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE") == REDIS_OK) + { + ri->pending_commands++; + } + return REDIS_OK; +} + /* Setup the master state to start a failover as a leader. * * State can be either: @@ -2750,10 +2782,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) { * We actually register a generic callback for this command as we don't * really care about the reply. We check if it worked indirectly observing * if INFO returns a different role (master instead of slave). */ - retval = redisAsyncCommand(ri->promoted_slave->cc, - sentinelDiscardReplyCallback, NULL, "SLAVEOF NO ONE"); + retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0); if (retval != REDIS_OK) return; - ri->promoted_slave->pending_commands++; sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion", ri->promoted_slave,"%@"); ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION; @@ -2823,10 +2853,6 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) { if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) { dictIterator *di; dictEntry *de; - char master_port[32]; - - ll2string(master_port,sizeof(master_port), - master->promoted_slave->addr->port); di = dictGetIterator(master->slaves); while((de = dictNext(di)) != NULL) { @@ -2836,10 +2862,9 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) { if (slave->flags & (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue; - retval = redisAsyncCommand(slave->cc, - sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", + retval = sentinelSendSlaveOf(slave, master->promoted_slave->addr->ip, - master_port); + master->promoted_slave->addr->port); if (retval == REDIS_OK) { sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@"); slave->flags |= SRI_RECONF_SENT; @@ -2892,15 +2917,11 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) { continue; /* Send SLAVEOF . */ - ll2string(master_port,sizeof(master_port), - master->promoted_slave->addr->port); - retval = redisAsyncCommand(slave->cc, - sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", + retval = sentinelSendSlaveOf(slave, master->promoted_slave->addr->ip, - master_port); + master->promoted_slave->addr->port); if (retval == REDIS_OK) { slave->flags |= SRI_RECONF_SENT; - slave->pending_commands++; slave->slave_reconf_sent_time = mstime(); sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@"); in_progress++; @@ -2982,13 +3003,11 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) { * back to the master as well, sending a best effort SLAVEOF command. */ void sentinelAbortFailover(sentinelRedisInstance *ri) { - char master_port[32]; dictIterator *di; dictEntry *de; int sentinel_role; redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS); - ll2string(master_port,sizeof(master_port),ri->addr->port); /* Clear failover related flags from slaves. * Also if we are the leader make sure to send SLAVEOF commands to all the @@ -3004,10 +3023,7 @@ void sentinelAbortFailover(sentinelRedisInstance *ri) { { int retval; - retval = redisAsyncCommand(slave->cc, - sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", - ri->addr->ip, - master_port); + retval = sentinelSendSlaveOf(slave,ri->addr->ip,ri->addr->port); if (retval == REDIS_OK) sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@"); }