From 671c1dfb5629e32f3addfe865e5e5a04f35b007d Mon Sep 17 00:00:00 2001
From: antirez <antirez@gmail.com>
Date: Tue, 5 Nov 2013 17:23:11 +0100
Subject: [PATCH] Sentinel: always send CONFIG REWRITE when changing instance
 role.

This change makes Sentinel less fragile about a number of failure modes.

This commit also fixes a different bug as a side effect, SLAVEOF command
was sent multiple times without incrementing the pending commands count.
---
 src/db.c       |  2 +-
 src/sentinel.c | 64 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/src/db.c b/src/db.c
index 8d538662..1bdcf9e8 100644
--- a/src/db.c
+++ b/src/db.c
@@ -350,7 +350,7 @@ void scanCallback(void *privdata, const dictEntry *de) {
     if (val) listAddNodeTail(keys, val);
 }
 
-/* Try to parse a SCAN cursor stored ad object 'o':
+/* Try to parse a SCAN cursor stored at object 'o':
  * if the cursor is valid, store it as unsigned integer into *cursor and
  * returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the
  * client. */
diff --git a/src/sentinel.c b/src/sentinel.c
index 4bea156d..307a6071 100644
--- a/src/sentinel.c
+++ b/src/sentinel.c
@@ -1518,8 +1518,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
                 (mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2)
             {
                 int retval;
-                retval = redisAsyncCommand(ri->cc,
-                    sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %d",
+                retval = sentinelSendSlaveOf(ri,
                         ri->master->addr->ip,
                         ri->master->addr->port);
                 if (retval == REDIS_OK)
@@ -2521,6 +2520,39 @@ char *sentinelGetObjectiveLeader(sentinelRedisInstance *master) {
     return winner;
 }
 
+/* Send SLAVEOF to the specified instance, always followed by a
+ * CONFIG REWRITE command in order to store the new configuration on disk
+ * when possible (that is, if the Redis instance is recent enough to support
+ * config rewriting, and if the server was started with a configuration file).
+ *
+ * If Host is NULL the function sends "SLAVEOF NO ONE".
+ *
+ * The command returns REDIS_OK if the SLAVEOF command was accepted for
+ * (later) delivery otherwise REDIS_ERR. The command replies are just
+ * discarded. */
+int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
+    char portstr[32];
+
+    ll2string(portstr,sizeof(portstr),port);
+
+    if (host == NULL) {
+        host = "NO";
+        memcpy(portstr,"ONE",4);
+    }
+
+    retval = redisAsyncCommand(ri->cc,
+        sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
+    if (retval == REDIS_ERR) return retval;
+
+    ri->pending_commands++;
+    if (redisAsyncCommand(ri->cc,
+        sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE") == REDIS_OK)
+    {
+        ri->pending_commands++;
+    }
+    return REDIS_OK;
+}
+
 /* Setup the master state to start a failover as a leader.
  *
  * State can be either:
@@ -2750,10 +2782,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
      * We actually register a generic callback for this command as we don't
      * really care about the reply. We check if it worked indirectly observing
      * if INFO returns a different role (master instead of slave). */
-    retval = redisAsyncCommand(ri->promoted_slave->cc,
-        sentinelDiscardReplyCallback, NULL, "SLAVEOF NO ONE");
+    retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
     if (retval != REDIS_OK) return;
-    ri->promoted_slave->pending_commands++;
     sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
         ri->promoted_slave,"%@");
     ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
@@ -2823,10 +2853,6 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
     if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) {
         dictIterator *di;
         dictEntry *de;
-        char master_port[32];
-
-        ll2string(master_port,sizeof(master_port),
-            master->promoted_slave->addr->port);
 
         di = dictGetIterator(master->slaves);
         while((de = dictNext(di)) != NULL) {
@@ -2836,10 +2862,9 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
             if (slave->flags &
                 (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
 
-            retval = redisAsyncCommand(slave->cc,
-                sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
+            retval = sentinelSendSlaveOf(slave,
                     master->promoted_slave->addr->ip,
-                    master_port);
+                    master->promoted_slave->addr->port);
             if (retval == REDIS_OK) {
                 sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
                 slave->flags |= SRI_RECONF_SENT;
@@ -2892,15 +2917,11 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
             continue;
 
         /* Send SLAVEOF <new master>. */
-        ll2string(master_port,sizeof(master_port),
-            master->promoted_slave->addr->port);
-        retval = redisAsyncCommand(slave->cc,
-            sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
+        retval = sentinelSendSlaveOf(slave,
                 master->promoted_slave->addr->ip,
-                master_port);
+                master->promoted_slave->addr->port);
         if (retval == REDIS_OK) {
             slave->flags |= SRI_RECONF_SENT;
-            slave->pending_commands++;
             slave->slave_reconf_sent_time = mstime();
             sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
             in_progress++;
@@ -2982,13 +3003,11 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
  *    back to the master as well, sending a best effort SLAVEOF command.
  */
 void sentinelAbortFailover(sentinelRedisInstance *ri) {
-    char master_port[32];
     dictIterator *di;
     dictEntry *de;
     int sentinel_role;
 
     redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
-    ll2string(master_port,sizeof(master_port),ri->addr->port);
 
     /* Clear failover related flags from slaves.
      * Also if we are the leader make sure to send SLAVEOF commands to all the
@@ -3004,10 +3023,7 @@ void sentinelAbortFailover(sentinelRedisInstance *ri) {
         {
             int retval;
 
-            retval = redisAsyncCommand(slave->cc,
-                sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
-                    ri->addr->ip,
-                    master_port);
+            retval = sentinelSendSlaveOf(slave,ri->addr->ip,ri->addr->port);
             if (retval == REDIS_OK)
                 sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@");
         }