From 6ddf0ea29340891d9934892b6c239d160d1ee0c4 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Wed, 1 Nov 2017 17:32:23 +0800
Subject: [PATCH 1/5] PSYNC2: safe free backlog when reach the time limit

When we free the backlog, we should use a new
replication ID and clear the ID2. Since without
backlog we can not increment master_repl_offset
even do write commands, that may lead to inconsistency
when we try to connect a "slave-before" master
(if this master is our slave before, our replid
equals the master's replid2). As the master have our
history, so we can match the master's replid2 and
second_replid_offset, that make partial sync work,
but the data is inconsistent.
---
 src/replication.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/replication.c b/src/replication.c
index e0b3d910..fe7b0f73 100644
--- a/src/replication.c
+++ b/src/replication.c
@@ -2613,6 +2613,18 @@ void replicationCron(void) {
         time_t idle = server.unixtime - server.repl_no_slaves_since;
 
         if (idle > server.repl_backlog_time_limit) {
+            /* When we free the backlog, we always use a new
+             * replication ID and clear the ID2. Since without
+             * backlog we can not increment master_repl_offset
+             * even do write commands, that may lead to inconsistency
+             * when we try to connect a "slave-before" master
+             * (if this master is our slave before, our replid
+             * equals the master's replid2). As the master have our
+             * history, so we can match the master's replid2 and
+             * second_replid_offset, that make partial sync work,
+             * but the data is inconsistent. */
+            changeReplicationId();
+            clearReplicationId2();
             freeReplicationBacklog();
             serverLog(LL_NOTICE,
                 "Replication backlog freed after %d seconds "

From 885c4f856e5a41e86ebf2a3233dc3ba2bae6945e Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Wed, 1 Nov 2017 17:52:43 +0800
Subject: [PATCH 2/5] PSYNC2 & RDB: fix the missing rdbSaveInfo for BGSAVE

---
 src/rdb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/rdb.c b/src/rdb.c
index 125df607..70b13fb9 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -1999,6 +1999,9 @@ void bgsaveCommand(client *c) {
         }
     }
 
+    rdbSaveInfo rsi, *rsiptr;
+    rsiptr = rdbPopulateSaveInfo(&rsi);
+
     if (server.rdb_child_pid != -1) {
         addReplyError(c,"Background save already in progress");
     } else if (server.aof_child_pid != -1) {
@@ -2011,7 +2014,7 @@ void bgsaveCommand(client *c) {
                 "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever "
                 "possible.");
         }
-    } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) {
+    } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) {
         addReplyStatus(c,"Background saving started");
     } else {
         addReply(c,shared.err);

From b8579c225cd0ea83222938bb188cd74501fb1627 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Thu, 2 Nov 2017 10:45:33 +0800
Subject: [PATCH 3/5] PSYNC2: clarify the scenario when repl_stream_db can be
 -1

---
 src/rdb.c    | 27 +++++++++++++++++++--------
 src/server.c |  3 ++-
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/rdb.c b/src/rdb.c
index 70b13fb9..8de3cd96 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2035,14 +2035,25 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
     *rsi = rsi_init;
 
     /* If the instance is a master, we can populate the replication info
-     * in all the cases, even if sometimes in incomplete (but safe) form. */
-    if (!server.masterhost) {
-        if (server.repl_backlog) rsi->repl_stream_db = server.slaveseldb;
-        /* Note that if repl_backlog is NULL, it means that histories
-         * following from this point will trigger a full synchronization
-         * generating a SELECT statement, so we can leave the currently
-         * selected DB set to -1. This allows a restarted master to reload
-         * its replication ID/offset when there are no connected slaves. */
+     * only when repl_backlog is not NULL. If the repl_backlog is NULL,
+     * it means that the instance isn't in any replication chains. In this
+     * scenario the replication info is useless, because when a slave
+     * connect to us, the NULL repl_backlog will trigger a full synchronization,
+     * at the same time we will use a new replid and clear replid2.
+     * And remember that after free backlog if we reach repl_backlog_time_limit,
+     * we will use a new replid and clear replid2 too. So there is only one
+     * scenario which can make repl_stream_db be -1, that is the instance is
+     * a master, and it have repl_backlog, but server.slaveseldb is -1. */
+    if (!server.masterhost && server.repl_backlog) {
+        rsi->repl_stream_db = server.slaveseldb;
+        /* Note that server.slaveseldb may be -1, it means that this master
+         * didn't apply any write commands after a full synchronization,
+         * so we can leave the currently selected DB set to -1, because the
+         * next write command must generate a SELECT statement. This allows
+         * a restarted slave to reload replication ID/offset even the repl_stream_db
+         * is -1, but we should not do that, because older implementations
+         * may save a repl_stream_db as -1 in a wrong way. Maybe we can fix
+         * it in the next release version. */
         return rsi;
     }
 
diff --git a/src/server.c b/src/server.c
index 61291fde..d617d3ec 100644
--- a/src/server.c
+++ b/src/server.c
@@ -3536,7 +3536,8 @@ void loadDataFromDisk(void) {
                 rsi.repl_id_is_set &&
                 rsi.repl_offset != -1 &&
                 /* Note that older implementations may save a repl_stream_db
-                 * of -1 inside the RDB file. */
+                 * of -1 inside the RDB file in a wrong way, see more information
+                 * in function rdbPopulateSaveInfo. */
                 rsi.repl_stream_db != -1)
             {
                 memcpy(server.replid,rsi.repl_id,sizeof(server.replid));

From 93037f764209e86f3fcb240a642334fd67552935 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Wed, 22 Nov 2017 12:05:30 +0800
Subject: [PATCH 4/5] PSYNC2: make repl_stream_db never be -1

it means that after this change all the replication
info in RDB is valid, and it can distinguish us from
the older version.
---
 src/rdb.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/src/rdb.c b/src/rdb.c
index 8de3cd96..386b6a78 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2045,15 +2045,12 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
      * scenario which can make repl_stream_db be -1, that is the instance is
      * a master, and it have repl_backlog, but server.slaveseldb is -1. */
     if (!server.masterhost && server.repl_backlog) {
-        rsi->repl_stream_db = server.slaveseldb;
-        /* Note that server.slaveseldb may be -1, it means that this master
-         * didn't apply any write commands after a full synchronization,
-         * so we can leave the currently selected DB set to -1, because the
-         * next write command must generate a SELECT statement. This allows
-         * a restarted slave to reload replication ID/offset even the repl_stream_db
-         * is -1, but we should not do that, because older implementations
-         * may save a repl_stream_db as -1 in a wrong way. Maybe we can fix
-         * it in the next release version. */
+        rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
+        /* Note that when server.slaveseldb is -1, it means that this master
+         * didn't apply any write commands after a full synchronization.
+         * So we can let repl_stream_db be 0, this allows a restarted slave
+         * to reload replication ID/offset, it's safe because the next write
+         * command must generate a SELECT statement. */
         return rsi;
     }
 

From ea2e51c630f972b23d3557f6369b13d983c12f17 Mon Sep 17 00:00:00 2001
From: "zhaozhao.zz" <zhaozhao.zz@alibaba-inc.com>
Date: Sat, 4 Nov 2017 23:05:00 +0800
Subject: [PATCH 5/5] PSYNC2: persist cached_master's dbid inside the RDB

---
 src/rdb.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/rdb.c b/src/rdb.c
index 386b6a78..a7334449 100644
--- a/src/rdb.c
+++ b/src/rdb.c
@@ -2054,11 +2054,25 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
         return rsi;
     }
 
-    /* If the instance is a slave we need a connected master in order to
-     * fetch the currently selected DB. */
+    /* If the instance is a slave we need a connected master
+     * in order to fetch the currently selected DB. */
     if (server.master) {
         rsi->repl_stream_db = server.master->db->id;
         return rsi;
     }
+    /* It is useful to persist cached_master's db id inside RDB file.
+     * When a slave lost master's connection, server.master will be
+     * cached as server.cached_master, after that a slave can not
+     * increment the master_repl_offset because slave only apply data
+     * from connected master, so the cached_master can hold right
+     * replication info. But please note that this action is safe
+     * only after we fix the free backlog problem, because when a master
+     * turn to be a slave, it will use itself as the server.cached_master,
+     * that is dangerous if we didn't use a new replication ID after
+     * free backlog. */
+    if (server.cached_master) {
+        rsi->repl_stream_db = server.cached_master->db->id;
+        return rsi;
+    }
     return NULL;
 }