diff --git a/src/rdb.c b/src/rdb.c index 36e4400f..acf3197f 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -2000,6 +2000,9 @@ void bgsaveCommand(client *c) { } } + rdbSaveInfo rsi, *rsiptr; + rsiptr = rdbPopulateSaveInfo(&rsi); + if (server.rdb_child_pid != -1) { addReplyError(c,"Background save already in progress"); } else if (server.aof_child_pid != -1) { @@ -2012,7 +2015,7 @@ void bgsaveCommand(client *c) { "Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenever " "possible."); } - } else if (rdbSaveBackground(server.rdb_filename,NULL) == C_OK) { + } else if (rdbSaveBackground(server.rdb_filename,rsiptr) == C_OK) { addReplyStatus(c,"Background saving started"); } else { addReply(c,shared.err); @@ -2033,22 +2036,44 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) { *rsi = rsi_init; /* If the instance is a master, we can populate the replication info - * in all the cases, even if sometimes in incomplete (but safe) form. */ - if (!server.masterhost) { - if (server.repl_backlog) rsi->repl_stream_db = server.slaveseldb; - /* Note that if repl_backlog is NULL, it means that histories - * following from this point will trigger a full synchronization - * generating a SELECT statement, so we can leave the currently - * selected DB set to -1. This allows a restarted master to reload - * its replication ID/offset when there are no connected slaves. */ + * only when repl_backlog is not NULL. If the repl_backlog is NULL, + * it means that the instance isn't in any replication chains. In this + * scenario the replication info is useless, because when a slave + * connect to us, the NULL repl_backlog will trigger a full synchronization, + * at the same time we will use a new replid and clear replid2. + * And remember that after free backlog if we reach repl_backlog_time_limit, + * we will use a new replid and clear replid2 too. So there is only one + * scenario which can make repl_stream_db be -1, that is the instance is + * a master, and it have repl_backlog, but server.slaveseldb is -1. */ + if (!server.masterhost && server.repl_backlog) { + rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb; + /* Note that when server.slaveseldb is -1, it means that this master + * didn't apply any write commands after a full synchronization. + * So we can let repl_stream_db be 0, this allows a restarted slave + * to reload replication ID/offset, it's safe because the next write + * command must generate a SELECT statement. */ return rsi; } - /* If the instance is a slave we need a connected master in order to - * fetch the currently selected DB. */ + /* If the instance is a slave we need a connected master + * in order to fetch the currently selected DB. */ if (server.master) { rsi->repl_stream_db = server.master->db->id; return rsi; } + /* It is useful to persist cached_master's db id inside RDB file. + * When a slave lost master's connection, server.master will be + * cached as server.cached_master, after that a slave can not + * increment the master_repl_offset because slave only apply data + * from connected master, so the cached_master can hold right + * replication info. But please note that this action is safe + * only after we fix the free backlog problem, because when a master + * turn to be a slave, it will use itself as the server.cached_master, + * that is dangerous if we didn't use a new replication ID after + * free backlog. */ + if (server.cached_master) { + rsi->repl_stream_db = server.cached_master->db->id; + return rsi; + } return NULL; } diff --git a/src/replication.c b/src/replication.c index e0b3d910..fe7b0f73 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2613,6 +2613,18 @@ void replicationCron(void) { time_t idle = server.unixtime - server.repl_no_slaves_since; if (idle > server.repl_backlog_time_limit) { + /* When we free the backlog, we always use a new + * replication ID and clear the ID2. Since without + * backlog we can not increment master_repl_offset + * even do write commands, that may lead to inconsistency + * when we try to connect a "slave-before" master + * (if this master is our slave before, our replid + * equals the master's replid2). As the master have our + * history, so we can match the master's replid2 and + * second_replid_offset, that make partial sync work, + * but the data is inconsistent. */ + changeReplicationId(); + clearReplicationId2(); freeReplicationBacklog(); serverLog(LL_NOTICE, "Replication backlog freed after %d seconds " diff --git a/src/server.c b/src/server.c index 69dca456..88892626 100644 --- a/src/server.c +++ b/src/server.c @@ -3540,7 +3540,8 @@ void loadDataFromDisk(void) { rsi.repl_id_is_set && rsi.repl_offset != -1 && /* Note that older implementations may save a repl_stream_db - * of -1 inside the RDB file. */ + * of -1 inside the RDB file in a wrong way, see more information + * in function rdbPopulateSaveInfo. */ rsi.repl_stream_db != -1) { memcpy(server.replid,rsi.repl_id,sizeof(server.replid));