From 55ba77270363400625f4563788d8f842408389a2 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 5 Aug 2015 16:49:16 +0200 Subject: [PATCH] Fix replication slave pings period. For PINGs we use the period configured by the user, but for the newlines of slaves waiting for an RDB to be created (including slaves waiting for the FULLRESYNC reply) we need to ping with frequency of 1 second, since the timeout is fixed and needs to be refreshed. --- src/replication.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/replication.c b/src/replication.c index 5889352a..30d1e299 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2009,6 +2009,8 @@ long long replicationGetSlaveOffset(void) { /* Replication cron function, called 1 time per second. */ void replicationCron(void) { + static long long replication_cron_loops = 0; + /* Non blocking connection timeout? */ if (server.masterhost && (server.repl_state == REPL_STATE_CONNECTING || @@ -2055,31 +2057,34 @@ void replicationCron(void) { * So slaves can implement an explicit timeout to masters, and will * be able to detect a link disconnection even if the TCP connection * will not actually go down. */ - if (!(server.cronloops % (server.repl_ping_slave_period * server.hz))) { - listIter li; - listNode *ln; - robj *ping_argv[1]; + listIter li; + listNode *ln; + robj *ping_argv[1]; - /* First, send PING */ + /* First, send PING according to ping_slave_period. */ + if ((replication_cron_loops % server.repl_ping_slave_period) == 0) { ping_argv[0] = createStringObject("PING",4); - replicationFeedSlaves(server.slaves, server.slaveseldb, ping_argv, 1); + replicationFeedSlaves(server.slaves, server.slaveseldb, + ping_argv, 1); decrRefCount(ping_argv[0]); + } - /* Second, send a newline to all the slaves in pre-synchronization - * stage, that is, slaves waiting for the master to create the RDB file. - * The newline will be ignored by the slave but will refresh the - * last-io timer preventing a timeout. */ - listRewind(server.slaves,&li); - while((ln = listNext(&li))) { - client *slave = ln->value; + /* Second, send a newline to all the slaves in pre-synchronization + * stage, that is, slaves waiting for the master to create the RDB file. + * The newline will be ignored by the slave but will refresh the + * last-io timer preventing a timeout. In this case we ignore the + * ping period and refresh the connection once per second since certain + * timeouts are set at a few seconds (example: PSYNC response). */ + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + client *slave = ln->value; - if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START || - (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && - server.rdb_child_type != RDB_CHILD_TYPE_SOCKET)) - { - if (write(slave->fd, "\n", 1) == -1) { - /* Don't worry, it's just a ping. */ - } + if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_START || + (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END && + server.rdb_child_type != RDB_CHILD_TYPE_SOCKET)) + { + if (write(slave->fd, "\n", 1) == -1) { + /* Don't worry, it's just a ping. */ } } } @@ -2176,4 +2181,5 @@ void replicationCron(void) { /* Refresh the number of slaves with lag <= min-slaves-max-lag. */ refreshGoodSlavesCount(); + replication_cron_loops++; /* Incremented with frequency 1 HZ. */ }