From 1461422ce66056eb79231e8240dde01db2260fa0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 4 Oct 2013 16:12:25 +0200 Subject: [PATCH] Replication: install the write handler when reusing a cached master. Sometimes when we resurrect a cached master after a successful partial resynchronization attempt, there is pending data in the output buffers of the client structure representing the master (likely REPLCONF ACK commands). If we don't reinstall the write handler, it will never be installed again by the addReply*() family of functions as they'll assume that if there is already data pending, the write handler is already installed. This bug caused some slaves, after a successful partial sync, to never send REPLCONF ACK and to be continuously detected as timing out by the master, resulting in a disconnection / reconnection loop. --- src/replication.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/replication.c b/src/replication.c index bcc80b11..8102fc2d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1375,6 +1375,16 @@ void replicationResurrectCachedMaster(int newfd) { redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the readable handler: %s", strerror(errno)); freeClientAsync(server.master); /* Close ASAP. */ } + + /* We may also need to install the write handler as well if there is + * pending data in the write buffers. */ + if (server.master->bufpos || listLength(server.master->reply)) { + if (aeCreateFileEvent(server.el, newfd, AE_WRITABLE, + sendReplyToClient, server.master)) { + redisLog(REDIS_WARNING,"Error resurrecting the cached master, impossible to add the writable handler: %s", strerror(errno)); + freeClientAsync(server.master); /* Close ASAP. */ + } + } } /* ------------------------- MIN-SLAVES-TO-WRITE --------------------------- */