From 233729fe7f6a26bfff62eba78716b711dc1719ce Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 30 Jan 2015 11:54:18 +0100 Subject: [PATCH] Cluster: some bias towwards FAIL/PFAIL nodes in gossip sections. This improves PFAIL -> FAIL switch. Too late at this point in the RC releases to add proper PFAIL/FAIL separate dictionary to do this in a less randomized way. Tested in practice with experiments that this helps. PFAIL -> FAIL average with 20 nodes and node-timeout set to 5 seconds takes 2.5 seconds without this commit, 1 second with this commit. --- src/cluster.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index a296bdd8..2da0ed5f 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -2158,7 +2158,7 @@ void clusterSendPing(clusterLink *link, int type) { clusterBuildMessageHdr(hdr,type); /* Populate the gossip fields */ - int maxiterations = wanted*2; + int maxiterations = wanted*3; while(freshnodes > 0 && gossipcount < wanted && maxiterations--) { dictEntry *de = dictGetRandomKey(server.cluster->nodes); clusterNode *this = dictGetVal(de); @@ -2169,6 +2169,11 @@ void clusterSendPing(clusterLink *link, int type) { * already, so we just gossip about other nodes. */ if (this == myself) continue; + /* Give a bias to FAIL/PFAIL nodes. */ + if (maxiterations > wanted*2 && + !(this->flags & (REDIS_NODE_PFAIL|REDIS_NODE_FAIL))) + continue; + /* In the gossip section don't include: * 1) Nodes in HANDSHAKE state. * 3) Nodes with the NOADDR flag set. @@ -2201,8 +2206,6 @@ void clusterSendPing(clusterLink *link, int type) { gossip->notused2 = 0; gossipcount++; } - redisLog(REDIS_VERBOSE,"WANTED: %d, USED_ITER: %d, GOSSIPCOUNT: %d", - wanted, wanted*2-maxiterations, gossipcount); /* Ready to send... fix the totlen fiend and queue the message in the * output buffer. */