mirror of
https://github.com/fluencelabs/redis
synced 2025-04-01 23:31:03 +00:00
Cluster: detect cluster reconfiguration when master slots drop to 0.
The old algorithm used a PROMOTED flag and explicitly checks about slave->master convertions. Wit the new cluster meta-data propagation algorithm we just look at the configEpoch to check if we need to reconfigure slots, then: 1) If a node is a master but it reaches zero served slots becuase of reconfiguration. 2) If a node is a slave but the master reaches zero served slots because of a reconfiguration. We switch as a replica of the new slots owner.
This commit is contained in:
parent
62b1591439
commit
4dc247eb31
@ -135,8 +135,6 @@ int clusterLoadConfig(char *filename) {
|
|||||||
n->flags |= REDIS_NODE_HANDSHAKE;
|
n->flags |= REDIS_NODE_HANDSHAKE;
|
||||||
} else if (!strcasecmp(s,"noaddr")) {
|
} else if (!strcasecmp(s,"noaddr")) {
|
||||||
n->flags |= REDIS_NODE_NOADDR;
|
n->flags |= REDIS_NODE_NOADDR;
|
||||||
} else if (!strcasecmp(s,"promoted")) {
|
|
||||||
n->flags |= REDIS_NODE_PROMOTED;
|
|
||||||
} else if (!strcasecmp(s,"noflags")) {
|
} else if (!strcasecmp(s,"noflags")) {
|
||||||
/* nothing to do */
|
/* nothing to do */
|
||||||
} else {
|
} else {
|
||||||
@ -755,7 +753,6 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) {
|
|||||||
if (flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
|
if (flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
|
||||||
if (flags & REDIS_NODE_HANDSHAKE) ci = sdscat(ci,"handshake,");
|
if (flags & REDIS_NODE_HANDSHAKE) ci = sdscat(ci,"handshake,");
|
||||||
if (flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
|
if (flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
|
||||||
if (flags & REDIS_NODE_PROMOTED) ci = sdscat(ci,"promoted,");
|
|
||||||
if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
|
if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
|
||||||
|
|
||||||
redisLog(REDIS_DEBUG,"GOSSIP %.40s %s:%d %s",
|
redisLog(REDIS_DEBUG,"GOSSIP %.40s %s:%d %s",
|
||||||
@ -1051,9 +1048,6 @@ int clusterProcessPacket(clusterLink *link) {
|
|||||||
{
|
{
|
||||||
/* Node is a master. */
|
/* Node is a master. */
|
||||||
if (sender->flags & REDIS_NODE_SLAVE) {
|
if (sender->flags & REDIS_NODE_SLAVE) {
|
||||||
/* Slave turned into master! */
|
|
||||||
clusterNode *oldmaster = sender->slaveof;
|
|
||||||
|
|
||||||
/* Reconfigure node as master. */
|
/* Reconfigure node as master. */
|
||||||
if (sender->slaveof)
|
if (sender->slaveof)
|
||||||
clusterNodeRemoveSlave(sender->slaveof,sender);
|
clusterNodeRemoveSlave(sender->slaveof,sender);
|
||||||
@ -1061,29 +1055,6 @@ int clusterProcessPacket(clusterLink *link) {
|
|||||||
sender->flags |= REDIS_NODE_MASTER;
|
sender->flags |= REDIS_NODE_MASTER;
|
||||||
sender->slaveof = NULL;
|
sender->slaveof = NULL;
|
||||||
|
|
||||||
/* If this node used to be our slave, and now has the
|
|
||||||
* PROMOTED flag set. We'll turn ourself into a slave
|
|
||||||
* of the new master. */
|
|
||||||
if (flags & REDIS_NODE_PROMOTED &&
|
|
||||||
oldmaster == server.cluster->myself)
|
|
||||||
{
|
|
||||||
redisLog(REDIS_WARNING,"One of my slaves took my place. Reconfiguring myself as a replica of %.40s", sender->name);
|
|
||||||
clusterDelNodeSlots(server.cluster->myself);
|
|
||||||
clusterSetMaster(sender);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If we are a slave, and this node used to be a slave
|
|
||||||
* of our master, and now has the PROMOTED flag set, we
|
|
||||||
* need to switch our replication setup over it. */
|
|
||||||
if (flags & REDIS_NODE_PROMOTED &&
|
|
||||||
server.cluster->myself->flags & REDIS_NODE_SLAVE &&
|
|
||||||
server.cluster->myself->slaveof == oldmaster)
|
|
||||||
{
|
|
||||||
redisLog(REDIS_WARNING,"One of the slaves failed over my master. Reconfiguring myself as a replica of %.40s", sender->name);
|
|
||||||
clusterDelNodeSlots(server.cluster->myself);
|
|
||||||
clusterSetMaster(sender);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Update config and state. */
|
/* Update config and state. */
|
||||||
update_state = 1;
|
update_state = 1;
|
||||||
update_config = 1;
|
update_config = 1;
|
||||||
@ -1125,26 +1096,55 @@ int clusterProcessPacket(clusterLink *link) {
|
|||||||
changes =
|
changes =
|
||||||
memcmp(sender->slots,hdr->myslots,sizeof(hdr->myslots)) != 0;
|
memcmp(sender->slots,hdr->myslots,sizeof(hdr->myslots)) != 0;
|
||||||
if (changes) {
|
if (changes) {
|
||||||
|
clusterNode *curmaster, *newmaster = NULL;
|
||||||
|
|
||||||
|
/* Here we set curmaster to this node or the node this node
|
||||||
|
* replicates to if it's a slave. In the for loop we are
|
||||||
|
* interested to check if slots are taken away from curmaster. */
|
||||||
|
if (server.cluster->myself->flags & REDIS_NODE_MASTER)
|
||||||
|
curmaster = server.cluster->myself;
|
||||||
|
else
|
||||||
|
curmaster = server.cluster->myself->slaveof;
|
||||||
|
|
||||||
for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
|
for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) {
|
||||||
if (bitmapTestBit(hdr->myslots,j)) {
|
if (bitmapTestBit(hdr->myslots,j)) {
|
||||||
/* If this slot was not served, or served by a node
|
/* We rebind the slot to the new node claiming it if:
|
||||||
* in FAIL state, update the table with the new node
|
* 1) The slot was unassigned.
|
||||||
* claiming to serve the slot. */
|
* 2) The new node claims it with a greater configEpoch. */
|
||||||
if (server.cluster->slots[j] == sender) continue;
|
if (server.cluster->slots[j] == sender) continue;
|
||||||
if (server.cluster->slots[j] == NULL ||
|
if (server.cluster->slots[j] == NULL ||
|
||||||
server.cluster->slots[j]->flags & REDIS_NODE_FAIL)
|
server.cluster->slots[j]->configEpoch <
|
||||||
|
senderConfigEpoch)
|
||||||
{
|
{
|
||||||
|
if (server.cluster->slots[j] == curmaster)
|
||||||
|
newmaster = sender;
|
||||||
clusterDelSlot(j);
|
clusterDelSlot(j);
|
||||||
clusterAddSlot(sender,j);
|
clusterAddSlot(sender,j);
|
||||||
update_state = update_config = 1;
|
update_state = update_config = 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* This node claims to no longer handling the slot,
|
/* This node claims to no longer handling the slot,
|
||||||
* however we don't change our config as this is likely
|
* however we don't change our config as this is likely:
|
||||||
* happening because a resharding is in progress, and
|
* 1) Rehashing in progress.
|
||||||
* it already knows where to redirect clients. */
|
* 2) Failover.
|
||||||
|
* In both cases we'll be informed about who is serving
|
||||||
|
* the slot eventually. In the meantime it's up to the
|
||||||
|
* original owner to try to redirect our clients to the
|
||||||
|
* right node. */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If at least one slot was reassigned from a node to another node
|
||||||
|
* with a greater configEpoch, it is possible that:
|
||||||
|
* 1) We are a master is left without slots. This means that we were
|
||||||
|
* failed over and we should turn into a replica of the new
|
||||||
|
* master.
|
||||||
|
* 2) We are a slave and our master is left without slots. We need
|
||||||
|
* to replicate to the new slots owner. */
|
||||||
|
if (newmaster && curmaster->numslots == 0) {
|
||||||
|
redisLog(REDIS_WARNING,"Configuration change detected. Reconfiguring myself as a replica of %.40s", sender->name);
|
||||||
|
clusterSetMaster(sender);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1681,7 +1681,6 @@ void clusterHandleSlaveFailover(void) {
|
|||||||
server.cluster->myself);
|
server.cluster->myself);
|
||||||
server.cluster->myself->flags &= ~REDIS_NODE_SLAVE;
|
server.cluster->myself->flags &= ~REDIS_NODE_SLAVE;
|
||||||
server.cluster->myself->flags |= REDIS_NODE_MASTER;
|
server.cluster->myself->flags |= REDIS_NODE_MASTER;
|
||||||
server.cluster->myself->flags |= REDIS_NODE_PROMOTED;
|
|
||||||
server.cluster->myself->slaveof = NULL;
|
server.cluster->myself->slaveof = NULL;
|
||||||
replicationUnsetMaster();
|
replicationUnsetMaster();
|
||||||
|
|
||||||
@ -2109,9 +2108,6 @@ void clusterSetMaster(clusterNode *n) {
|
|||||||
myself->flags &= ~REDIS_NODE_MASTER;
|
myself->flags &= ~REDIS_NODE_MASTER;
|
||||||
myself->flags |= REDIS_NODE_SLAVE;
|
myself->flags |= REDIS_NODE_SLAVE;
|
||||||
}
|
}
|
||||||
/* Clear the promoted flag anyway if we are a slave, to ensure it will
|
|
||||||
* be set only when the node turns into a master because of fail over. */
|
|
||||||
myself->flags &= ~REDIS_NODE_PROMOTED;
|
|
||||||
myself->slaveof = n;
|
myself->slaveof = n;
|
||||||
replicationSetMaster(n->ip, n->port);
|
replicationSetMaster(n->ip, n->port);
|
||||||
}
|
}
|
||||||
@ -2159,7 +2155,6 @@ sds clusterGenNodesDescription(int filter) {
|
|||||||
if (node->flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
|
if (node->flags & REDIS_NODE_FAIL) ci = sdscat(ci,"fail,");
|
||||||
if (node->flags & REDIS_NODE_HANDSHAKE) ci =sdscat(ci,"handshake,");
|
if (node->flags & REDIS_NODE_HANDSHAKE) ci =sdscat(ci,"handshake,");
|
||||||
if (node->flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
|
if (node->flags & REDIS_NODE_NOADDR) ci = sdscat(ci,"noaddr,");
|
||||||
if (node->flags & REDIS_NODE_PROMOTED) ci = sdscat(ci,"promoted,");
|
|
||||||
if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
|
if (ci[sdslen(ci)-1] == ',') ci[sdslen(ci)-1] = ' ';
|
||||||
|
|
||||||
/* Slave of... or just "-" */
|
/* Slave of... or just "-" */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user