Fix replicas migration by adding a new flag.

Some time ago I broken replicas migration (reported in #2924).
The idea was to prevent masters without replicas from getting replicas
because of replica migration, I remember it to create issues with tests,
but there is no clue in the commit message about why it was so
undesirable.

However my patch as a side effect totally ruined the concept of replicas
migration since we want it to work also for instances that, technically,
never had slaves in the past: promoted slaves.

So now instead the ability to be targeted by replicas migration, is a
new flag "migrate-to". It only applies to masters, and is set in the
following two cases:

1. When a master gets a slave, it is set.
2. When a slave turns into a master because of fail over, it is set.

This way replicas migration targets are only masters that used to have
slaves, and slaves of masters (that used to have slaves... obviously)
and are promoted.

The new flag is only internal, and is never exposed in the output nor
persisted in the nodes configuration, since all the information to
handle it are implicit in the cluster configuration we already have.
This commit is contained in:
antirez 2015-12-09 22:52:53 +01:00
parent f1472252eb
commit e0f22df995
2 changed files with 31 additions and 20 deletions

View File

@ -783,6 +783,8 @@ int clusterNodeRemoveSlave(clusterNode *master, clusterNode *slave) {
(sizeof(*master->slaves) * remaining_slaves)); (sizeof(*master->slaves) * remaining_slaves));
} }
master->numslaves--; master->numslaves--;
if (master->numslaves == 0)
master->flags &= ~CLUSTER_NODE_MIGRATE_TO;
return C_OK; return C_OK;
} }
} }
@ -799,6 +801,7 @@ int clusterNodeAddSlave(clusterNode *master, clusterNode *slave) {
sizeof(clusterNode*)*(master->numslaves+1)); sizeof(clusterNode*)*(master->numslaves+1));
master->slaves[master->numslaves] = slave; master->slaves[master->numslaves] = slave;
master->numslaves++; master->numslaves++;
master->flags |= CLUSTER_NODE_MIGRATE_TO;
return C_OK; return C_OK;
} }
@ -1413,7 +1416,10 @@ int nodeUpdateAddressIfNeeded(clusterNode *node, clusterLink *link, int port) {
void clusterSetNodeAsMaster(clusterNode *n) { void clusterSetNodeAsMaster(clusterNode *n) {
if (nodeIsMaster(n)) return; if (nodeIsMaster(n)) return;
if (n->slaveof) clusterNodeRemoveSlave(n->slaveof,n); if (n->slaveof) {
clusterNodeRemoveSlave(n->slaveof,n);
if (n != myself) n->flags |= CLUSTER_NODE_MIGRATE_TO;
}
n->flags &= ~CLUSTER_NODE_SLAVE; n->flags &= ~CLUSTER_NODE_SLAVE;
n->flags |= CLUSTER_NODE_MASTER; n->flags |= CLUSTER_NODE_MASTER;
n->slaveof = NULL; n->slaveof = NULL;
@ -1432,8 +1438,8 @@ void clusterSetNodeAsMaster(clusterNode *n) {
* node (see the function comments for more info). * node (see the function comments for more info).
* *
* The 'sender' is the node for which we received a configuration update. * The 'sender' is the node for which we received a configuration update.
* Sometimes it is not actually the "Sender" of the information, like in the case * Sometimes it is not actually the "Sender" of the information, like in the
* we receive the info via an UPDATE packet. */ * case we receive the info via an UPDATE packet. */
void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) { void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoch, unsigned char *slots) {
int j; int j;
clusterNode *curmaster, *newmaster = NULL; clusterNode *curmaster, *newmaster = NULL;
@ -1764,7 +1770,8 @@ int clusterProcessPacket(clusterLink *link) {
if (nodeIsMaster(sender)) { if (nodeIsMaster(sender)) {
/* Master turned into a slave! Reconfigure the node. */ /* Master turned into a slave! Reconfigure the node. */
clusterDelNodeSlots(sender); clusterDelNodeSlots(sender);
sender->flags &= ~CLUSTER_NODE_MASTER; sender->flags &= ~(CLUSTER_NODE_MASTER|
CLUSTER_NODE_MIGRATE_TO);
sender->flags |= CLUSTER_NODE_SLAVE; sender->flags |= CLUSTER_NODE_SLAVE;
/* Remove the list of slaves from the node. */ /* Remove the list of slaves from the node. */
@ -2863,7 +2870,7 @@ void clusterHandleSlaveFailover(void) {
* Slave migration is the process that allows a slave of a master that is * Slave migration is the process that allows a slave of a master that is
* already covered by at least another slave, to "migrate" to a master that * already covered by at least another slave, to "migrate" to a master that
* is orpaned, that is, left with no working slaves. * is orpaned, that is, left with no working slaves.
* -------------------------------------------------------------------------- */ * ------------------------------------------------------------------------- */
/* This function is responsible to decide if this replica should be migrated /* This function is responsible to decide if this replica should be migrated
* to a different (orphaned) master. It is called by the clusterCron() function * to a different (orphaned) master. It is called by the clusterCron() function
@ -2918,10 +2925,10 @@ void clusterHandleSlaveMigration(int max_slaves) {
/* Only iterate over working masters. */ /* Only iterate over working masters. */
if (nodeIsSlave(node) || nodeFailed(node)) continue; if (nodeIsSlave(node) || nodeFailed(node)) continue;
/* If this master never had slaves so far, don't migrate. We want /* We want to migrate only if this master used to have slaves or
* to migrate to a master that remained orphaned, not masters that * if failed over a master that had slaves. This way we only migrate
* were never configured to have slaves. */ * to instances that were supposed to have replicas. */
if (node->numslaves == 0) continue; if (!(node->flags & CLUSTER_NODE_MIGRATE_TO)) continue;
okslaves = clusterCountNonFailingSlaves(node); okslaves = clusterCountNonFailingSlaves(node);
if (okslaves == 0 && target == NULL && node->numslots > 0) if (okslaves == 0 && target == NULL && node->numslots > 0)
@ -3169,9 +3176,12 @@ void clusterCron(void) {
/* A master is orphaned if it is serving a non-zero number of /* A master is orphaned if it is serving a non-zero number of
* slots, have no working slaves, but used to have at least one * slots, have no working slaves, but used to have at least one
* slave. */ * slave, or failed over a master that used to have slaves. */
if (okslaves == 0 && node->numslots > 0 && node->numslaves) if (okslaves == 0 && node->numslots > 0 &&
node->flags & CLUSTER_NODE_MIGRATE_TO)
{
orphaned_masters++; orphaned_masters++;
}
if (okslaves > max_slaves) max_slaves = okslaves; if (okslaves > max_slaves) max_slaves = okslaves;
if (nodeIsSlave(myself) && myself->slaveof == node) if (nodeIsSlave(myself) && myself->slaveof == node)
this_slaves = okslaves; this_slaves = okslaves;
@ -3258,6 +3268,7 @@ void clusterCron(void) {
* the orphaned masters. Note that it does not make sense to try * the orphaned masters. Note that it does not make sense to try
* a migration if there is no master with at least *two* working * a migration if there is no master with at least *two* working
* slaves. */ * slaves. */
if (orphaned_masters) serverLog(LL_WARNING,"0");
if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves) if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves)
clusterHandleSlaveMigration(max_slaves); clusterHandleSlaveMigration(max_slaves);
} }
@ -3573,7 +3584,7 @@ void clusterSetMaster(clusterNode *n) {
serverAssert(myself->numslots == 0); serverAssert(myself->numslots == 0);
if (nodeIsMaster(myself)) { if (nodeIsMaster(myself)) {
myself->flags &= ~CLUSTER_NODE_MASTER; myself->flags &= ~(CLUSTER_NODE_MASTER|CLUSTER_NODE_MIGRATE_TO);
myself->flags |= CLUSTER_NODE_SLAVE; myself->flags |= CLUSTER_NODE_SLAVE;
clusterCloseAllSlots(); clusterCloseAllSlots();
} else { } else {

View File

@ -53,7 +53,7 @@ typedef struct clusterLink {
#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */ #define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ #define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ #define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
#define CLUSTER_NODE_PROMOTED 256 /* Master was a slave promoted by failover */ #define CLUSTER_NODE_MIGRATE_TO 256 /* Master elegible for replica migration. */
#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" #define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER) #define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)