1
0
mirror of https://github.com/fluencelabs/redis synced 2025-03-31 14:51:04 +00:00

Cluster: ability to prevent slaves from failing over their masters.

This commit, in some parts derived from PR  which is no longer
possible to merge (because the user deleted the original branch),
implements the ability of slaves to have a special configuration
preventing that they try to start a failover when the master is failing.

There are multiple reasons for wanting this, and the feature was
requested in issue  time ago.

The differences between this patch and the original PR are the
following:

1. The flag is saved/loaded on the nodes configuration.
2. The 'myself' node is now flag-aware, the flag is updated as needed
   when the configuration is changed via CONFIG SET.
3. The flag name uses NOFAILOVER instead of NO_FAILOVER to be consistent
   with existing NOADDR.
4. The redis.conf documentation was rewritten.

Thanks to @deep011 for the original patch.
This commit is contained in:
antirez 2018-03-14 13:46:36 +01:00
parent 84b281209a
commit 432bf4770e
6 changed files with 70 additions and 2 deletions

@ -906,6 +906,16 @@ lua-time-limit 5000
#
# cluster-require-full-coverage yes
# This option, when set to yes, prevents slaves from trying to failover their
# master during master failures. However the slave can still perform a
# manual failover, if forced to do so.
#
# This is useful in different scenarios, especially in the case of multiple
# data center operations, where we want one side to never be promoted except
# in the case of a total DC failure.
#
# cluster-slave-no-failover no
# In order to setup your cluster make sure to read the documentation
# available at http://redis.io web site.

@ -201,6 +201,8 @@ int clusterLoadConfig(char *filename) {
n->flags |= CLUSTER_NODE_HANDSHAKE;
} else if (!strcasecmp(s,"noaddr")) {
n->flags |= CLUSTER_NODE_NOADDR;
} else if (!strcasecmp(s,"nofailover")) {
n->flags |= CLUSTER_NODE_NOFAILOVER;
} else if (!strcasecmp(s,"noflags")) {
/* nothing to do */
} else {
@ -407,6 +409,22 @@ int clusterLockConfig(char *filename) {
return C_OK;
}
/* Keep the configuration-driven flags of the "myself" node in sync with
 * the server configuration. At present the only such flag is
 * CLUSTER_NODE_NOFAILOVER, which mirrors server.cluster_slave_no_failover,
 * an option that can be modified at runtime through CONFIG SET.
 *
 * When the resulting flags differ from the previous ones, a configuration
 * save and a cluster state update are scheduled via clusterDoBeforeSleep().
 * If nothing changed the function has no effect. */
void clusterUpdateMyselfFlags(void) {
    int before = myself->flags;

    /* Set or clear the bit so that it matches the current option value. */
    if (server.cluster_slave_no_failover) {
        myself->flags |= CLUSTER_NODE_NOFAILOVER;
    } else {
        myself->flags &= ~CLUSTER_NODE_NOFAILOVER;
    }

    /* Nothing to schedule if the flags are the same as before. */
    if (myself->flags == before) return;

    clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|CLUSTER_TODO_UPDATE_STATE);
}
void clusterInit(void) {
int saveconf = 0;
@ -497,6 +515,7 @@ void clusterInit(void) {
server.cluster->mf_end = 0;
resetManualFailover();
clusterUpdateMyselfFlags();
}
/* Reset a node performing a soft or hard reset:
@ -1808,6 +1827,18 @@ int clusterProcessPacket(clusterLink *link) {
}
}
/* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender
* announced. This is a dynamic flag that we receive from the
* sender, and the latest status must be trusted. We need it to
* be propagated because the slave ranking used to understand the
* delay of each slave in the voting process, needs to know
* what are the instances really competing. */
if (sender) {
int nofailover = flags & CLUSTER_NODE_NOFAILOVER;
sender->flags &= ~CLUSTER_NODE_NOFAILOVER;
sender->flags |= nofailover;
}
/* Update the node address if it changed. */
if (sender && type == CLUSTERMSG_TYPE_PING &&
!nodeInHandshake(sender) &&
@ -2723,6 +2754,7 @@ int clusterGetSlaveRank(void) {
myoffset = replicationGetSlaveOffset();
for (j = 0; j < master->numslaves; j++)
if (master->slaves[j] != myself &&
!nodeCantFailover(master->slaves[j]) &&
master->slaves[j]->repl_offset > myoffset) rank++;
return rank;
}
@ -2860,10 +2892,13 @@ void clusterHandleSlaveFailover(void) {
* of an automatic or manual failover:
* 1) We are a slave.
* 2) Our master is flagged as FAIL, or this is a manual failover.
* 3) It is serving slots. */
* 3) We don't have the no failover configuration set, and this is
* not a manual failover.
* 4) It is serving slots. */
if (nodeIsMaster(myself) ||
myself->slaveof == NULL ||
(!nodeFailed(myself->slaveof) && !manual_failover) ||
(server.cluster_slave_no_failover && !manual_failover) ||
myself->slaveof->numslots == 0)
{
/* There are no reasons to failover, so we set the reason why we
@ -3239,6 +3274,9 @@ void clusterCron(void) {
handshake_timeout = server.cluster_node_timeout;
if (handshake_timeout < 1000) handshake_timeout = 1000;
/* Update myself flags. */
clusterUpdateMyselfFlags();
/* Check if we have disconnected nodes and re-establish the connection.
* Also update a few stats while we are here, that can be used to make
* better decisions in other part of the code. */
@ -3837,7 +3875,8 @@ static struct redisNodeFlags redisNodeFlagsTable[] = {
{CLUSTER_NODE_PFAIL, "fail?,"},
{CLUSTER_NODE_FAIL, "fail,"},
{CLUSTER_NODE_HANDSHAKE, "handshake,"},
{CLUSTER_NODE_NOADDR, "noaddr,"}
{CLUSTER_NODE_NOADDR, "noaddr,"},
{CLUSTER_NODE_NOFAILOVER, "nofailover,"}
};
/* Concatenate the comma separated list of node flags to the given SDS

@ -16,6 +16,7 @@
#define CLUSTER_DEFAULT_NODE_TIMEOUT 15000
#define CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */
#define CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1
#define CLUSTER_DEFAULT_SLAVE_NO_FAILOVER 0 /* Failover by default. */
#define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */
#define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
#define CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
@ -55,6 +56,7 @@ typedef struct clusterLink {
#define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */
#define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */
#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */
#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */
#define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000"
#define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER)
@ -64,6 +66,7 @@ typedef struct clusterLink {
#define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR)
#define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL)
#define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL)
#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER)
/* Reasons why a slave is not able to failover. */
#define CLUSTER_CANT_FAILOVER_NONE 0

@ -635,6 +635,14 @@ void loadServerConfigFromString(char *config) {
err = "cluster slave validity factor must be zero or positive";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"cluster-slave-no-failover") &&
argc == 2)
{
server.cluster_slave_no_failover = yesnotoi(argv[1]);
if (server.cluster_slave_no_failover == -1) {
err = "argument must be 'yes' or 'no'";
goto loaderr;
}
} else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) {
server.lua_time_limit = strtoll(argv[1],NULL,10);
} else if (!strcasecmp(argv[0],"slowlog-log-slower-than") &&
@ -997,6 +1005,8 @@ void configSetCommand(client *c) {
"repl-diskless-sync",server.repl_diskless_sync) {
} config_set_bool_field(
"cluster-require-full-coverage",server.cluster_require_full_coverage) {
} config_set_bool_field(
"cluster-slave-no-failover",server.cluster_slave_no_failover) {
} config_set_bool_field(
"aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) {
} config_set_bool_field(
@ -1291,6 +1301,8 @@ void configGetCommand(client *c) {
/* Bool (yes/no) values */
config_get_bool_field("cluster-require-full-coverage",
server.cluster_require_full_coverage);
config_get_bool_field("cluster-slave-no-failover",
server.cluster_slave_no_failover);
config_get_bool_field("no-appendfsync-on-rewrite",
server.aof_no_fsync_on_rewrite);
config_get_bool_field("slave-serve-stale-data",
@ -2023,6 +2035,7 @@ int rewriteConfig(char *path) {
rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0);
rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE);
rewriteConfigYesNoOption(state,"cluster-slave-no-failover",server.cluster_slave_no_failover,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER);
rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT);
rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER);
rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY);

@ -1451,6 +1451,7 @@ void initServerConfig(void) {
server.cluster_migration_barrier = CLUSTER_DEFAULT_MIGRATION_BARRIER;
server.cluster_slave_validity_factor = CLUSTER_DEFAULT_SLAVE_VALIDITY;
server.cluster_require_full_coverage = CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
server.cluster_slave_no_failover = CLUSTER_DEFAULT_SLAVE_NO_FAILOVER;
server.cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
server.cluster_announce_ip = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP;
server.cluster_announce_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT;

@ -1174,6 +1174,8 @@ struct redisServer {
int cluster_slave_validity_factor; /* Slave max data age for failover. */
int cluster_require_full_coverage; /* If true, put the cluster down if
there is at least an uncovered slot.*/
int cluster_slave_no_failover; /* Prevent slave from starting a failover
if the master is in failure state. */
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
int cluster_announce_port; /* base port to announce on cluster bus. */
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */