diff --git a/redis.conf b/redis.conf index 1e1f5313..0ead6e30 100644 --- a/redis.conf +++ b/redis.conf @@ -906,6 +906,16 @@ lua-time-limit 5000 # # cluster-require-full-coverage yes +# This option, when set to yes, prevents slaves from trying to failover their +# master during master failures. However the slave can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted except +# in the case of a total DC failure. +# +# cluster-slave-no-failover no + # In order to setup your cluster make sure to read the documentation # available at http://redis.io web site. diff --git a/src/cluster.c b/src/cluster.c index ee5b6766..85fe265f 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -201,6 +201,8 @@ int clusterLoadConfig(char *filename) { n->flags |= CLUSTER_NODE_HANDSHAKE; } else if (!strcasecmp(s,"noaddr")) { n->flags |= CLUSTER_NODE_NOADDR; + } else if (!strcasecmp(s,"nofailover")) { + n->flags |= CLUSTER_NODE_NOFAILOVER; } else if (!strcasecmp(s,"noflags")) { /* nothing to do */ } else { @@ -407,6 +409,22 @@ int clusterLockConfig(char *filename) { return C_OK; } +/* Some flags (currently just the NOFAILOVER flag) may need to be updated + * in the "myself" node based on the current configuration of the node, + * that may change at runtime via CONFIG SET. This function changes the + * set of flags in myself->flags accordingly. */ +void clusterUpdateMyselfFlags(void) { + int oldflags = myself->flags; + int nofailover = server.cluster_slave_no_failover ? 
+ CLUSTER_NODE_NOFAILOVER : 0; + myself->flags &= ~CLUSTER_NODE_NOFAILOVER; + myself->flags |= nofailover; + if (myself->flags != oldflags) { + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG| + CLUSTER_TODO_UPDATE_STATE); + } +} + void clusterInit(void) { int saveconf = 0; @@ -497,6 +515,7 @@ void clusterInit(void) { server.cluster->mf_end = 0; resetManualFailover(); + clusterUpdateMyselfFlags(); } /* Reset a node performing a soft or hard reset: @@ -1808,6 +1827,18 @@ int clusterProcessPacket(clusterLink *link) { } } + /* Copy the CLUSTER_NODE_NOFAILOVER flag from what the sender + * announced. This is a dynamic flag that we receive from the + * sender, and the latest status must be trusted. We need it to + * be propagated because the slave ranking used to understand the + * delay of each slave in the voting process, needs to know + * which instances are really competing. */ + if (sender) { + int nofailover = flags & CLUSTER_NODE_NOFAILOVER; + sender->flags &= ~CLUSTER_NODE_NOFAILOVER; + sender->flags |= nofailover; + } + /* Update the node address if it changed. */ if (sender && type == CLUSTERMSG_TYPE_PING && !nodeInHandshake(sender) && @@ -2723,6 +2754,7 @@ int clusterGetSlaveRank(void) { myoffset = replicationGetSlaveOffset(); for (j = 0; j < master->numslaves; j++) if (master->slaves[j] != myself && + !nodeCantFailover(master->slaves[j]) && master->slaves[j]->repl_offset > myoffset) rank++; return rank; } @@ -2860,10 +2892,13 @@ void clusterHandleSlaveFailover(void) { * of an automatic or manual failover: * 1) We are a slave. * 2) Our master is flagged as FAIL, or this is a manual failover. - * 3) It is serving slots. */ + * 3) We don't have the no failover configuration set, or this is + * a manual failover. + * 4) It is serving slots. 
*/ if (nodeIsMaster(myself) || myself->slaveof == NULL || (!nodeFailed(myself->slaveof) && !manual_failover) || + (server.cluster_slave_no_failover && !manual_failover) || myself->slaveof->numslots == 0) { /* There are no reasons to failover, so we set the reason why we @@ -3239,6 +3274,9 @@ void clusterCron(void) { handshake_timeout = server.cluster_node_timeout; if (handshake_timeout < 1000) handshake_timeout = 1000; + /* Update myself flags. */ + clusterUpdateMyselfFlags(); + /* Check if we have disconnected nodes and re-establish the connection. * Also update a few stats while we are here, that can be used to make * better decisions in other part of the code. */ @@ -3837,7 +3875,8 @@ static struct redisNodeFlags redisNodeFlagsTable[] = { {CLUSTER_NODE_PFAIL, "fail?,"}, {CLUSTER_NODE_FAIL, "fail,"}, {CLUSTER_NODE_HANDSHAKE, "handshake,"}, - {CLUSTER_NODE_NOADDR, "noaddr,"} + {CLUSTER_NODE_NOADDR, "noaddr,"}, + {CLUSTER_NODE_NOFAILOVER, "nofailover,"} }; /* Concatenate the comma separated list of node flags to the given SDS diff --git a/src/cluster.h b/src/cluster.h index af85841c..f2b9a4ec 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -16,6 +16,7 @@ #define CLUSTER_DEFAULT_NODE_TIMEOUT 15000 #define CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */ #define CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1 +#define CLUSTER_DEFAULT_SLAVE_NO_FAILOVER 0 /* Failover by default. */ #define CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */ #define CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */ #define CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */ @@ -55,6 +56,7 @@ typedef struct clusterLink { #define CLUSTER_NODE_NOADDR 64 /* We don't know the address of this node */ #define CLUSTER_NODE_MEET 128 /* Send a MEET message to this node */ #define CLUSTER_NODE_MIGRATE_TO 256 /* Master elegible for replica migration. */ +#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. 
*/ #define CLUSTER_NODE_NULL_NAME "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" #define nodeIsMaster(n) ((n)->flags & CLUSTER_NODE_MASTER) @@ -64,6 +66,7 @@ typedef struct clusterLink { #define nodeWithoutAddr(n) ((n)->flags & CLUSTER_NODE_NOADDR) #define nodeTimedOut(n) ((n)->flags & CLUSTER_NODE_PFAIL) #define nodeFailed(n) ((n)->flags & CLUSTER_NODE_FAIL) +#define nodeCantFailover(n) ((n)->flags & CLUSTER_NODE_NOFAILOVER) /* Reasons why a slave is not able to failover. */ #define CLUSTER_CANT_FAILOVER_NONE 0 diff --git a/src/config.c b/src/config.c index eddfe1f1..59553782 100644 --- a/src/config.c +++ b/src/config.c @@ -635,6 +635,14 @@ void loadServerConfigFromString(char *config) { err = "cluster slave validity factor must be zero or positive"; goto loaderr; } + } else if (!strcasecmp(argv[0],"cluster-slave-no-failover") && + argc == 2) + { + server.cluster_slave_no_failover = yesnotoi(argv[1]); + if (server.cluster_slave_no_failover == -1) { + err = "argument must be 'yes' or 'no'"; + goto loaderr; + } } else if (!strcasecmp(argv[0],"lua-time-limit") && argc == 2) { server.lua_time_limit = strtoll(argv[1],NULL,10); } else if (!strcasecmp(argv[0],"slowlog-log-slower-than") && @@ -997,6 +1005,8 @@ void configSetCommand(client *c) { "repl-diskless-sync",server.repl_diskless_sync) { } config_set_bool_field( "cluster-require-full-coverage",server.cluster_require_full_coverage) { + } config_set_bool_field( + "cluster-slave-no-failover",server.cluster_slave_no_failover) { } config_set_bool_field( "aof-rewrite-incremental-fsync",server.aof_rewrite_incremental_fsync) { } config_set_bool_field( @@ -1291,6 +1301,8 @@ void configGetCommand(client *c) { /* Bool (yes/no) values */ config_get_bool_field("cluster-require-full-coverage", server.cluster_require_full_coverage); + config_get_bool_field("cluster-slave-no-failover", + 
server.cluster_slave_no_failover); config_get_bool_field("no-appendfsync-on-rewrite", server.aof_no_fsync_on_rewrite); config_get_bool_field("slave-serve-stale-data", @@ -2023,6 +2035,7 @@ int rewriteConfig(char *path) { rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0); rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE); + rewriteConfigYesNoOption(state,"cluster-slave-no-failover",server.cluster_slave_no_failover,CLUSTER_DEFAULT_SLAVE_NO_FAILOVER); rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,CLUSTER_DEFAULT_NODE_TIMEOUT); rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,CLUSTER_DEFAULT_MIGRATION_BARRIER); rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,CLUSTER_DEFAULT_SLAVE_VALIDITY); diff --git a/src/server.c b/src/server.c index 1a6f3038..85f05f1f 100644 --- a/src/server.c +++ b/src/server.c @@ -1451,6 +1451,7 @@ void initServerConfig(void) { server.cluster_migration_barrier = CLUSTER_DEFAULT_MIGRATION_BARRIER; server.cluster_slave_validity_factor = CLUSTER_DEFAULT_SLAVE_VALIDITY; server.cluster_require_full_coverage = CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE; + server.cluster_slave_no_failover = CLUSTER_DEFAULT_SLAVE_NO_FAILOVER; server.cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE); server.cluster_announce_ip = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP; server.cluster_announce_port = CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT; diff --git a/src/server.h b/src/server.h index 29919f5e..0668c375 100644 --- a/src/server.h +++ b/src/server.h @@ -1174,6 +1174,8 @@ struct redisServer { int cluster_slave_validity_factor; /* Slave max data age for failover. 
*/ int cluster_require_full_coverage; /* If true, put the cluster down if there is at least an uncovered slot.*/ + int cluster_slave_no_failover; /* Prevent slave from starting a failover + if the master is in failure state. */ char *cluster_announce_ip; /* IP address to announce on cluster bus. */ int cluster_announce_port; /* base port to announce on cluster bus. */ int cluster_announce_bus_port; /* bus port to announce on cluster bus. */