From c89afc8e5db6166bd8855bf48d5d7f7cc16492ad Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 17 Sep 2014 11:10:09 +0200 Subject: [PATCH] Cluster: new option to work with partial slots coverage. --- redis.conf | 13 +++++++++++++ src/cluster.c | 14 ++++++++------ src/cluster.h | 1 + src/config.c | 15 +++++++++++++++ src/redis.c | 1 + src/redis.h | 2 ++ 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/redis.conf b/redis.conf index 0aa2dc9f..e03b3aa9 100644 --- a/redis.conf +++ b/redis.conf @@ -660,6 +660,19 @@ lua-time-limit 5000 # # cluster-migration-barrier 1 +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least one hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# is no longer covered) the whole cluster eventually becomes unavailable. +# It automatically becomes available again as soon as all the slots are covered. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + # In order to setup your cluster make sure to read the documentation # available at http://redis.io web site. diff --git a/src/cluster.c b/src/cluster.c index 07eaa637..7f9047a9 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -3171,12 +3171,14 @@ void clusterUpdateState(void) { new_state = REDIS_CLUSTER_OK; /* Check if all the slots are covered. 
*/ - for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) { - if (server.cluster->slots[j] == NULL || - server.cluster->slots[j]->flags & (REDIS_NODE_FAIL)) - { - new_state = REDIS_CLUSTER_FAIL; - break; + if (server.cluster_require_full_coverage) { + for (j = 0; j < REDIS_CLUSTER_SLOTS; j++) { + if (server.cluster->slots[j] == NULL || + server.cluster->slots[j]->flags & (REDIS_NODE_FAIL)) + { + new_state = REDIS_CLUSTER_FAIL; + break; + } } } diff --git a/src/cluster.h b/src/cluster.h index 16a060ec..adad0645 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -15,6 +15,7 @@ * multiplicators of the node timeout value (when ending with MULT). */ #define REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT 15000 #define REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY 10 /* Slave max data age factor. */ +#define REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE 1 #define REDIS_CLUSTER_FAIL_REPORT_VALIDITY_MULT 2 /* Fail report validity. */ #define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */ #define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. 
*/ diff --git a/src/config.c b/src/config.c index e699a7e5..db084793 100644 --- a/src/config.c +++ b/src/config.c @@ -429,6 +429,13 @@ void loadServerConfigFromString(char *config) { } else if (!strcasecmp(argv[0],"cluster-config-file") && argc == 2) { zfree(server.cluster_configfile); server.cluster_configfile = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"cluster-require-full-coverage") && + argc == 2) + { + if ((server.cluster_require_full_coverage = yesnotoi(argv[1])) == -1) + { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"cluster-node-timeout") && argc == 2) { server.cluster_node_timeout = strtoll(argv[1],NULL,10); if (server.cluster_node_timeout <= 0) { @@ -918,6 +925,11 @@ void configSetCommand(redisClient *c) { ll < 0) goto badfmt; server.repl_min_slaves_max_lag = ll; refreshGoodSlavesCount(); + } else if (!strcasecmp(c->argv[2]->ptr,"cluster-require-full-coverage")) { + int yn = yesnotoi(o->ptr); + + if (yn == -1) goto badfmt; + server.cluster_require_full_coverage = yn; } else if (!strcasecmp(c->argv[2]->ptr,"cluster-node-timeout")) { if (getLongLongFromObject(o,&ll) == REDIS_ERR || ll <= 0) goto badfmt; @@ -1039,6 +1051,8 @@ void configGetCommand(redisClient *c) { config_get_numerical_field("cluster-slave-validity-factor",server.cluster_slave_validity_factor); /* Bool (yes/no) values */ + config_get_bool_field("cluster-require-full-coverage", + server.cluster_require_full_coverage); config_get_bool_field("no-appendfsync-on-rewrite", server.aof_no_fsync_on_rewrite); config_get_bool_field("slave-serve-stale-data", @@ -1806,6 +1820,7 @@ int rewriteConfig(char *path) { rewriteConfigNumericalOption(state,"lua-time-limit",server.lua_time_limit,REDIS_LUA_TIME_LIMIT); rewriteConfigYesNoOption(state,"cluster-enabled",server.cluster_enabled,0); rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,REDIS_DEFAULT_CLUSTER_CONFIG_FILE); + 
rewriteConfigYesNoOption(state,"cluster-require-full-coverage",server.cluster_require_full_coverage,REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE); rewriteConfigNumericalOption(state,"cluster-node-timeout",server.cluster_node_timeout,REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT); rewriteConfigNumericalOption(state,"cluster-migration-barrier",server.cluster_migration_barrier,REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER); rewriteConfigNumericalOption(state,"cluster-slave-validity-factor",server.cluster_slave_validity_factor,REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY); diff --git a/src/redis.c b/src/redis.c index a7e1937a..5ce7d1d9 100644 --- a/src/redis.c +++ b/src/redis.c @@ -1451,6 +1451,7 @@ void initServerConfig(void) { server.cluster_node_timeout = REDIS_CLUSTER_DEFAULT_NODE_TIMEOUT; server.cluster_migration_barrier = REDIS_CLUSTER_DEFAULT_MIGRATION_BARRIER; server.cluster_slave_validity_factor = REDIS_CLUSTER_DEFAULT_SLAVE_VALIDITY; + server.cluster_require_full_coverage = REDIS_CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE; server.cluster_configfile = zstrdup(REDIS_DEFAULT_CLUSTER_CONFIG_FILE); server.lua_caller = NULL; server.lua_time_limit = REDIS_LUA_TIME_LIMIT; diff --git a/src/redis.h b/src/redis.h index 61cd737b..7ad03dc9 100644 --- a/src/redis.h +++ b/src/redis.h @@ -858,6 +858,8 @@ struct redisServer { struct clusterState *cluster; /* State of the cluster */ int cluster_migration_barrier; /* Cluster replicas migration barrier. */ int cluster_slave_validity_factor; /* Slave max data age for failover. */ + int cluster_require_full_coverage; /* If true, put the cluster down if + there is at least an uncovered slot. */ /* Scripting */ lua_State *lua; /* The Lua interpreter. We use just one for all clients */ redisClient *lua_client; /* The "fake client" to query Redis from Lua */