From 1dd67ebceb2b44d202d09400b9bf02af62c35362 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 01/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 372d02d9..59abd571 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. */ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. 
*/ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster 
Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2861,7 +2953,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2877,6 +2973,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc 
= validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From bafdc1a56cbb8c56b28d144789e986c3598ee5c7 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 02/66] Cluster Manager: 'create', 'info' and 'check' commands --- src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index b896b126..a5e0e231 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 59abd571..ef917cca 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include 
#include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define 
CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. 
*/ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - 
printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static 
redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) 
{ + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len) +{ + assert(offending != NULL); + int score = 
0, i, j; + int node_len = cluster_manager.nodes->len; + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + clusterManagerNode **offending_p = *offending; + dictType dtype = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ + }; + for (i = 0; i < ip_len; i++) { + clusterManagerNodeArray *node_array = &(ipnodes[i]); + dict *related = dictCreate(&dtype, NULL); + char *ip = NULL; + for (j = 0; j < node_array->len; j++) { + clusterManagerNode *node = node_array->nodes[j]; + if (node == NULL) continue; + if (!ip) ip = node->ip; + sds types; + if (!node->replicate) { + assert(node->name != NULL); + dictEntry *entry = dictFind(related, node->name); + if (entry) types = (sds) dictGetVal(entry); + else types = sdsempty(); + types = sdscatprintf(types, "m%s", types); + dictReplace(related, node->name, types); + } else { + dictEntry *entry = dictFind(related, node->replicate); + if (entry) types = (sds) dictGetVal(entry); + else { + types = sdsempty(); + dictAdd(related, node->replicate, types); + } + sdscat(types, "s"); + } + } + dictIterator *iter = dictGetIterator(related); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds types = (sds) dictGetVal(entry); + sds name = (sds) dictGetKey(entry); + int typeslen = sdslen(types); + if (typeslen < 2) continue; + if (types[0] == 'm') score += (10000 * (typeslen - 1)); + else score += (1 * typeslen); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate == NULL) continue; + if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { + *(offending_p++) = n; + break; + } + } + } + if (offending_len != NULL) *offending_len = offending_p - *offending; + dictReleaseIterator(iter); + dictRelease(related); + } + return score; +} + +static void 
clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len) +{ + clusterManagerNode **offenders = NULL, **aux; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + if (score == 0) goto cleanup; + printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + &offending_len); + if (score == 0) break; + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas)); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) + other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); 
+ else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? 
"M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + " slots:%S (%u slots) " + "", //TODO: flags string + role, node->name, node->ip, node->port, + slots, node->slots_count); + sdsfree(slots); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n replicates %S", node->replicate); + //else if () {} //TODO: add replicas info + return info; +} + +static void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node); + printf("%s\n", info); + sdsfree(info); + } +} + +static void clusterManagerShowInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL || reply->type == REDIS_REPLY_INTEGER) + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); + return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d 
(%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + 
node->replicate);
+        if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
+            if (is_err && err != NULL) {
+                *err = zmalloc((reply->len + 1) * sizeof(char));
+                strcpy(*err, reply->str);
+            }
+            goto node_cmd_err;
+        }
+    } else {
+        int added = clusterManagerAddSlots(node, err);
+        if (!added || *err != NULL) goto node_cmd_err;
+    }
+    node->dirty = 0;
+    freeReplyObject(reply);
+    return 1;
+node_cmd_err:
+    freeReplyObject(reply);
+    return 0;
+}
+
+static void clusterManagerWaitForClusterJoin(void) {
+    printf("Waiting for the cluster to join\n");
+    while(!clusterManagerIsConfigConsistent()) {
+        printf(".");
+        fflush(stdout);
+        sleep(1);
+    }
+    printf("\n");
+}
+
+/* Query 'node' with CLUSTER NODES and parse the reply, one line per node.
+ *
+ * The line flagged "myself" updates 'node' itself: its slots are reset and
+ * re-read from the slot definitions that follow the eighth field. When
+ * CLUSTER_MANAGER_OPT_GETFRIENDS is set in 'opts', every other line becomes
+ * a new node appended to node->friends; otherwise parsing stops as soon as
+ * the "myself" line has been handled.
+ *
+ * Returns 1 on success and 0 on failure. On an error reply, if 'err' is not
+ * NULL, *err is set to a zmalloc'ed copy of the error string that the caller
+ * must zfree. */
+static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts,
+                                      char **err)
+{
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    int is_err = 0;
+    /* 'err' is optional, exactly as the error branch below assumes. */
+    if (err != NULL) *err = NULL;
+    if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) {
+        if (is_err && err != NULL) {
+            *err = zmalloc((reply->len + 1) * sizeof(char));
+            strcpy(*err, reply->str);
+        }
+        goto node_cmd_err;
+    }
+    int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS);
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL,
+             *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL,
+             *link_status = NULL;
+        int i = 0;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            switch(i++){
+            case 0: name = token; break;
+            case 1: addr = token; break;
+            case 2: flags = token; break;
+            case 3: master_id = token; break;
+            case 4: ping_sent = token; break;
+            case 5: ping_recv = token; break;
+            case 6: config_epoch = token; break;
+            case 7: link_status = token; break;
+            }
+            if (i == 8) break; // Slots
+        }
+        if (!flags) goto node_cmd_err;
+        int myself = (strstr(flags, "myself") != NULL);
+        if (strstr(flags, "noaddr") != NULL)
+            node->flags |= CLUSTER_MANAGER_FLAG_NOADDR;
+        if (strstr(flags, "disconnected") != NULL)
+            node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT;
+        if (strstr(flags, "fail") != NULL)
+            node->flags |= CLUSTER_MANAGER_FLAG_FAIL;
+        clusterManagerNode *currentNode = NULL;
+        if (myself) {
+            node->flags |= CLUSTER_MANAGER_FLAG_MYSELF;
+            currentNode = node;
+            CLUSTER_MANAGER_RESET_SLOTS(node);
+            if (i == 8) {
+                int remaining = strlen(line);
+                //TODO: just while(remaining) && assign p inside the block
+                while ((p = strchr(line, ' ')) != NULL || remaining) {
+                    /* When a separator is found it is consumed as well, so
+                     * it must be subtracted from 'remaining'. Otherwise the
+                     * end of the last token is computed past the line
+                     * terminator and the next line gets overwritten. */
+                    if (p == NULL) p = line + remaining;
+                    else remaining--;
+                    remaining -= (p - line);
+
+                    char *slotsdef = line;
+                    *p = '\0';
+                    if (remaining) line = p + 1;
+                    else line = p;
+                    if (slotsdef[0] == '[') {
+                        //TODO: migrating/importing
+                    } else if ((p = strchr(slotsdef, '-')) != NULL) {
+                        int start, stop;
+                        *p = '\0';
+                        start = atoi(slotsdef);
+                        stop = atoi(p + 1);
+                        /* Skip ranges that do not fit the slots array. */
+                        if (start < 0 || stop >= CLUSTER_MANAGER_SLOTS ||
+                            start > stop) continue;
+                        node->slots_count += (stop - (start - 1));
+                        while (start <= stop) node->slots[start++] = 1;
+                    } else if (slotsdef[0] != '\0') {
+                        /* Single slot. 'p' is NULL at this point (the
+                         * strchr above failed), so the token itself is
+                         * tested instead of comparing against 'p'. */
+                        int slot = atoi(slotsdef);
+                        if (slot < 0 || slot >= CLUSTER_MANAGER_SLOTS) continue;
+                        node->slots[slot] = 1;
+                        node->slots_count++;
+                    }
+                }
+            }
+            node->dirty = 0;
+        } else if (!getfriends) {
+            if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue;
+            else break;
+        } else {
+            if (addr == NULL) {
+                // TODO: find a better err message
+                fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+                goto node_cmd_err;
+            }
+            char *c = strrchr(addr, '@');
+            if (c != NULL) *c = '\0';
+            c = strrchr(addr, ':');
+            if (c == NULL) {
+                fprintf(stderr, "Error: invalid CLUSTER NODES reply\n");
+                goto node_cmd_err;
+            }
+            *c = '\0';
+            int port = atoi(++c);
+            currentNode = clusterManagerNewNode(sdsnew(addr), port);
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND;
+            if (node->friends == NULL) node->friends = listCreate();
+            listAddNodeTail(node->friends, currentNode);
+        }
+        if (name != NULL) currentNode->name = sdsnew(name);
+        if (strstr(flags, "slave") != NULL) {
+            currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE;
+            if (master_id != NULL) currentNode->replicate = sdsnew(master_id);
+        }
+        if (config_epoch != NULL)
+            currentNode->current_epoch = atoll(config_epoch);
+        if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent);
+        if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv);
+        if (!getfriends && myself) break;
+    }
+    freeReplyObject(reply);
+    return 1;
+node_cmd_err:
+    freeReplyObject(reply);
+    return 0;
+}
+
+static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) {
+    if (node->context == NULL)
+        node->context = redisConnect(node->ip, node->port);
+    if (node->context->err) {
+        fprintf(stderr,"Could not connect to Redis at ");
+        fprintf(stderr,"%s:%d: %s\n", node->ip, node->port,
+                node->context->errstr);
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    opts |= CLUSTER_MANAGER_OPT_GETFRIENDS;
+    char *e = NULL;
+    if (!clusterManagerNodeIsCluster(node, &e)) {
+        char *msg = (e ? e : "is not configured as a cluster node.");
+        fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg);
+        if (e) zfree(e);
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    e = NULL;
+    if (!clusterManagerNodeLoadInfo(node, opts, &e)) {
+        if (e) {
+            CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e);
+            zfree(e);
+        }
+        freeClusterManagerNode(node);
+        return 0;
+    }
+    cluster_manager.nodes = listCreate();
+    listAddNodeTail(cluster_manager.nodes, node);
+    if (node->friends != NULL) {
+        listIter li;
+        listNode *ln;
+        listRewind(node->friends, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *friend = ln->value;
+            if (!friend->ip || !friend->port) continue;
+            if (!friend->context)
+                friend->context = redisConnect(friend->ip, friend->port);
+            if (friend->context->err) continue;
+            e = NULL;
+            if (clusterManagerNodeLoadInfo(friend, 0, &e)) {
+                if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR |
+                                     CLUSTER_MANAGER_FLAG_DISCONNECT |
+                                     CLUSTER_MANAGER_FLAG_FAIL)) continue;
+                listAddNodeTail(cluster_manager.nodes, friend);
+
+            } else fprintf(stderr,"[ERR] Unable to load info
for node %s:%d\n",
+                                friend->ip, friend->port);
+        }
+        listRelease(node->friends);
+        node->friends = NULL;
+    }
+    return 1;
+}
+
+int clusterManagerSlotCompare(const void *slot1, const void *slot2) {
+    const char **i1 = (const char **)slot1;
+    const char **i2 = (const char **)slot2;
+    return strcmp(*i1, *i2);
+}
+
+/* Build a signature of the cluster configuration as seen by 'node'.
+ *
+ * Every CLUSTER NODES line carrying at least one slot definition that does
+ * not start with '[' contributes "<name>:<def>|<def>..." with its
+ * definitions sorted; the per-node strings are sorted too and joined with
+ * '|'.
+ *
+ * Returns a new sds string owned by the caller, or NULL when the command
+ * fails or no line carries such slot definitions. */
+static sds clusterManagerGetConfigSignature(clusterManagerNode *node) {
+    sds signature = NULL;
+    int node_count = 0, i = 0, name_len = 0;
+    /* Must be initialized before the first 'goto cleanup': the cleanup code
+     * hands it to zfree. */
+    char **node_configs = NULL;
+    redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES");
+    if (reply == NULL || reply->type == REDIS_REPLY_ERROR)
+        goto cleanup;
+    char *lines = reply->str, *p, *line;
+    while ((p = strstr(lines, "\n")) != NULL) {
+        i = 0;
+        *p = '\0';
+        line = lines;
+        lines = p + 1;
+        char *nodename = NULL;
+        int tot_size = 0;
+        while ((p = strchr(line, ' ')) != NULL) {
+            *p = '\0';
+            char *token = line;
+            line = p + 1;
+            if (i == 0) {
+                nodename = token;
+                tot_size = p - token;
+                name_len = tot_size;
+            }
+            /* Stop right after the eighth field, so that 'line' is left on
+             * the first slot definition instead of having consumed it. */
+            if (++i == 8) break;
+        }
+        if (i != 8) continue;
+        if (nodename == NULL) continue;
+        int remaining = strlen(line);
+        if (remaining == 0) continue;
+        char **slots = NULL;
+        int c = 0;
+        //TODO: just while(remaining) && assign p inside the block
+        while ((p = strchr(line, ' ')) != NULL || remaining) {
+            /* A separator that was found is consumed as well: subtract it,
+             * or the end of the last token lands past the line terminator
+             * and the next line gets overwritten. */
+            if (p == NULL) p = line + remaining;
+            else remaining--;
+            int size = (p - line);
+            remaining -= size;
+            tot_size += size;
+            char *slotsdef = line;
+            *p = '\0';
+            if (remaining) line = p + 1;
+            else line = p;
+            if (slotsdef[0] != '[') {
+                c++;
+                slots = zrealloc(slots, (c * sizeof(char *)));
+                slots[c - 1] = slotsdef;
+            }
+        }
+        if (c > 0) {
+            if (c > 1)
+                qsort(slots, c, sizeof(char *), clusterManagerSlotCompare);
+            node_count++;
+            node_configs =
+                zrealloc(node_configs, (node_count * sizeof(char *)));
+            /* name + ':' + definitions + (c - 1) separators + terminator:
+             * tot_size already holds name and definitions. */
+            tot_size += (sizeof(char) * (c - 1));
+            char *cfg = zmalloc((sizeof(char) * tot_size) + 2);
+            memcpy(cfg, nodename, name_len);
+            char *sp = cfg + name_len;
+            *(sp++) = ':';
+            for (i = 0; i < c; i++) {
+                if (i > 0) *(sp++) = '|';
+                int slen = strlen(slots[i]);
+                memcpy(sp, slots[i], slen);
+                sp += slen;
+            }
+            *(sp++) = '\0';
+            node_configs[node_count - 1] = cfg;
+        }
+        zfree(slots);
+    }
+    if (node_count > 0) {
+        if (node_count > 1) {
+            qsort(node_configs, node_count, sizeof(char *),
+                  clusterManagerSlotCompare);
+        }
+        signature = sdsempty();
+        for (i = 0; i < node_count; i++) {
+            if (i > 0) signature = sdscatprintf(signature, "%c", '|');
+            signature = sdscatfmt(signature, "%s", node_configs[i]);
+        }
+    }
+cleanup:
+    if (reply != NULL) freeReplyObject(reply);
+    for (i = 0; i < node_count; i++) zfree(node_configs[i]);
+    zfree(node_configs);
+    return signature;
+}
+
+static int clusterManagerIsConfigConsistent(void) {
+    if (cluster_manager.nodes == NULL) return 0;
+    int consistent = 0;
+    sds first_cfg = NULL;
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *node = ln->value;
+        sds cfg = clusterManagerGetConfigSignature(node);
+        if (cfg == NULL) {
+            consistent = 0;
+            break;
+        }
+        if (first_cfg == NULL) first_cfg = cfg;
+        else {
+            consistent = !sdscmp(first_cfg, cfg);
+            sdsfree(cfg);
+            if (!consistent) break;
+        }
+    }
+    if (first_cfg != NULL) sdsfree(first_cfg);
+    return consistent;
+}
+
+static void clusterManagerCheckCluster(int quiet) {
+    listNode *ln = listFirst(cluster_manager.nodes);
+    if (!ln) return;
+    clusterManagerNode *node = ln->value;
+    printf(">>> Performing Cluster Check (using node %s:%d)\n",
+           node->ip, node->port);
+    if (!quiet) clusterManagerShowNodes();
+    if (!clusterManagerIsConfigConsistent())
+        printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array
+    else
+        printf("[OK] All nodes agree about slots configuration.\n");
+    //TODO:check_open_slots
+    //TODO:check_slots_coverage
+}
+
 static void clusterManagerMode(clusterManagerCommandProc *proc) {
     int argc = config.cluster_manager_command.argc;
     char **argv =
config.cluster_manager_command.argv;
-    if (!proc(argc, argv)) {
-        sdsfree(config.hostip);
-        sdsfree(config.mb_delim);
-        exit(1);
-    }
+    cluster_manager.nodes = NULL;
+    if (!proc(argc, argv)) goto cluster_manager_err;
+    freeClusterManager();
     exit(0);
+cluster_manager_err:
+    freeClusterManager();
+    sdsfree(config.hostip);
+    sdsfree(config.mb_delim);
+    exit(1);
+}
+
+/* Cluster Manager Commands */
+
+/* "create" command: build a new cluster out of the nodes listed in 'argv',
+ * one "ip:port" address per argument (anything after a '@' is dropped).
+ *
+ * Every node is connected, must be a cluster node and must pass
+ * clusterManagerNodeIsEmpty(). Nodes are interleaved by IP; the first
+ * masters_count of them become masters and share the hash slots, the others
+ * are assigned as replicas, preferring a master with a different IP. Once
+ * the user types "yes" the configuration is flushed to the nodes, config
+ * epochs are assigned and CLUSTER MEET is sent towards the first node.
+ *
+ * Returns 1 on success (also when the user does not confirm) and 0 on
+ * error. */
+static int clusterManagerCommandCreate(int argc, char **argv) {
+    printf("Cluster Manager: Creating Cluster\n");
+    int i, j;
+    cluster_manager.nodes = listCreate();
+    for (i = 0; i < argc; i++) {
+        char *addr = argv[i];
+        char *c = strrchr(addr, '@');
+        if (c != NULL) *c = '\0';
+        c = strrchr(addr, ':');
+        if (c == NULL) {
+            fprintf(stderr, "Invalid address format: %s\n", addr);
+            return 0;
+        }
+        *c = '\0';
+        char *ip = addr;
+        int port = atoi(++c);
+        clusterManagerNode *node = clusterManagerNewNode(ip, port);
+        node->context = redisConnect(ip, port);
+        if (node->context->err) {
+            fprintf(stderr,"Could not connect to Redis at ");
+            fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr);
+            freeClusterManagerNode(node);
+            return 0;
+        }
+        char *err = NULL;
+        if (!clusterManagerNodeIsCluster(node, &err)) {
+            char *msg = (err ? err : "is not configured as a cluster node.");
+            fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg);
+            if (err) zfree(err);
+            freeClusterManagerNode(node);
+            return 0;
+        }
+        err = NULL;
+        if (!clusterManagerNodeLoadInfo(node, 0, &err)) {
+            if (err) {
+                CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+                zfree(err);
+            }
+            freeClusterManagerNode(node);
+            return 0;
+        }
+        err = NULL;
+        if (!clusterManagerNodeIsEmpty(node, &err)) {
+            char *msg;
+            if (err) msg = err;
+            else {
+                msg = " is not empty. Either the node already knows other "
+                      "nodes (check with CLUSTER NODES) or contains some "
+                      "key in database 0.";
+            }
+            fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg);
+            if (err) zfree(err);
+            freeClusterManagerNode(node);
+            return 0;
+        }
+        listAddNodeTail(cluster_manager.nodes, node);
+    }
+    int node_len = cluster_manager.nodes->len;
+    int replicas = config.cluster_manager_command.replicas;
+    int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas);
+    if (masters_count < 3) {
+        fprintf(stderr,
+                "*** ERROR: Invalid configuration for cluster creation.\n");
+        fprintf(stderr,
+                "*** Redis Cluster requires at least 3 master nodes.\n");
+        fprintf(stderr,
+                "*** This is not possible with %d nodes and %d replicas per node.",
+                node_len, replicas);
+        fprintf(stderr, "\n*** At least %d nodes are required.\n",
+                (3 * (replicas + 1)));
+        return 0;
+    }
+    printf(">>> Performing hash slots allocation on %d nodes...\n", node_len);
+    int interleaved_len = 0, ips_len = 0;
+    /* Array of node pointers: one pointer per node is all that is needed. */
+    clusterManagerNode **interleaved = zcalloc(node_len*sizeof(*interleaved));
+    char **ips = zcalloc(node_len * sizeof(char*));
+    clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes));
+    listIter li;
+    listNode *ln;
+    listRewind(cluster_manager.nodes, &li);
+    while ((ln = listNext(&li)) != NULL) {
+        clusterManagerNode *n = ln->value;
+        int found = 0;
+        for (i = 0; i < ips_len; i++) {
+            char *ip = ips[i];
+            if (!strcmp(ip, n->ip)) {
+                found = 1;
+                break;
+            }
+        }
+        if (!found) {
+            ips[ips_len++] = n->ip;
+        }
+        clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+        if (node_array->nodes == NULL)
+            CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len);
+        CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n);
+    }
+    while (interleaved_len < node_len) {
+        for (i = 0; i < ips_len; i++) {
+            clusterManagerNodeArray *node_array = &(ip_nodes[i]);
+            if (node_array->count > 0) {
+                clusterManagerNode *n;
+                CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n);
+                interleaved[interleaved_len++] = n;
+            }
+        }
+    }
+    clusterManagerNode **masters = interleaved;
+    interleaved += masters_count;
+    interleaved_len -= masters_count;
+    float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count;
+    long first = 0;
+    float cursor = 0.0f;
+    for (i = 0; i < masters_count; i++) {
+        clusterManagerNode *master = masters[i];
+        long last = lround(cursor + slots_per_node - 1);
+        if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1))
+            last = CLUSTER_MANAGER_SLOTS - 1;
+        if (last < first) last = first;
+        /* 'first' and 'last' are signed long, hence %ld. */
+        printf("Master[%d] -> Slots %ld - %ld\n", i, first, last);
+        master->slots_count = 0;
+        for (j = first; j <= last; j++) {
+            master->slots[j] = 1;
+            master->slots_count++;
+        }
+        master->dirty = 1;
+        first = last + 1;
+        cursor += slots_per_node;
+    }
+
+    int assign_unused = 0, available_count = interleaved_len;
+assign_replicas:
+    for (i = 0; i < masters_count; i++) {
+        clusterManagerNode *master = masters[i];
+        int assigned_replicas = 0;
+        while (assigned_replicas < replicas) {
+            if (available_count == 0) break;
+            clusterManagerNode *found = NULL, *slave = NULL;
+            int firstNodeIdx = -1;
+            for (j = 0; j < interleaved_len; j++) {
+                clusterManagerNode *n = interleaved[j];
+                if (n == NULL) continue;
+                if (strcmp(n->ip, master->ip)) {
+                    found = n;
+                    interleaved[j] = NULL;
+                    break;
+                }
+                if (firstNodeIdx < 0) firstNodeIdx = j;
+            }
+            if (found) slave = found;
+            else if (firstNodeIdx >= 0) {
+                slave = interleaved[firstNodeIdx];
+                /* The window start moves past the taken entry, so its
+                 * length shrinks by the same amount. */
+                interleaved_len -= (firstNodeIdx + 1);
+                interleaved += (firstNodeIdx + 1);
+            }
+            if (slave != NULL) {
+                assigned_replicas++;
+                available_count--;
+                slave->replicate = sdsnew(master->name);
+                slave->dirty = 1;
+            } else break;
+            printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port,
+                   master->ip, master->port);
+            if (assign_unused) break;
+        }
+    }
+    if (!assign_unused && available_count > 0) {
+        assign_unused = 1;
+        printf("Adding extra replicas...\n");
+        goto assign_replicas;
+    }
+    for (i = 0; i < ips_len; i++) {
+        clusterManagerNodeArray *node_array = ip_nodes + i;
+        CLUSTER_MANAGER_NODEARRAY_RESET(node_array);
+    }
+    clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len);
+    clusterManagerShowNodes();
+    printf("Can I set the above configuration? %s", "(type 'yes' to accept): ");
+    fflush(stdout);
+    /* Zeroed so that a short or failed read never leaves indeterminate
+     * bytes for strcmp to look at. */
+    char buf[4] = {0};
+    int nread = read(fileno(stdin),buf,4);
+    buf[3] = '\0';
+    if (nread > 0 && !strcmp("yes", buf)) {
+        printf("\nFlushing configuration!\n");
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *node = ln->value;
+            char *err = NULL;
+            int flushed = clusterManagerFlushNodeConfig(node, &err);
+            if (!flushed && node->dirty && !node->replicate) {
+                if (err != NULL) {
+                    CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+                    zfree(err);
+                }
+                goto cmd_err;
+            }
+        }
+        printf(">>> Nodes configuration updated\n");
+        printf(">>> Assign a different config epoch to each node\n");
+        int config_epoch = 1;
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *node = ln->value;
+            redisReply *reply = NULL;
+            reply = CLUSTER_MANAGER_COMMAND(node,
+                                            "cluster set-config-epoch %d",
+                                            config_epoch++);
+            if (reply != NULL) freeReplyObject(reply);
+        }
+        printf(">>> Sending CLUSTER MEET messages to join the cluster\n");
+        clusterManagerNode *first = NULL;
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *node = ln->value;
+            if (first == NULL) {
+                first = node;
+                continue;
+            }
+            redisReply *reply = NULL;
+            reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d",
+                                            first->ip, first->port);
+            if (reply != NULL) freeReplyObject(reply);
+        }
+        // Give one second for the join to start, in order to avoid that
+        // waiting for cluster join will find all the nodes agree about
+        // the config as they are still empty with unassigned slots.
+        sleep(1);
+        clusterManagerWaitForClusterJoin();
+        // Useful for the replicas //TODO: create a function for this?
+        listRewind(cluster_manager.nodes, &li);
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *node = ln->value;
+            if (!node->dirty) continue;
+            char *err = NULL;
+            int flushed = clusterManagerFlushNodeConfig(node, &err);
+            if (!flushed && !node->replicate) {
+                if (err != NULL) {
+                    CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err);
+                    zfree(err);
+                }
+                goto cmd_err;
+            }
+        }
+        // Reset Nodes
+        listRewind(cluster_manager.nodes, &li);
+        clusterManagerNode *first_node = NULL;
+        while ((ln = listNext(&li)) != NULL) {
+            clusterManagerNode *node = ln->value;
+            if (!first_node) first_node = node;
+            else freeClusterManagerNode(node);
+        }
+        listEmpty(cluster_manager.nodes);
+        if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg?
+        clusterManagerCheckCluster(0);
+    }
+    /* Free everything */
+    zfree(masters);
+    zfree(ips);
+    for (i = 0; i < node_len; i++) {
+        clusterManagerNodeArray *node_array = ip_nodes + i;
+        CLUSTER_MANAGER_NODEARRAY_FREE(node_array);
+    }
+    zfree(ip_nodes);
+    return 1;
+cmd_err:
+    zfree(masters);
+    zfree(ips);
+    for (i = 0; i < node_len; i++) {
+        clusterManagerNodeArray *node_array = ip_nodes + i;
+        CLUSTER_MANAGER_NODEARRAY_FREE(node_array);
+    }
+    zfree(ip_nodes);
+    return 0;
+}
+
+/* "info" command: load the cluster through the node given either as one
+ * "ip:port" argument or as separate ip and port arguments, then call
+ * clusterManagerShowInfo(). Returns 1 on success, 0 when the arguments are
+ * invalid or the node information cannot be loaded. */
+static int clusterManagerCommandInfo(int argc, char **argv) {
+    int port = 0;
+    char *ip = NULL;
+    if (argc == 1) {
+        char *addr = argv[0];
+        char *c = strrchr(addr, '@');
+        if (c != NULL) *c = '\0';
+        c = strrchr(addr, ':');
+        if (c != NULL) {
+            *c = '\0';
+            ip = addr;
+            port = atoi(++c);
+        } else goto invalid_args;
+    } else if (argc >= 2) {
+        ip = argv[0];
+        port = atoi(argv[1]);
+    } else goto invalid_args; /* No arguments: argv must not be read. */
+    if (!ip || !port) goto invalid_args;
+    clusterManagerNode *node = clusterManagerNewNode(ip, port);
+    if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+    clusterManagerShowInfo();
+    return 1;
+invalid_args:
+    fprintf(stderr, "Invalid arguments: you need to pass either a valid "
+                    "address (ie. 120.0.0.1:7000) or space separated IP "
+                    "and port (ie. 120.0.0.1 7000)\n");
+    return 0;
+}
+
+/* "check" command: same argument handling as the "info" command, but after
+ * printing the cluster info it also runs clusterManagerCheckCluster().
+ * Returns 1 on success, 0 when the arguments are invalid or the node
+ * information cannot be loaded. */
+static int clusterManagerCommandCheck(int argc, char **argv) {
+    int port = 0;
+    char *ip = NULL;
+    if (argc == 1) {
+        char *addr = argv[0];
+        char *c = strrchr(addr, '@');
+        if (c != NULL) *c = '\0';
+        c = strrchr(addr, ':');
+        if (c != NULL) {
+            *c = '\0';
+            ip = addr;
+            port = atoi(++c);
+        } else goto invalid_args;
+    } else if (argc >= 2) {
+        ip = argv[0];
+        port = atoi(argv[1]);
+    } else goto invalid_args; /* No arguments: argv must not be read. */
+    if (!ip || !port) goto invalid_args;
+    clusterManagerNode *node = clusterManagerNewNode(ip, port);
+    if (!clusterManagerLoadInfoFromNode(node, 0)) return 0;
+    clusterManagerShowInfo();
+    clusterManagerCheckCluster(0);
+    return 1;
+invalid_args:
+    fprintf(stderr, "Invalid arguments: you need to pass either a valid "
+                    "address (ie. 120.0.0.1:7000) or space separated IP "
+                    "and port (ie. 120.0.0.1 7000)\n");
+    return 0;
+}
+
+static int clusterManagerCommandHelp(int argc, char **argv) {
+    UNUSED(argc);
+    UNUSED(argv);
+    int commands_count = sizeof(clusterManagerCommands) /
+                         sizeof(clusterManagerCommandDef);
+    int i = 0, j;
+    fprintf(stderr, "Cluster Manager Commands:\n");
+    for (; i < commands_count; i++) {
+        clusterManagerCommandDef *def = &(clusterManagerCommands[i]);
+        int namelen = strlen(def->name), padlen = 15 - namelen;
+        fprintf(stderr, " %s", def->name);
+        for (j = 0; j < padlen; j++) fprintf(stderr, " ");
+        fprintf(stderr, "%s\n", (def->args ?
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From 74dcd14d1333bc312703de5ba143f41d6973815d Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 03/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index ef917cca..456751f5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From d38045805d45a1b990f38d7c2c2edabb9912f711 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 04/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 456751f5..4c30067b 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + currentNode->name 
= sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + } else { + 
fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From be7e2b84bdd4f5b37878768cc0b8a91c0448af11 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 05/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4c30067b..51eb137e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From 1b1f80e60f69a94873e062b87b7b802f412d7136 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 06/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 51eb137e..b5c80a5e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From 956bec4ca8a73b51b984fd7186c2baa734c065ed Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 07/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b5c80a5e..7128dd97 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From dad69ac320eb97191ab8a1bea7eea223e8d2f4a6 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 08/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 7128dd97..de7ba251 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + 
listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From 7b9f945b3700c3e52cb7fd55a44ac8104267babb Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 09/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index de7ba251..fd3bdf98 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From 8128f1bf03ea73f3c3d08936f4556b12ef0c5d72 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 10/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index fd3bdf98..308bd08c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From 4ca8dbdc2b943e2d0bf71354118d8f562aa92178 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 11/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 308bd08c..280e6c9e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From 605d7262e6e4c565fae7ed131825c2e1eea3a1cf Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 12/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 280e6c9e..6ea44f83 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From 87f5a7c0b4c4f207982a39b1c6d668de1e009931 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 13/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6ea44f83..b222f5a8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From 8f4f001dc37c27c9a35ef17a5a4fcfd6520bd311 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 14/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macros converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b222f5a8..b72c31cf 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...)
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes int he configuration. 
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti affinity optimizator will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one. 
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes thing worse, revert. 
Otherwise + * leave as it is becuase the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster did not already joined it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later. 
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From 2f056b83318a0daf13ac6be9e549f0f2c2ba97fb Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 15/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b72c31cf..68ae7cfa 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ? 
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4640,6 +5166,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From fb41b8bb9c6de44ebfd56759efcd7f3edc22aedc Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 16/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 68ae7cfa..366c36fa 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From ce14d23740a465e0ed2daa65288c397c311eda95 Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 17/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 366c36fa..64ec48b5 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From d518733073de78f6fa3bb1dbf3992639dd672138 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 18/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 64ec48b5..fe73f4a4 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From a4cfd503ea1f5b112032e9edf7b564d5d7eced93 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 19/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fe73f4a4..e2b1fb2f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From d8fc307cc65c909ed8c7f801fcd757ef1f9e68b7 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 20/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e2b1fb2f..69ba39ac 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define 
CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); } else 
if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like get_node_by_name but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + (int) n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actaully move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = (int) listLength(table); + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertio failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5168,6 +5439,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5175,9 +5447,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 8969254e669d89f882200cb2848a316d05ea4b04 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 21/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 69ba39ac..8af1130c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), elsewhere + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the slot with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. 
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOTS %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one slot, and in + * importing state in 1 slot. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From eaaa3202e6e8e5cded996ee057fd16fe1fd9baf0 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 22/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index a5e0e231..a64454da 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.c REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8af1130c..96bde356 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -2383,6 +2396,37 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -3533,8 +3577,8 @@ static int clusterManagerFixOpenSlot(int slot) { } // Use ADDSLOTS to assign the slot. 
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map)); /* size in bytes: clear every entry */ + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " COPY"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " REPLACE"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, + timeout); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From 81ab5a3b280886ed52289de2ac0984cc62ce5538 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 23/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 96bde356..34072b74 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From 615aefe6ba3e8a1878ce6ff5dd5c4d6909515431 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 24/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 34072b74..c0d80801 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; /* skip replicas */ + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 551f8f05911918466de45f275f6c2f387a73893f Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 25/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0d80801..daad385d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node %s 
from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 07bd371087242765c47142fb379e72bd88475f3a Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 26/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index daad385d..e7600b91 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From 3fce4301ec54e173c811a8e08c320d9ef86c4de2 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 27/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index e7600b91..c0283b28 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latter case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * an array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number of covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From cc29fd3e28388823a6befc7ddd96d0e2d67161bb Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 28/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c0283b28..b55cf93e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int 
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From 57d895104c48ef43d8bb7bac7a4e859c5a90f192 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 29/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b55cf93e..36531f88 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From 04eac76bae8d5fb4699e14ebccb91f0d45c41e2d Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 30/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717..68fba135 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 \ 
- --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d5..3d8b7b04 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From ad911a338a00286f8a5cec445bfa1f5ab05cd527 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 12 Jan 2018 11:06:24 +0100 Subject: [PATCH 31/66] Cluster Manager mode --- src/redis-cli.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 108 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d80973e7..92467a6b 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -65,6 +65,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -77,6 +78,16 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; +/* Cluster Manager command info */ +struct clusterManagerCommand { + char *name; + int argc; + char **argv; + int flags; + int replicas; +}; + + static redisContext *context; static struct config { char *hostip; @@ -119,8 +130,29 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + struct clusterManagerCommand cluster_manager_command; } config; +/* Cluster Manager commands. 
*/ +typedef int clusterManagerCommandProc(int argc, char **argv); +static struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; +}; + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("CLUSTER: create\n"); + printf("Arguments: %d\n", argc); + printf("Replicas: %d\n", config.cluster_manager_command.replicas); + fprintf(stderr, "Not implemented yet!\n"); + return 0; +} + +struct clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2} +}; + /* User preferences. */ static struct pref { int hints; @@ -1061,6 +1093,13 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + struct clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; +} + static int parseOptions(int argc, char **argv) { int i; @@ -1146,6 +1185,18 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--cluster") && !lastarg) { + if (CLUSTER_MANAGER_MODE()) usage(); + char *cmd = argv[++i]; + int j = i; + for (; j < argc; j++) if (argv[j][0] == '-') break; + j--; + createClusterManagerCommand(cmd, j - i, argv + i); + i = j; + } else if (!strcmp(argv[i],"--cluster") && lastarg) { + usage(); + } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { + config.cluster_manager_command.replicas = atoi(argv[++i]); } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1243,9 +1294,13 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" +" --cluster [args...]\n" +" Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" +"Cluster Manager Commands:\n" +"\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" " redis-cli get mypasswd\n" @@ -1569,6 +1624,43 @@ static int evalMode(int argc, char **argv) { return retval; } +/*------------------------------------------------------------------------------ + * Cluster Manager mode + *--------------------------------------------------------------------------- */ + +static clusterManagerCommandProc *validateClusterManagerCommand(void) { + int i, commands_count = sizeof(clusterManagerCommands) / + sizeof(struct clusterManagerCommandDef); + clusterManagerCommandProc *proc = NULL; + char *cmdname = config.cluster_manager_command.name; + int argc = config.cluster_manager_command.argc; + for (i = 0; i < commands_count; 
i++) { + struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + if (!strcmp(cmddef.name, cmdname)) { + if ((cmddef.arity > 0 && argc != cmddef.arity) || + (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { + fprintf(stderr, "[ERR] Wrong number of arguments for " + "specified --cluster sub command\n"); + return NULL; + } + proc = cmddef.proc; + } + } + if (!proc) fprintf(stderr, "Unknown --cluster subcommand\n"); + return proc; +} + +static void clusterManagerMode(clusterManagerCommandProc *proc) { + int argc = config.cluster_manager_command.argc; + char **argv = config.cluster_manager_command.argv; + if (!proc(argc, argv)) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + exit(0); +} + /*------------------------------------------------------------------------------ * Latency and latency history modes *--------------------------------------------------------------------------- */ @@ -2862,7 +2954,11 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; - + config.cluster_manager_command.name = NULL; + config.cluster_manager_command.argc = 0; + config.cluster_manager_command.argv = NULL; + config.cluster_manager_command.flags = 0; + config.cluster_manager_command.replicas = 0; pref.hints = 1; spectrum_palette = spectrum_palette_color; @@ -2878,6 +2974,17 @@ int main(int argc, char **argv) { argc -= firstarg; argv += firstarg; + /* Cluster Manager mode */ + if (CLUSTER_MANAGER_MODE()) { + clusterManagerCommandProc *proc = validateClusterManagerCommand(); + if (!proc) { + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); + } + clusterManagerMode(proc); + } + /* Latency mode */ if (config.latency_mode) { if (cliConnect(0) == REDIS_ERR) exit(1); From 486c7af7b8c75d76df4cf9ea2571d4e5a6e000c3 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 16:26:21 +0100 Subject: [PATCH 32/66] Cluster Manager: 'create', 'info' and 'check' commands --- 
src/Makefile | 2 +- src/redis-cli.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1272 insertions(+), 27 deletions(-) diff --git a/src/Makefile b/src/Makefile index 3f6ac454..14112aa1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 92467a6b..9943d575 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -41,13 +41,15 @@ #include #include #include -#include +#include #include #include #include #include #include /* use sds.h from hiredis, so that only one set of sds functions will be present in the binary */ +#include "dict.h" +#include "adlist.h" #include "zmalloc.h" #include "linenoise.h" #include "help.h" @@ -65,7 +67,64 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" +#define CLUSTER_MANAGER_SLOTS 
16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) +#define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_COMMAND(n,...) \ + (reconnectingRedisCommand(n->context, __VA_ARGS__)) +#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) + +#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ + memset(n->slots, 0, sizeof(n->slots)); \ + n->slots_count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ + array->alloc = array->nodes; \ + array->len = alloc_len; \ + array->count = 0; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ + if (array->nodes > array->alloc) { \ + array->len = array->nodes - array->alloc; \ + array->nodes = array->alloc; \ + array->count = 0; \ + int i = 0; \ + for(; i < array->len; i++) { \ + if (array->nodes[i] != NULL) array->count++;\ + } \ + } \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) + +#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ + assert(array->nodes < (array->nodes + array->len)); \ + if (*array->nodes != NULL) array->count--; \ + nodeptr = *array->nodes; \ + array->nodes++; \ + array->len--; \ +} while(0) + +#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ + assert(array->nodes < (array->nodes + array->len)); \ + assert(nodeptr != NULL); \ + array->nodes[array->count++] = nodeptr; \ +} while(0) + +#define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ + fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + +#define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 +#define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_FLAG_FRIEND 1 << 2 +#define CLUSTER_MANAGER_FLAG_NOADDR 1 << 3 +#define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 +#define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 /* --latency-dist 
palettes. */ int spectrum_palette_color_size = 19; @@ -79,13 +138,13 @@ int *spectrum_palette; int spectrum_palette_size; /* Cluster Manager command info */ -struct clusterManagerCommand { +typedef struct clusterManagerCommand { char *name; int argc; char **argv; int flags; int replicas; -}; +} clusterManagerCommand; static redisContext *context; @@ -130,28 +189,70 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; - struct clusterManagerCommand cluster_manager_command; + clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager commands. */ +/* Cluster Manager */ + +static struct clusterManager { + list *nodes; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; + int dirty; + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + list *friends; +} clusterManagerNode; + +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; + clusterManagerNode **alloc; + int len; + int count; +} clusterManagerNodeArray; + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); 
+static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); typedef int clusterManagerCommandProc(int argc, char **argv); -static struct clusterManagerCommandDef { +typedef struct clusterManagerCommandDef { char *name; clusterManagerCommandProc *proc; int arity; -}; + char *args; + char *options; +} clusterManagerCommandDef; +static int clusterManagerIsConfigConsistent(void); -static int clusterManagerCommandCreate(int argc, char **argv) { - printf("CLUSTER: create\n"); - printf("Arguments: %d\n", argc); - printf("Replicas: %d\n", config.cluster_manager_command.replicas); - fprintf(stderr, "Not implemented yet!\n"); - return 0; -} +/* Cluster Manager commands. */ -struct clusterManagerCommandDef clusterManagerCommands[] = { - {"create", clusterManagerCommandCreate, -2} -}; +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. 
*/ static struct pref { @@ -165,6 +266,9 @@ char *redisGitSHA1(void); char *redisGitDirty(void); static int cliConnect(int force); +static char *getInfoField(char *info, char *field); +static long getLongInfoField(char *info, char *field); + /*------------------------------------------------------------------------------ * Utility functions *--------------------------------------------------------------------------- */ @@ -317,6 +421,36 @@ static void parseRedisUri(const char *uri) { config.dbnum = atoi(curr); } +static uint64_t dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} + +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +static void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} + +/* _serverAssert is needed by dict */ +void _serverAssert(const char *estr, const char *file, int line) { + fprintf(stderr, "=== ASSERTION FAILED ==="); + fprintf(stderr, "==> %s:%d '%s' is not true",file,line,estr); + *((char*)-1) = 'x'; +} + /*------------------------------------------------------------------------------ * Help functions *--------------------------------------------------------------------------- */ @@ -1094,7 +1228,7 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. *--------------------------------------------------------------------------- */ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - struct clusterManagerCommand *cmd = &config.cluster_manager_command; + clusterManagerCommand *cmd = &config.cluster_manager_command; cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; @@ -1191,7 +1325,7 @@ static int parseOptions(int argc, char **argv) { int j = i; for (; j < argc; j++) if (argv[j][0] == '-') break; j--; - createClusterManagerCommand(cmd, j - i, argv + i); + createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { usage(); @@ -1300,6 +1434,7 @@ static void usage(void) { " --version Output version and exit.\n" "\n" "Cluster Manager Commands:\n" +" Use --cluster help to list all available cluster manager commands.\n" "\n" "Examples:\n" " cat /etc/passwd | redis-cli -x set mypasswd\n" @@ -1628,14 +1763,22 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +clusterManagerCommandDef clusterManagerCommands[] = { + {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", + "cluster-replicas"}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"help", clusterManagerCommandHelp, 0, NULL, NULL} +}; + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / - sizeof(struct clusterManagerCommandDef); + sizeof(clusterManagerCommandDef); clusterManagerCommandProc *proc = NULL; char *cmdname = config.cluster_manager_command.name; int argc = config.cluster_manager_command.argc; for (i = 0; i < commands_count; i++) { - struct clusterManagerCommandDef cmddef = clusterManagerCommands[i]; + clusterManagerCommandDef cmddef = clusterManagerCommands[i]; if (!strcmp(cmddef.name, cmdname)) { if ((cmddef.arity > 0 && argc != cmddef.arity) || (cmddef.arity < 0 && argc < (cmddef.arity * -1))) { @@ -1650,15 +1793,1117 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +static void freeClusterManagerNode(clusterManagerNode *node) { + if (node->context != NULL) redisFree(node->context); 
+ if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *fn = ln->value; + freeClusterManagerNode(fn); + } + listRelease(node->friends); + node->friends = NULL; + } + if (node->name != NULL) sdsfree(node->name); + if (node->replicate != NULL) sdsfree(node->replicate); + if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) + sdsfree(node->ip); + zfree(node); +} + +static void freeClusterManager(void) { + if (cluster_manager.nodes != NULL) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes,&li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + freeClusterManagerNode(n); + } + listRelease(cluster_manager.nodes); + cluster_manager.nodes = NULL; + } +} + +static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { + clusterManagerNode *node = zmalloc(sizeof(*node)); + node->context = NULL; + node->name = NULL; + node->ip = ip; + node->port = port; + node->current_epoch = 0; + node->ping_sent = 0; + node->ping_recv = 0; + node->flags = 0; + node->replicate = NULL; + node->dirty = 0; + node->friends = NULL; + CLUSTER_MANAGER_RESET_SLOTS(node); + return node; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0; + *err = NULL; + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + freeReplyObject(info); + return 0; + } + int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); + freeReplyObject(info); + return is_cluster; +} + +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { + redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + int is_err = 0, is_empty = 1; + *err = NULL; + if (info == NULL || (is_err = (info->type == 
REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + if (strstr(info->str, "db0:") != NULL) { + is_empty = 0; + goto result; + } + freeReplyObject(info); + info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((info->len + 1) * sizeof(char)); + strcpy(*err, info->str); + } + is_empty = 0; + goto result; + } + long known_nodes = getLongInfoField(info->str, "cluster_known_nodes"); + is_empty = (known_nodes == 1); +result: + freeReplyObject(info); + return is_empty; +} + +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_len, clusterManagerNode ***offending, int *offending_len) +{ + assert(offending != NULL); + int score = 0, i, j; + int node_len = cluster_manager.nodes->len; + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + clusterManagerNode **offending_p = *offending; + dictType dtype = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ + }; + for (i = 0; i < ip_len; i++) { + clusterManagerNodeArray *node_array = &(ipnodes[i]); + dict *related = dictCreate(&dtype, NULL); + char *ip = NULL; + for (j = 0; j < node_array->len; j++) { + clusterManagerNode *node = node_array->nodes[j]; + if (node == NULL) continue; + if (!ip) ip = node->ip; + sds types; + if (!node->replicate) { + assert(node->name != NULL); + dictEntry *entry = dictFind(related, node->name); + if (entry) types = (sds) dictGetVal(entry); + else types = sdsempty(); + types = sdscatprintf(types, "m%s", types); + dictReplace(related, node->name, types); + } else { + dictEntry *entry = dictFind(related, node->replicate); + if (entry) types = (sds) dictGetVal(entry); + else { + types = 
sdsempty(); + dictAdd(related, node->replicate, types); + } + sdscat(types, "s"); + } + } + dictIterator *iter = dictGetIterator(related); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds types = (sds) dictGetVal(entry); + sds name = (sds) dictGetKey(entry); + int typeslen = sdslen(types); + if (typeslen < 2) continue; + if (types[0] == 'm') score += (10000 * (typeslen - 1)); + else score += (1 * typeslen); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate == NULL) continue; + if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { + *(offending_p++) = n; + break; + } + } + } + if (offending_len != NULL) *offending_len = offending_p - *offending; + dictReleaseIterator(iter); + dictRelease(related); + } + return score; +} + +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, + int ip_len) +{ + clusterManagerNode **offenders = NULL, **aux; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + if (score == 0) goto cleanup; + printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + int node_len = cluster_manager.nodes->len; + int maxiter = 500 * node_len; + srand(time(NULL)); + while (maxiter > 0) { + int offending_len = 0; + if (offenders != NULL) { + zfree(offenders); + offenders = NULL; + } + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + &offending_len); + if (score == 0) break; + int rand_idx = rand() % offending_len; + clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode **other_replicas = zcalloc((node_len - 1) * + sizeof(*other_replicas)); + int other_replicas_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n != first && n->replicate != NULL) + 
other_replicas[other_replicas_count++] = n; + } + if (other_replicas_count == 0) { + zfree(other_replicas); + break; + } + rand_idx = rand() % other_replicas_count; + second = other_replicas[rand_idx]; + char *first_master = first->replicate, + *second_master = second->replicate; + first->replicate = second_master, first->dirty = 1; + second->replicate = first_master, second->dirty = 1; + zfree(aux), aux = NULL; + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + &aux, NULL); + if (new_score > score) { + first->replicate = first_master; + second->replicate = second_master; + } + zfree(other_replicas); + maxiter--; + } + zfree(aux), aux = NULL; + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + char *msg; + if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + else if (score >= 10000) + msg = ("[WARNING] Some slaves are in the same host as their master"); + else + msg=("[WARNING] Some slaves of the same master are in the same host"); + printf("%s\n", msg); +cleanup: + zfree(offenders); + zfree(aux); +} + +static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { + sds slots = sdsempty(); + int first_range_idx = -1, last_slot_idx = -1, i; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int has_slot = node->slots[i]; + if (has_slot) { + if (first_range_idx == -1) { + if (sdslen(slots)) slots = sdscat(slots, ","); + first_range_idx = i; + slots = sdscatfmt(slots, "[%u", i); + } + last_slot_idx = i; + } else { + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) + slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + last_slot_idx = -1; + first_range_idx = -1; + } + } + if (last_slot_idx >= 0) { + if (first_range_idx == last_slot_idx) slots = sdscat(slots, "]"); + else slots = sdscatfmt(slots, "-%u]", last_slot_idx); + } + return slots; +} + +static sds clusterManagerNodeInfo(clusterManagerNode *node) { + sds info = sdsempty(); + int is_master = 
!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); + char *role = (is_master ? "M" : "S"); + sds slots = NULL; + if (node->dirty && node->replicate != NULL) + info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); + else { + slots = clusterManagerNodeSlotsString(node); + info = sdscatfmt(info, "%s: %S %s:%u\n" + " slots:%S (%u slots) " + "", //TODO: flags string + role, node->name, node->ip, node->port, + slots, node->slots_count); + sdsfree(slots); + } + if (node->replicate != NULL) + info = sdscatfmt(info, "\n replicates %S", node->replicate); + //else if () {} //TODO: add replicas info + return info; +} + +static void clusterManagerShowNodes(void) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds info = clusterManagerNodeInfo(node); + printf("%s\n", info); + sdsfree(info); + } +} + +static void clusterManagerShowInfo(void) { + int masters = 0; + int keys = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!(node->flags & CLUSTER_MANAGER_FLAG_SLAVE)) { + if (!node->name) continue; + int replicas = 0; + int dbsize = -1; + char name[9]; + memcpy(name, node->name, 8); + name[8] = '\0'; + listIter ri; + listNode *rn; + listRewind(cluster_manager.nodes, &ri); + while ((rn = listNext(&ri)) != NULL) { + clusterManagerNode *n = rn->value; + if (n == node || !(n->flags & CLUSTER_MANAGER_FLAG_SLAVE)) + continue; + if (n->replicate && !strcmp(n->replicate, node->name)) + replicas++; + } + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "DBSIZE"); + if (reply != NULL || reply->type == REDIS_REPLY_INTEGER) + dbsize = reply->integer; + if (dbsize < 0) { + char *err = ""; + if (reply != NULL && reply->type == REDIS_REPLY_ERROR) + err = reply->str; + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + if (reply != NULL) freeReplyObject(reply); + 
return; + }; + if (reply != NULL) freeReplyObject(reply); + printf("%s:%d (%s...) -> %d keys | %d slots | %d slaves.\n", + node->ip, node->port, name, dbsize, + node->slots_count, replicas); + masters++; + keys += dbsize; + } + } + printf("[OK] %d keys in %d masters.\n", keys, masters); + float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; + printf("%.2f keys per slot on average.\n", keys_per_slot); +} + +static int clusterManagerAddSlots(clusterManagerNode *node, char**err) +{ + redisReply *reply = NULL; + void *_reply = NULL; + int is_err = 0; + int argc; + sds *argv = NULL; + size_t *argvlen = NULL; + *err = NULL; + sds cmd = sdsnew("CLUSTER ADDSLOTS "); + int i, added = 0; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (node->slots[i]) { + char *fmt = (!last_slot ? "%u " : "%u"); + cmd = sdscatfmt(cmd, fmt, i); + added++; + } + } + if (!added) goto node_cmd_err; + argv = cliSplitArgs(cmd, &argc); + if (argc == 0 || argv == NULL) goto node_cmd_err; + argvlen = zmalloc(argc*sizeof(size_t)); + for (i = 0; i < argc; i++) + argvlen[i] = sdslen(argv[i]); + redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); + if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + reply = (redisReply*) _reply; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + sdsfree(cmd); + zfree(argvlen); + sdsfreesplitres(argv,argc); + freeReplyObject(reply); + return 1; +node_cmd_err: + sdsfree(cmd); + zfree(argvlen); + if (argv != NULL) sdsfreesplitres(argv,argc); + if (reply != NULL) freeReplyObject(reply); + return 0; +} + +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { + if (!node->dirty) return 0; + redisReply *reply = NULL; + int is_err = 0; + *err = NULL; + if (node->replicate != NULL) { + 
reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", + node->replicate); + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + } else { + int added = clusterManagerAddSlots(node, err); + if (!added || *err != NULL) goto node_cmd_err; + } + node->dirty = 0; + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static void clusterManagerWaitForClusterJoin(void) { + printf("Waiting for the cluster to join\n"); + while(!clusterManagerIsConfigConsistent()) { + printf("."); + fflush(stdout); + sleep(1); + } + printf("\n"); +} + +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err) +{ + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + int is_err = 0; + *err = NULL; + if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { + if (is_err && err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + } + goto node_cmd_err; + } + int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); + char *lines = reply->str, *p, *line; + while ((p = strstr(lines, "\n")) != NULL) { + *p = '\0'; + line = lines; + lines = p + 1; + char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, + *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, + *link_status = NULL; + int i = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + switch(i++){ + case 0: name = token; break; + case 1: addr = token; break; + case 2: flags = token; break; + case 3: master_id = token; break; + case 4: ping_sent = token; break; + case 5: ping_recv = token; break; + case 6: config_epoch = token; break; + case 7: link_status = token; break; + } + if (i == 8) break; // Slots + } + if (!flags) goto node_cmd_err; + int myself = (strstr(flags, 
"myself") != NULL); + if (strstr(flags, "noaddr") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + node->flags |= CLUSTER_MANAGER_FLAG_FAIL; + clusterManagerNode *currentNode = NULL; + if (myself) { + node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; + currentNode = node; + CLUSTER_MANAGER_RESET_SLOTS(node); + if (i == 8) { + int remaining = strlen(line); + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + remaining -= (p - line); + + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] == '[') { + //TODO: migrating/importing + } else if ((p = strchr(slotsdef, '-')) != NULL) { + int start, stop; + *p = '\0'; + start = atoi(slotsdef); + stop = atoi(p + 1); + node->slots_count += (stop - (start - 1)); + while (start <= stop) node->slots[start++] = 1; + } else if (p > slotsdef) { + node->slots[atoi(slotsdef)] = 1; + node->slots_count++; + } + } + } + node->dirty = 0; + } else if (!getfriends) { + if (!(node->flags & CLUSTER_MANAGER_FLAG_MYSELF)) continue; + else break; + } else { + if (addr == NULL) { + // TODO: find a better err message + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); + goto node_cmd_err; + } + *c = '\0'; + int port = atoi(++c); + currentNode = clusterManagerNewNode(sdsnew(addr), port); + currentNode->flags |= CLUSTER_MANAGER_FLAG_FRIEND; + if (node->friends == NULL) node->friends = listCreate(); + listAddNodeTail(node->friends, currentNode); + } + if (name != NULL) currentNode->name = sdsnew(name); + if (strstr(flags, "slave") != NULL) { + currentNode->flags |= 
CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + } + if (config_epoch != NULL) + currentNode->current_epoch = atoll(config_epoch); + if (ping_sent != NULL) currentNode->ping_sent = atoll(ping_sent); + if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); + if (!getfriends && myself) break; + } + freeReplyObject(reply); + return 1; +node_cmd_err: + freeReplyObject(reply); + return 0; +} + +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { + if (node->context == NULL) + node->context = redisConnect(node->ip, node->port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, + node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; + char *e = NULL; + if (!clusterManagerNodeIsCluster(node, &e)) { + char *msg = (e ? e : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + if (e) zfree(e); + freeClusterManagerNode(node); + return 0; + } + e = NULL; + if (!clusterManagerNodeLoadInfo(node, opts, &e)) { + if (e) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, e); + zfree(e); + } + freeClusterManagerNode(node); + return 0; + } + cluster_manager.nodes = listCreate(); + listAddNodeTail(cluster_manager.nodes, node); + if (node->friends != NULL) { + listIter li; + listNode *ln; + listRewind(node->friends, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *friend = ln->value; + if (!friend->ip || !friend->port) continue; + if (!friend->context) + friend->context = redisConnect(friend->ip, friend->port); + if (friend->context->err) continue; + e = NULL; + if (clusterManagerNodeLoadInfo(friend, 0, &e)) { + if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | + CLUSTER_MANAGER_FLAG_DISCONNECT | + CLUSTER_MANAGER_FLAG_FAIL)) continue; + listAddNodeTail(cluster_manager.nodes, friend); + 
+ } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + } + listRelease(node->friends); + node->friends = NULL; + } + return 1; +} + +int clusterManagerSlotCompare(const void *slot1, const void *slot2) { + const char **i1 = (const char **)slot1; + const char **i2 = (const char **)slot2; + return strcmp(*i1, *i2); +} + +static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { + sds signature = NULL; + int node_count = 0, i = 0, name_len = 0; + redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); + if (reply == NULL || reply->type == REDIS_REPLY_ERROR) + goto cleanup; + char *lines = reply->str, *p, *line; + char **node_configs = NULL; + while ((p = strstr(lines, "\n")) != NULL) { + i = 0; + *p = '\0'; + line = lines; + lines = p + 1; + char *nodename = NULL; + int tot_size = 0; + while ((p = strchr(line, ' ')) != NULL) { + *p = '\0'; + char *token = line; + line = p + 1; + if (i == 0) { + nodename = token; + tot_size = p - token; + name_len = tot_size; + } else if (i == 8) break; + i++; + } + if (i != 8) continue; + if (nodename == NULL) continue; + int remaining = strlen(line); + if (remaining == 0) continue; + char **slots = NULL; + int c = 0; + //TODO: just while(remaining) && assign p inside the block + while ((p = strchr(line, ' ')) != NULL || remaining) { + if (p == NULL) p = line + remaining; + int size = (p - line); + remaining -= size; + tot_size += size; + char *slotsdef = line; + *p = '\0'; + if (remaining) line = p + 1; + else line = p; + if (slotsdef[0] != '[') { + c++; + slots = zrealloc(slots, (c * sizeof(char *))); + slots[c - 1] = slotsdef; + } + } + if (c > 0) { + if (c > 1) + qsort(slots, c, sizeof(char *), clusterManagerSlotCompare); + node_count++; + node_configs = + zrealloc(node_configs, (node_count * sizeof(char *))); + tot_size += (sizeof(char) * (c - 1)); + char *cfg = zmalloc((sizeof(char) * tot_size) + 1); + memcpy(cfg, nodename, name_len); + char *sp = cfg + 
name_len; + *(sp++) = ':'; + for (i = 0; i < c; i++) { + if (i > 0) *(sp++) = '|'; + int slen = strlen(slots[i]); + memcpy(sp, slots[i], slen); + sp += slen; + } + *(sp++) = '\0'; + node_configs[node_count - 1] = cfg; + } + zfree(slots); + } + if (node_count > 0) { + if (node_count > 1) { + qsort(node_configs, node_count, sizeof(char *), + clusterManagerSlotCompare); + } + signature = sdsempty(); + for (i = 0; i < node_count; i++) { + if (i > 0) signature = sdscatprintf(signature, "%c", '|'); + signature = sdscatfmt(signature, "%s", node_configs[i]); + } + } +cleanup: + if (reply != NULL) freeReplyObject(reply); + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + return signature; +} + +static int clusterManagerIsConfigConsistent(void) { + if (cluster_manager.nodes == NULL) return 0; + int consistent = 0; + sds first_cfg = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + sds cfg = clusterManagerGetConfigSignature(node); + if (cfg == NULL) { + consistent = 0; + break; + } + if (first_cfg == NULL) first_cfg = cfg; + else { + consistent = !sdscmp(first_cfg, cfg); + sdsfree(cfg); + if (!consistent) break; + } + } + if (first_cfg != NULL) sdsfree(first_cfg); + return consistent; +} + +static void clusterManagerCheckCluster(int quiet) { + listNode *ln = listFirst(cluster_manager.nodes); + if (!ln) return; + clusterManagerNode *node = ln->value; + printf(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); + if (!quiet) clusterManagerShowNodes(); + if (!clusterManagerIsConfigConsistent()) + printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array + else + printf("[OK] All nodes agree about slots configuration.\n"); + //TODO:check_open_slots + //TODO:check_slots_coverage +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int 
argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; - if (!proc(argc, argv)) { - sdsfree(config.hostip); - sdsfree(config.mb_delim); - exit(1); - } + cluster_manager.nodes = NULL; + if (!proc(argc, argv)) goto cluster_manager_err; + freeClusterManager(); exit(0); +cluster_manager_err: + freeClusterManager(); + sdsfree(config.hostip); + sdsfree(config.mb_delim); + exit(1); +} + +/* Cluster Manager Commands */ + +static int clusterManagerCommandCreate(int argc, char **argv) { + printf("Cluster Manager: Creating Cluster\n"); + int i, j; + cluster_manager.nodes = listCreate(); + for (i = 0; i < argc; i++) { + char *addr = argv[i]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c == NULL) { + fprintf(stderr, "Invalid address format: %s\n", addr); + return 0; + } + *c = '\0'; + char *ip = addr; + int port = atoi(++c); + clusterManagerNode *node = clusterManagerNewNode(ip, port); + node->context = redisConnect(ip, port); + if (node->context->err) { + fprintf(stderr,"Could not connect to Redis at "); + fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); + freeClusterManagerNode(node); + return 0; + } + char *err = NULL; + if (!clusterManagerNodeIsCluster(node, &err)) { + char *msg = (err ? err : "is not configured as a cluster node."); + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeLoadInfo(node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + freeClusterManagerNode(node); + return 0; + } + err = NULL; + if (!clusterManagerNodeIsEmpty(node, &err)) { + char *msg; + if (err) msg = err; + else { + msg = " is not empty. 
Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + if (err) zfree(err); + freeClusterManagerNode(node); + return 0; + } + listAddNodeTail(cluster_manager.nodes, node); + } + int node_len = cluster_manager.nodes->len; + int replicas = config.cluster_manager_command.replicas; + int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); + if (masters_count < 3) { + fprintf(stderr, + "*** ERROR: Invalid configuration for cluster creation.\n"); + fprintf(stderr, + "*** Redis Cluster requires at least 3 master nodes.\n"); + fprintf(stderr, + "*** This is not possible with %d nodes and %d replicas per node.", + node_len, replicas); + fprintf(stderr, "\n*** At least %d nodes are required.\n", + (3 * (replicas + 1))); + return 0; + } + printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + int interleaved_len = 0, ips_len = 0; + clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); + char **ips = zcalloc(node_len * sizeof(char*)); + clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + int found = 0; + for (i = 0; i < ips_len; i++) { + char *ip = ips[i]; + if (!strcmp(ip, n->ip)) { + found = 1; + break; + } + } + if (!found) { + ips[ips_len++] = n->ip; + } + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->nodes == NULL) + CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); + CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + } + while (interleaved_len < node_len) { + for (i = 0; i < ips_len; i++) { + clusterManagerNodeArray *node_array = &(ip_nodes[i]); + if (node_array->count > 0) { + clusterManagerNode *n; + CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + interleaved[interleaved_len++] = n; + 
} + } + } + clusterManagerNode **masters = interleaved; + interleaved += masters_count; + interleaved_len -= masters_count; + float slots_per_node = CLUSTER_MANAGER_SLOTS / (float) masters_count; + long first = 0; + float cursor = 0.0f; + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + long last = lround(cursor + slots_per_node - 1); + if (last > CLUSTER_MANAGER_SLOTS || i == (masters_count - 1)) + last = CLUSTER_MANAGER_SLOTS - 1; + if (last < first) last = first; + printf("Master[%d] -> Slots %lu - %lu\n", i, first, last); + master->slots_count = 0; + for (j = first; j <= last; j++) { + master->slots[j] = 1; + master->slots_count++; + } + master->dirty = 1; + first = last + 1; + cursor += slots_per_node; + } + + int assign_unused = 0, available_count = interleaved_len; +assign_replicas: + for (i = 0; i < masters_count; i++) { + clusterManagerNode *master = masters[i]; + int assigned_replicas = 0; + while (assigned_replicas < replicas) { + if (available_count == 0) break; + clusterManagerNode *found = NULL, *slave = NULL; + int firstNodeIdx = -1; + for (j = 0; j < interleaved_len; j++) { + clusterManagerNode *n = interleaved[j]; + if (n == NULL) continue; + if (strcmp(n->ip, master->ip)) { + found = n; + interleaved[j] = NULL; + break; + } + if (firstNodeIdx < 0) firstNodeIdx = j; + } + if (found) slave = found; + else if (firstNodeIdx >= 0) { + slave = interleaved[firstNodeIdx]; + interleaved_len -= (interleaved - (interleaved + firstNodeIdx)); + interleaved += (firstNodeIdx + 1); + } + if (slave != NULL) { + assigned_replicas++; + available_count--; + slave->replicate = sdsnew(master->name); + slave->dirty = 1; + } else break; + printf("Adding replica %s:%d to %s:%d\n", slave->ip, slave->port, + master->ip, master->port); + if (assign_unused) break; + } + } + if (!assign_unused && available_count > 0) { + assign_unused = 1; + printf("Adding extra replicas...\n"); + goto assign_replicas; + } + for (i = 0; i < ips_len; i++) { + 
clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + } + clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerShowNodes(); + printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread != 0 && !strcmp("yes", buf)) { + printf("\nFlushing configuration!\n"); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && node->dirty && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + printf(">>> Nodes configuration updated\n"); + printf(">>> Assign a different config epoch to each node\n"); + int config_epoch = 1; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, + "cluster set-config-epoch %d", + config_epoch++); + if (reply != NULL) freeReplyObject(reply); + } + printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerNode *first = NULL; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (first == NULL) { + first = node; + continue; + } + redisReply *reply = NULL; + reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", + first->ip, first->port); + if (reply != NULL) freeReplyObject(reply); + } + // Give one second for the join to start, in order to avoid that + // waiting for cluster join will find all the nodes agree about + // the config as they are still empty with unassigned slots. + sleep(1); + clusterManagerWaitForClusterJoin(); + // Useful for the replicas //TODO: create a function for this? 
+ listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!node->dirty) continue; + char *err = NULL; + int flushed = clusterManagerFlushNodeConfig(node, &err); + if (!flushed && !node->replicate) { + if (err != NULL) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); + zfree(err); + } + goto cmd_err; + } + } + // Reset Nodes + listRewind(cluster_manager.nodes, &li); + clusterManagerNode *first_node = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + if (!first_node) first_node = node; + else freeClusterManagerNode(node); + } + listEmpty(cluster_manager.nodes); + if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + clusterManagerCheckCluster(0); + } + /* Free everything */ + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 1; +cmd_err: + zfree(masters); + zfree(ips); + for (i = 0; i < node_len; i++) { + clusterManagerNodeArray *node_array = ip_nodes + i; + CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + } + zfree(ip_nodes); + return 0; +} + +static int clusterManagerCommandInfo(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 
120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandCheck(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else goto invalid_args; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerShowInfo(); + clusterManagerCheckCluster(0); + return 1; +invalid_args: + fprintf(stderr, "Invalid arguments: you need to pass either a valid " + "address (ie. 120.0.0.1:7000) or space separated IP " + "and port (ie. 120.0.0.1 7000)\n"); + return 0; +} + +static int clusterManagerCommandHelp(int argc, char **argv) { + UNUSED(argc); + UNUSED(argv); + int commands_count = sizeof(clusterManagerCommands) / + sizeof(clusterManagerCommandDef); + int i = 0, j; + fprintf(stderr, "Cluster Manager Commands:\n"); + for (; i < commands_count; i++) { + clusterManagerCommandDef *def = &(clusterManagerCommands[i]); + int namelen = strlen(def->name), padlen = 15 - namelen; + fprintf(stderr, " %s", def->name); + for (j = 0; j < padlen; j++) fprintf(stderr, " "); + fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); + //TODO: if (def->options) + } + return 0; } /*------------------------------------------------------------------------------ From 8c7ad80f9f9c916620873bf8e52f18b51ce9208c Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 17:57:16 +0100 Subject: [PATCH 33/66] Added check for open slots (clusterManagerCheckCluster) --- src/redis-cli.c | 162 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 143 insertions(+), 19 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9943d575..b20cd31d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,6 +74,13 @@ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) +#define CLUSTER_MANAGER_ERROR(err) do { \ + if (cluster_manager.errors == NULL) \ + cluster_manager.errors = listCreate(); \ + listAddNodeTail(cluster_manager.errors, err); \ + fprintf(stderr, "%s\n", (char *) err); \ +} while(0) + #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ memset(n->slots, 0, sizeof(n->slots)); \ n->slots_count = 0; \ @@ -137,7 +144,14 @@ int spectrum_palette_mono[] = {0,233,234,235,237,239,241,243,245,247,249,251,253 int *spectrum_palette; int spectrum_palette_size; -/* Cluster Manager command info */ +/* Dict Helpers */ + +static uint64_t dictSdsHash(const void *key); +static int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2); +static void dictSdsDestructor(void *privdata, void *val); + +/* Cluster Manager Command Info */ typedef struct clusterManagerCommand { char *name; int argc; @@ -196,6 +210,7 @@ static struct config { static struct clusterManager { list *nodes; + list *errors; } cluster_manager; typedef struct clusterManagerNode { @@ -212,6 +227,10 @@ typedef struct clusterManagerNode { uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; } clusterManagerNode; typedef struct 
clusterManagerNodeArray { @@ -221,6 +240,15 @@ typedef struct clusterManagerNodeArray { int count; } clusterManagerNodeArray; +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1810,13 +1838,22 @@ static void freeClusterManagerNode(clusterManagerNode *node) { if (node->replicate != NULL) sdsfree(node->replicate); if ((node->flags & CLUSTER_MANAGER_FLAG_FRIEND) && node->ip) sdsfree(node->ip); + int i; + if (node->migrating != NULL) { + for (i = 0; i < node->migrating_count; i++) sdsfree(node->migrating[i]); + zfree(node->migrating); + } + if (node->importing != NULL) { + for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); + zfree(node->importing); + } zfree(node); } static void freeClusterManager(void) { + listIter li; + listNode *ln; if (cluster_manager.nodes != NULL) { - listIter li; - listNode *ln; listRewind(cluster_manager.nodes,&li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; @@ -1825,9 +1862,18 @@ static void freeClusterManager(void) { listRelease(cluster_manager.nodes); cluster_manager.nodes = NULL; } + if (cluster_manager.errors != NULL) { + listRewind(cluster_manager.errors,&li); + while ((ln = listNext(&li)) != NULL) { + sds err = ln->value; + sdsfree(err); + } + listRelease(cluster_manager.errors); + cluster_manager.errors = NULL; + } } -static clusterManagerNode *clusterManagerNewNode(char * ip, int port) { +static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNode *node = zmalloc(sizeof(*node)); node->context = NULL; node->name = NULL; @@ -1840,6 +1886,10 @@ static 
clusterManagerNode *clusterManagerNewNode(char * ip, int port) { node->replicate = NULL; node->dirty = 0; node->friends = NULL; + node->migrating = NULL; + node->importing = NULL; + node->migrating_count = 0; + node->importing_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } @@ -1902,17 +1952,9 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int node_len = cluster_manager.nodes->len; *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); clusterManagerNode **offending_p = *offending; - dictType dtype = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ - }; for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); - dict *related = dictCreate(&dtype, NULL); + dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; for (j = 0; j < node_array->len; j++) { clusterManagerNode *node = node_array->nodes[j]; @@ -2291,7 +2333,32 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (remaining) line = p + 1; else line = p; if (slotsdef[0] == '[') { - //TODO: migrating/importing + slotsdef++; + if ((p = strstr(slotsdef, "->-"))) { // Migrating + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds dst = sdsnew(p); + node->migrating_count += 2; + node->migrating = zrealloc(node->migrating, + (node->migrating_count * sizeof(sds))); + node->migrating[node->migrating_count - 2] = + slot; + node->migrating[node->migrating_count - 1] = + dst; + } else if ((p = strstr(slotsdef, "-<-"))) {//Importing + *p = '\0'; + p += 3; + sds slot = sdsnew(slotsdef); + sds src = sdsnew(p); + node->importing_count += 2; + node->importing = zrealloc(node->importing, + (node->importing_count * sizeof(sds))); + node->importing[node->importing_count - 2] = + slot; + node->importing[node->importing_count - 1] = + src; + } } else if ((p = 
strchr(slotsdef, '-')) != NULL) { int start, stop; *p = '\0'; @@ -2529,11 +2596,68 @@ static void clusterManagerCheckCluster(int quiet) { printf(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); if (!quiet) clusterManagerShowNodes(); - if (!clusterManagerIsConfigConsistent()) - printf("[ERR] Nodes don't agree about configuration!\n"); //TODO: in redis-trib this error is added to @errors array - else - printf("[OK] All nodes agree about slots configuration.\n"); - //TODO:check_open_slots + if (!clusterManagerIsConfigConsistent()) { + sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); + CLUSTER_MANAGER_ERROR(err); + } else printf("[OK] All nodes agree about slots configuration.\n"); + // Check open slots + listIter li; + listRewind(cluster_manager.nodes, &li); + int i; + dict *open_slots = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->migrating != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "migrating state ", + n->ip, + n->port); + for (i = 0; i < n->migrating_count; i += 2) { + sds slot = n->migrating[i]; + dictAdd(open_slots, slot, n->migrating[i + 1]); + char *fmt = (i > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + if (n->importing != NULL) { + if (open_slots == NULL) + open_slots = dictCreate(&clusterManagerDictType, NULL); + sds errstr = sdsempty(); + errstr = sdscatprintf(errstr, + "[WARNING] Node %s:%d has slots in " + "importing state ", + n->ip, + n->port); + for (i = 0; i < n->importing_count; i += 2) { + sds slot = n->importing[i]; + dictAdd(open_slots, slot, n->importing[i + 1]); + char *fmt = (i > 0 ? 
",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + errstr = sdscat(errstr, "."); + CLUSTER_MANAGER_ERROR(errstr); + } + } + if (open_slots != NULL) { + dictIterator *iter = dictGetIterator(open_slots); + dictEntry *entry; + sds errstr = sdsnew("[WARNING] The following slots are open: "); + i = 0; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + char *fmt = (i++ > 0 ? ",%S" : "%S"); + errstr = sdscatfmt(errstr, fmt, slot); + } + fprintf(stderr, "%s.\n", (char *) errstr); + sdsfree(errstr); + dictRelease(open_slots); + } //TODO:check_slots_coverage } From b3e0ca3412edbed27f07e2cd5f62b1506c013029 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 31 Jan 2018 19:25:02 +0100 Subject: [PATCH 34/66] - Cluster Manager: fixed various memory leaks - Cluster Manager: fixed flags assignment in clusterManagerNodeLoadInfo --- src/redis-cli.c | 54 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index b20cd31d..a596afca 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2310,12 +2310,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (!flags) goto node_cmd_err; int myself = (strstr(flags, "myself") != NULL); - if (strstr(flags, "noaddr") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - node->flags |= CLUSTER_MANAGER_FLAG_FAIL; clusterManagerNode *currentNode = NULL; if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; @@ -2396,10 +2390,22 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (node->friends == NULL) node->friends = listCreate(); listAddNodeTail(node->friends, currentNode); } - if (name != NULL) currentNode->name = sdsnew(name); + if (name != NULL) { + if (currentNode->name) sdsfree(currentNode->name); + currentNode->name 
= sdsnew(name); + } + if (strstr(flags, "noaddr") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + if (strstr(flags, "disconnected") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + if (strstr(flags, "fail") != NULL) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; if (strstr(flags, "slave") != NULL) { currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) currentNode->replicate = sdsnew(master_id); + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -2442,27 +2448,39 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { freeClusterManagerNode(node); return 0; } + listIter li; + listNode *ln; + if (cluster_manager.nodes != NULL) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) + freeClusterManagerNode((clusterManagerNode *) ln->value); + listRelease(cluster_manager.nodes); + } cluster_manager.nodes = listCreate(); listAddNodeTail(cluster_manager.nodes, node); if (node->friends != NULL) { - listIter li; - listNode *ln; listRewind(node->friends, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *friend = ln->value; - if (!friend->ip || !friend->port) continue; + if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) friend->context = redisConnect(friend->ip, friend->port); - if (friend->context->err) continue; + if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { if (friend->flags & (CLUSTER_MANAGER_FLAG_NOADDR | CLUSTER_MANAGER_FLAG_DISCONNECT | - CLUSTER_MANAGER_FLAG_FAIL)) continue; + CLUSTER_MANAGER_FLAG_FAIL)) + goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); - - } else fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + } else { + 
fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", + friend->ip, friend->port); + goto invalid_friend; + } + continue; +invalid_friend: + freeClusterManagerNode(friend); } listRelease(node->friends); node->friends = NULL; @@ -2601,6 +2619,7 @@ static void clusterManagerCheckCluster(int quiet) { CLUSTER_MANAGER_ERROR(err); } else printf("[OK] All nodes agree about slots configuration.\n"); // Check open slots + printf(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2836,6 +2855,7 @@ assign_replicas: if (slave != NULL) { assigned_replicas++; available_count--; + if (slave->replicate) sdsfree(slave->replicate); slave->replicate = sdsnew(master->name); slave->dirty = 1; } else break; @@ -2873,7 +2893,7 @@ assign_replicas: zfree(err); } goto cmd_err; - } + } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); printf(">>> Assign a different config epoch to each node\n"); From 65d37960e78512e9d530ee8f1030f091ce784557 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 17:43:36 +0100 Subject: [PATCH 35/66] Cluster Manager: slots coverage check. 
--- src/redis-cli.c | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index a596afca..0dede2d9 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2607,6 +2607,24 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static int clusterManagerGetCoveredSlots(char *all_slots) { + if (cluster_manager.nodes == NULL) return 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + int totslots = 0, i; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + if (node->slots[i] && !all_slots[i]) { + all_slots[i] = 1; + totslots++; + } + } + } + return totslots; +} + static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; @@ -2677,7 +2695,19 @@ static void clusterManagerCheckCluster(int quiet) { sdsfree(errstr); dictRelease(open_slots); } - //TODO:check_slots_coverage + printf(">>> Check slots coverage...\n"); + char slots[CLUSTER_MANAGER_SLOTS]; + memset(slots, 0, CLUSTER_MANAGER_SLOTS); + int coverage = clusterManagerGetCoveredSlots(slots); + if (coverage == CLUSTER_MANAGER_SLOTS) + printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); + else { + sds err = sdsempty(); + err = sdscatprintf(err, "[ERR] Not all %d slots are " + "covered by nodes.\n", + CLUSTER_MANAGER_SLOTS); + CLUSTER_MANAGER_ERROR(err); + } } static void clusterManagerMode(clusterManagerCommandProc *proc) { From 4cc8de1a371696ea16485d18c082b4c6481bf5b5 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 1 Feb 2018 20:09:30 +0100 Subject: [PATCH 36/66] Cluster Manager: reply error catch for MEET command --- src/redis-cli.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 0dede2d9..83638616 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2949,7 +2949,16 @@ 
assign_replicas: redisReply *reply = NULL; reply = CLUSTER_MANAGER_COMMAND(node, "cluster meet %s %d", first->ip, first->port); - if (reply != NULL) freeReplyObject(reply); + int is_err = 0; + if (reply != NULL) { + if ((is_err = reply->type == REDIS_REPLY_ERROR)) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, reply->str); + freeReplyObject(reply); + } else { + is_err = 1; + fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); + } + if (is_err) goto cmd_err; } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about From c002b95d89b917641ddbdb502d29bafa919a2212 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 11:29:25 +0100 Subject: [PATCH 37/66] Cluster Manager: cluster is considered consistent if only one node has been found --- src/redis-cli.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 83638616..19c8fcdd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2584,7 +2584,10 @@ cleanup: static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; - int consistent = 0; + int consistent = (listLength(cluster_manager.nodes) <= 1); + // If the Cluster has only one node, it's always consistent + // Does it make sense? 
+ if (consistent) return 1; sds first_cfg = NULL; listIter li; listNode *ln; From 2f48d62423e7e19cb872472b1c229e47f1529cc7 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 7 Feb 2018 12:02:56 +0100 Subject: [PATCH 38/66] ClusterManager: added replicas count to clusterManagerNode --- src/redis-cli.c | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 19c8fcdd..791b0dd8 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -223,9 +223,11 @@ typedef struct clusterManagerNode { time_t ping_recv; int flags; sds replicate; + list replicas; int dirty; uint8_t slots[CLUSTER_MANAGER_SLOTS]; int slots_count; + int replicas_count; list *friends; sds *migrating; sds *importing; @@ -250,6 +252,7 @@ static dictType clusterManagerDictType = { }; static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); @@ -265,6 +268,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); + typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { char *name; @@ -1890,10 +1894,31 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->importing = NULL; node->migrating_count = 0; node->importing_count = 0; + node->replicas_count = 0; CLUSTER_MANAGER_RESET_SLOTS(node); return node; } +static clusterManagerNode *clusterManagerNodeByName(const char *name) { + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + 
listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && !sdscmp(n->name, lcname)) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); int is_err = 0; @@ -2119,7 +2144,9 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { } if (node->replicate != NULL) info = sdscatfmt(info, "\n replicates %S", node->replicate); - //else if () {} //TODO: add replicas info + else if (node->replicas_count) + info = sdscatfmt(info, "\n %U additional replica(s)", + node->replicas_count); return info; } @@ -2485,6 +2512,18 @@ invalid_friend: listRelease(node->friends); node->friends = NULL; } + // Count replicas for each node + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->replicate != NULL) { + clusterManagerNode *master = clusterManagerNodeByName(n->replicate); + if (master == NULL) { + printf("*** WARNING: %s:%d claims to be slave of unknown " + "node ID %s.\n", n->ip, n->port, n->replicate); + } else master->replicas_count++; + } + } return 1; } From 1ad1f00163ecd4c794ce94479de1dc3084187b6e Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 9 Feb 2018 13:02:37 +0100 Subject: [PATCH 39/66] Cluster Manager: CLUSTER_MANAGER_NODE_CONNECT macro --- src/redis-cli.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 791b0dd8..4ce3a12d 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -70,6 +70,8 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) +#define CLUSTER_MANAGER_NODE_CONNECT(n) \ + (n->context = redisConnect(n->ip, n->port)); 
#define CLUSTER_MANAGER_COMMAND(n,...) \ (reconnectingRedisCommand(n->context, __VA_ARGS__)) #define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) @@ -2449,7 +2451,7 @@ node_cmd_err: static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) - node->context = redisConnect(node->ip, node->port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", node->ip, node->port, @@ -2491,7 +2493,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { clusterManagerNode *friend = ln->value; if (!friend->ip || !friend->port) goto invalid_friend; if (!friend->context) - friend->context = redisConnect(friend->ip, friend->port); + CLUSTER_MANAGER_NODE_CONNECT(friend); if (friend->context->err) goto invalid_friend; e = NULL; if (clusterManagerNodeLoadInfo(friend, 0, &e)) { @@ -2785,7 +2787,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *ip = addr; int port = atoi(++c); clusterManagerNode *node = clusterManagerNewNode(ip, port); - node->context = redisConnect(ip, port); + CLUSTER_MANAGER_NODE_CONNECT(node); if (node->context->err) { fprintf(stderr,"Could not connect to Redis at "); fprintf(stderr,"%s:%d: %s\n", ip, port, node->context->errstr); From 2d677e2bf354342efefadce9b9536ccc1ab3005b Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 13 Feb 2018 12:00:06 +0100 Subject: [PATCH 40/66] Cluster Manager: 'call' command. 
--- src/redis-cli.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4ce3a12d..00b5e90a 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -286,6 +286,7 @@ static int clusterManagerIsConfigConsistent(void); static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); /* User preferences. */ @@ -1802,6 +1803,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { "cluster-replicas"}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"call", clusterManagerCommandCall, -2, + "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -2449,6 +2452,11 @@ node_cmd_err: return 0; } +/* Retrieves info about the cluster using argument 'node' as the starting + * point. All nodes will be loaded inside the cluster_manager.nodes list. + * Warning: if something goes wrong, it will free the starting node before + * returning 0. 
*/ + static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -3115,6 +3123,56 @@ invalid_args: return 0; } +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0; + char *ip = NULL; + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + int i; + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else { + fprintf(stderr, + "Invalid arguments: first agrumnt must be host:port.\n"); + return 0; + } + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + argc--; + argv++; + size_t *argvlen = zmalloc(argc*sizeof(size_t)); + printf(">>> Calling"); + for (i = 0; i < argc; i++) { + argvlen[i] = strlen(argv[i]); + printf(" %s", argv[i]); + } + printf("\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (!n->context) CLUSTER_MANAGER_NODE_CONNECT(n); + redisReply *reply = NULL; + redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); + int status = redisGetReply(n->context, (void **)(&reply)); + if (status != REDIS_OK || reply == NULL ) + printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? 
+ else { + sds formatted_reply = cliFormatReplyTTY(reply, ""); + printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); + sdsfree(formatted_reply); + } + if (reply != NULL) freeReplyObject(reply); + } + zfree(argvlen); + return 1; +} + static int clusterManagerCommandHelp(int argc, char **argv) { UNUSED(argc); UNUSED(argv); From 307d995f75d9e0d8dab12d2dd2aca0499481257d Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 17:54:46 +0100 Subject: [PATCH 41/66] Cluster Manager: improved cleanup/error handling in various functions --- src/redis-cli.c | 101 +++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 45 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 00b5e90a..63a4f69b 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2220,7 +2220,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; int argc; sds *argv = NULL; size_t *argvlen = NULL; @@ -2235,39 +2235,44 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) added++; } } - if (!added) goto node_cmd_err; + if (!added) { + success = 0; + goto cleanup; + } argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) goto node_cmd_err; + if (argc == 0 || argv == NULL) { + success = 0; + goto cleanup; + } argvlen = zmalloc(argc*sizeof(size_t)); for (i = 0; i < argc; i++) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); - if (redisGetReply(node->context, &_reply) != REDIS_OK) goto node_cmd_err; + if (redisGetReply(node->context, &_reply) != REDIS_OK) { + success = 1; + goto cleanup; + } reply = (redisReply*) _reply; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } - 
sdsfree(cmd); - zfree(argvlen); - sdsfreesplitres(argv,argc); - freeReplyObject(reply); - return 1; -node_cmd_err: +cleanup: sdsfree(cmd); zfree(argvlen); if (argv != NULL) sdsfreesplitres(argv,argc); if (reply != NULL) freeReplyObject(reply); - return 0; + return success; } static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", @@ -2277,18 +2282,20 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) goto node_cmd_err; + if (!added || *err != NULL) { + success = 0; + goto cleanup; + } } node->dirty = 0; - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply != NULL) freeReplyObject(reply); + return success; } static void clusterManagerWaitForClusterJoin(void) { @@ -2305,14 +2312,15 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0; + int is_err = 0, success = 1; *err = NULL; if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); } - goto node_cmd_err; + success = 0; + goto cleanup; } int getfriends = (opts & CLUSTER_MANAGER_OPT_GETFRIENDS); char *lines = reply->str, *p, *line; @@ -2340,7 +2348,10 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } if (i == 8) break; // Slots } - if (!flags) goto node_cmd_err; + if (!flags) { + success = 0; + goto cleanup; + } int 
myself = (strstr(flags, "myself") != NULL); clusterManagerNode *currentNode = NULL; if (myself) { @@ -2406,14 +2417,16 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (addr == NULL) { // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } char *c = strrchr(addr, '@'); if (c != NULL) *c = '\0'; c = strrchr(addr, ':'); if (c == NULL) { fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); - goto node_cmd_err; + success = 0; + goto cleanup; } *c = '\0'; int port = atoi(++c); @@ -2445,11 +2458,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (ping_recv != NULL) currentNode->ping_recv = atoll(ping_recv); if (!getfriends && myself) break; } - freeReplyObject(reply); - return 1; -node_cmd_err: - freeReplyObject(reply); - return 0; +cleanup: + if (reply) freeReplyObject(reply); + return success; } /* Retrieves info about the cluster using argument 'node' as the starting @@ -2780,7 +2791,7 @@ cluster_manager_err: static int clusterManagerCommandCreate(int argc, char **argv) { printf("Cluster Manager: Creating Cluster\n"); - int i, j; + int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { char *addr = argv[i]; @@ -2974,7 +2985,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + goto cleanup; } else if (err != NULL) zfree(err); } printf(">>> Nodes configuration updated\n"); @@ -3010,7 +3022,10 @@ assign_replicas: is_err = 1; fprintf(stderr, "Failed to send CLUSTER MEET command.\n"); } - if (is_err) goto cmd_err; + if (is_err) { + success = 0; + goto cleanup; + } } // Give one second for the join to start, in order to avoid that // waiting for cluster join will find all the nodes agree about @@ -3029,7 +3044,8 @@ assign_replicas: CLUSTER_MANAGER_PRINT_REPLY_ERROR(node, err); zfree(err); } - goto cmd_err; + success = 0; + 
goto cleanup; } } // Reset Nodes @@ -3041,9 +3057,13 @@ assign_replicas: else freeClusterManagerNode(node); } listEmpty(cluster_manager.nodes); - if (!clusterManagerLoadInfoFromNode(first_node, 0)) goto cmd_err; //TODO: msg? + if (!clusterManagerLoadInfoFromNode(first_node, 0)) { + success = 0; + goto cleanup; //TODO: msg? + } clusterManagerCheckCluster(0); } +cleanup: /* Free everything */ zfree(masters); zfree(ips); @@ -3052,16 +3072,7 @@ assign_replicas: CLUSTER_MANAGER_NODEARRAY_FREE(node_array); } zfree(ip_nodes); - return 1; -cmd_err: - zfree(masters); - zfree(ips); - for (i = 0; i < node_len; i++) { - clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); - } - zfree(ip_nodes); - return 0; + return success; } static int clusterManagerCommandInfo(int argc, char **argv) { From 18910013cd852ea6246a766851b52938cff217e2 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 14 Feb 2018 19:29:28 +0100 Subject: [PATCH 42/66] Cluster Manager: colorized output --- src/redis-cli.c | 130 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 63a4f69b..09ad5497 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -67,6 +67,7 @@ #define REDIS_CLI_HISTFILE_DEFAULT ".rediscli_history" #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" + #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) @@ -80,7 +81,7 @@ if (cluster_manager.errors == NULL) \ cluster_manager.errors = listCreate(); \ listAddNodeTail(cluster_manager.errors, err); \ - fprintf(stderr, "%s\n", (char *) err); \ + clusterManagerLogErr("%s\n", (char *) err); \ } while(0) #define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ @@ -124,7 +125,20 @@ } while(0) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ - 
fprintf(stderr,"Node %s:%d replied with error:\n%s\n", n->ip, n->port, err); + clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ + n->ip, n->port, err); + +#define clusterManagerLogInfo(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_INFO,__VA_ARGS__) + +#define clusterManagerLogErr(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_ERR,__VA_ARGS__) + +#define clusterManagerLogWarn(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_WARN,__VA_ARGS__) + +#define clusterManagerLogOk(...) \ + clusterManagerLog(CLUSTER_MANAGER_LOG_LVL_SUCCESS,__VA_ARGS__) #define CLUSTER_MANAGER_FLAG_MYSELF 1 << 0 #define CLUSTER_MANAGER_FLAG_SLAVE 1 << 1 @@ -133,7 +147,22 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 + +#define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 + +#define CLUSTER_MANAGER_LOG_LVL_INFO 1 +#define CLUSTER_MANAGER_LOG_LVL_WARN 2 +#define CLUSTER_MANAGER_LOG_LVL_ERR 3 +#define CLUSTER_MANAGER_LOG_LVL_SUCCESS 4 + +#define LOG_COLOR_BOLD "29;1m" +#define LOG_COLOR_RED "31;1m" +#define LOG_COLOR_GREEN "32;1m" +#define LOG_COLOR_YELLOW "33;1m" +#define LOG_COLOR_RESET "0m" /* --latency-dist palettes. */ int spectrum_palette_color_size = 19; @@ -270,6 +299,7 @@ static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); typedef int clusterManagerCommandProc(int argc, char **argv); typedef struct clusterManagerCommandDef { @@ -1267,6 +1297,7 @@ static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { cmd->name = cmdname; cmd->argc = argc; cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; } static int parseOptions(int argc, char **argv) { @@ -2042,7 +2073,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerNode **offenders = NULL, **aux; int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); if (score == 0) goto cleanup; - printf(">>> Trying to optimize slaves allocation for anti-affinity\n"); + clusterManagerLogInfo(">>> Trying to optimize slaves allocation " + "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; int maxiter = 500 * node_len; srand(time(NULL)); @@ -2091,12 +2123,15 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(aux), aux = NULL; score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); char *msg; - if (score == 0) msg = "[OK] Perfect anti-affinity obtained!"; + int perfect = (score == 0); + int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : + CLUSTER_MANAGER_LOG_LVL_WARN); + if (perfect) msg = "[OK] Perfect anti-affinity obtained!"; else if (score >= 10000) msg = ("[WARNING] Some slaves are in the same host as their master"); else msg=("[WARNING] Some slaves of the same master are in the same host"); - printf("%s\n", msg); + clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); zfree(aux); @@ -2211,7 +2246,7 @@ static void clusterManagerShowInfo(void) { keys += dbsize; } } - printf("[OK] %d keys in %d masters.\n", keys, masters); + clusterManagerLogOk("[OK] %d keys in %d masters.\n", keys, masters); float keys_per_slot = keys / (float) CLUSTER_MANAGER_SLOTS; printf("%.2f keys per slot on average.\n", keys_per_slot); } @@ -2482,7 +2517,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { char *msg = (e ? 
e : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", node->ip, node->port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -2522,8 +2557,9 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { goto invalid_friend; listAddNodeTail(cluster_manager.nodes, friend); } else { - fprintf(stderr,"[ERR] Unable to load info for node %s:%d\n", - friend->ip, friend->port); + clusterManagerLogErr("[ERR] Unable to load info for " + "node %s:%d\n", + friend->ip, friend->port); goto invalid_friend; } continue; @@ -2692,15 +2728,18 @@ static void clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); if (!ln) return; clusterManagerNode *node = ln->value; - printf(">>> Performing Cluster Check (using node %s:%d)\n", - node->ip, node->port); + clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", + node->ip, node->port); if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); CLUSTER_MANAGER_ERROR(err); - } else printf("[OK] All nodes agree about slots configuration.\n"); + } else { + clusterManagerLogOk("[OK] All nodes agree about slots " + "configuration.\n"); + } // Check open slots - printf(">>> Check for open slots...\n"); + clusterManagerLogInfo(">>> Check for open slots...\n"); listIter li; listRewind(cluster_manager.nodes, &li); int i; @@ -2754,17 +2793,18 @@ static void clusterManagerCheckCluster(int quiet) { char *fmt = (i++ > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } - fprintf(stderr, "%s.\n", (char *) errstr); + clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); dictRelease(open_slots); } - printf(">>> Check slots coverage...\n"); + clusterManagerLogInfo(">>> Check slots coverage...\n"); char slots[CLUSTER_MANAGER_SLOTS]; memset(slots, 0, CLUSTER_MANAGER_SLOTS); int coverage = clusterManagerGetCoveredSlots(slots); - if (coverage == CLUSTER_MANAGER_SLOTS) - printf("[OK] All %d slots covered.\n", CLUSTER_MANAGER_SLOTS); - else { + if (coverage == CLUSTER_MANAGER_SLOTS) { + clusterManagerLogOk("[OK] All %d slots covered.\n", + CLUSTER_MANAGER_SLOTS); + } else { sds err = sdsempty(); err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", @@ -2773,6 +2813,26 @@ static void clusterManagerCheckCluster(int quiet) { } } +static void clusterManagerLog(int level, const char* fmt, ...) { + int use_colors = + (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); + if (use_colors) { + printf("\033["); + switch (level) { + case CLUSTER_MANAGER_LOG_LVL_INFO: printf(LOG_COLOR_BOLD); break; + case CLUSTER_MANAGER_LOG_LVL_WARN: printf(LOG_COLOR_YELLOW); break; + case CLUSTER_MANAGER_LOG_LVL_ERR: printf(LOG_COLOR_RED); break; + case CLUSTER_MANAGER_LOG_LVL_SUCCESS: printf(LOG_COLOR_GREEN); break; + default: printf(LOG_COLOR_RESET); break; + } + } + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); + if (use_colors) printf("\033[" LOG_COLOR_RESET); +} + static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2790,7 +2850,6 @@ cluster_manager_err: /* Cluster Manager Commands */ static int clusterManagerCommandCreate(int argc, char **argv) { - printf("Cluster Manager: Creating Cluster\n"); int i, j, success = 1; cluster_manager.nodes = listCreate(); for (i = 0; i < argc; i++) { @@ -2816,7 +2875,7 @@ static int 
clusterManagerCommandCreate(int argc, char **argv) { char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { char *msg = (err ? err : "is not configured as a cluster node."); - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2835,11 +2894,11 @@ static int clusterManagerCommandCreate(int argc, char **argv) { char *msg; if (err) msg = err; else { - msg = " is not empty. Either the node already knows other " + msg = "is not empty. Either the node already knows other " "nodes (check with CLUSTER NODES) or contains some " "key in database 0."; } - fprintf(stderr, "[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -2850,18 +2909,17 @@ static int clusterManagerCommandCreate(int argc, char **argv) { int replicas = config.cluster_manager_command.replicas; int masters_count = CLUSTER_MANAGER_MASTERS_COUNT(node_len, replicas); if (masters_count < 3) { - fprintf(stderr, - "*** ERROR: Invalid configuration for cluster creation.\n"); - fprintf(stderr, - "*** Redis Cluster requires at least 3 master nodes.\n"); - fprintf(stderr, + clusterManagerLogErr( + "*** ERROR: Invalid configuration for cluster creation.\n" + "*** Redis Cluster requires at least 3 master nodes.\n" "*** This is not possible with %d nodes and %d replicas per node.", node_len, replicas); - fprintf(stderr, "\n*** At least %d nodes are required.\n", - (3 * (replicas + 1))); + clusterManagerLogErr("\n*** At least %d nodes are required.\n", + 3 * (replicas + 1)); return 0; } - printf(">>> Performing hash slots allocation on %d nodes...\n", node_len); + clusterManagerLogInfo(">>> Performing hash slots allocation " + "on %d nodes...\n", node_len); int interleaved_len = 0, ips_len = 0; clusterManagerNode **interleaved = 
zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); @@ -2989,8 +3047,9 @@ assign_replicas: goto cleanup; } else if (err != NULL) zfree(err); } - printf(">>> Nodes configuration updated\n"); - printf(">>> Assign a different config epoch to each node\n"); + clusterManagerLogInfo(">>> Nodes configuration updated\n"); + clusterManagerLogInfo(">>> Assign a different config epoch to " + "each node\n"); int config_epoch = 1; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3001,7 +3060,8 @@ assign_replicas: config_epoch++); if (reply != NULL) freeReplyObject(reply); } - printf(">>> Sending CLUSTER MEET messages to join the cluster\n"); + clusterManagerLogInfo(">>> Sending CLUSTER MEET messages to join " + "the cluster\n"); clusterManagerNode *first = NULL; listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { @@ -3156,7 +3216,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { argc--; argv++; size_t *argvlen = zmalloc(argc*sizeof(size_t)); - printf(">>> Calling"); + clusterManagerLogInfo(">>> Calling"); for (i = 0; i < argc; i++) { argvlen[i] = strlen(argv[i]); printf(" %s", argv[i]); From 4e0c2f9c3c5c800df2c1ed61ed862983b15a80bf Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 20 Feb 2018 12:01:13 +0100 Subject: [PATCH 43/66] - Fixed bug in clusterManagerGetAntiAffinityScore - Code improvements --- src/redis-cli.c | 57 ++++++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 09ad5497..6a5279d2 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -505,7 +505,6 @@ static int dictSdsKeyCompare(void *privdata, const void *key1, static void dictSdsDestructor(void *privdata, void *val) { DICT_NOTUSED(privdata); - sdsfree(val); } @@ -2008,11 +2007,13 @@ result: static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_len, clusterManagerNode 
***offending, int *offending_len) { - assert(offending != NULL); int score = 0, i, j; int node_len = cluster_manager.nodes->len; - *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); - clusterManagerNode **offending_p = *offending; + clusterManagerNode **offending_p = NULL; + if (offending != NULL) { + *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); + offending_p = *offending; + } for (i = 0; i < ip_len; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); @@ -2021,23 +2022,21 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, clusterManagerNode *node = node_array->nodes[j]; if (node == NULL) continue; if (!ip) ip = node->ip; - sds types; - if (!node->replicate) { - assert(node->name != NULL); - dictEntry *entry = dictFind(related, node->name); - if (entry) types = (sds) dictGetVal(entry); - else types = sdsempty(); - types = sdscatprintf(types, "m%s", types); - dictReplace(related, node->name, types); - } else { - dictEntry *entry = dictFind(related, node->replicate); - if (entry) types = (sds) dictGetVal(entry); - else { - types = sdsempty(); - dictAdd(related, node->replicate, types); - } - sdscat(types, "s"); + sds types, otypes; + // We always use the Master ID as key + sds key = (!node->replicate ? node->name : node->replicate); + assert(key != NULL); + dictEntry *entry = dictFind(related, key); + if (entry) otypes = (sds) dictGetVal(entry); + else { + otypes = sdsempty(); + dictAdd(related, key, otypes); } + // Master type 'm' is always set as the first character of the + // types string. 
+ if (!node->replicate) types = sdscatprintf(otypes, "m%s", otypes); + else types = sdscat(otypes, "s"); + if (types != otypes) dictReplace(related, key, types); } dictIterator *iter = dictGetIterator(related); dictEntry *entry; @@ -2048,6 +2047,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (typeslen < 2) continue; if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); + if (offending == NULL) continue; listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2056,11 +2056,12 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (n->replicate == NULL) continue; if (!strcmp(n->replicate, name) && !strcmp(n->ip, ip)) { *(offending_p++) = n; + if (offending_len != NULL) (*offending_len)++; break; } } } - if (offending_len != NULL) *offending_len = offending_p - *offending; + //if (offending_len != NULL) *offending_len = offending_p - *offending; dictReleaseIterator(iter); dictRelease(related); } @@ -2070,8 +2071,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_len) { - clusterManagerNode **offenders = NULL, **aux; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + clusterManagerNode **offenders = NULL; + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); @@ -2088,7 +2089,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, &offending_len); if (score == 0) break; int rand_idx = rand() % offending_len; - clusterManagerNode *first = offenders[rand_idx], *second; + clusterManagerNode *first = offenders[rand_idx], + *second = NULL; clusterManagerNode **other_replicas = zcalloc((node_len - 1) * sizeof(*other_replicas)); int 
other_replicas_count = 0; @@ -2110,9 +2112,8 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - zfree(aux), aux = NULL; int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, - &aux, NULL); + NULL, NULL); if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2120,8 +2121,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - zfree(aux), aux = NULL; - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &aux, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2134,7 +2134,6 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, clusterManagerLog(log_level, "%s\n", msg); cleanup: zfree(offenders); - zfree(aux); } static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { From 7d609ff952dbbcaa3574287a16349e56cbae03bb Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 22 Feb 2018 18:32:39 +0100 Subject: [PATCH 44/66] Cluster Manager: - Almost all Cluster Manager related code moved to the same section. - Many macroes converted to functions - Added various comments - Little code restyling --- src/redis-cli.c | 460 ++++++++++++++++++++++++++++-------------------- 1 file changed, 271 insertions(+), 189 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 6a5279d2..66fc4d18 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -75,54 +75,8 @@ (n->context = redisConnect(n->ip, n->port)); #define CLUSTER_MANAGER_COMMAND(n,...) 
\ (reconnectingRedisCommand(n->context, __VA_ARGS__)) -#define CLUSTER_MANAGER_NODE_INFO(n) (CLUSTER_MANAGER_COMMAND(n, "INFO")) -#define CLUSTER_MANAGER_ERROR(err) do { \ - if (cluster_manager.errors == NULL) \ - cluster_manager.errors = listCreate(); \ - listAddNodeTail(cluster_manager.errors, err); \ - clusterManagerLogErr("%s\n", (char *) err); \ -} while(0) - -#define CLUSTER_MANAGER_RESET_SLOTS(n) do { \ - memset(n->slots, 0, sizeof(n->slots)); \ - n->slots_count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_INIT(array, alloc_len) do { \ - array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*));\ - array->alloc = array->nodes; \ - array->len = alloc_len; \ - array->count = 0; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_RESET(array) do { \ - if (array->nodes > array->alloc) { \ - array->len = array->nodes - array->alloc; \ - array->nodes = array->alloc; \ - array->count = 0; \ - int i = 0; \ - for(; i < array->len; i++) { \ - if (array->nodes[i] != NULL) array->count++;\ - } \ - } \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_FREE(array) zfree(array->alloc) - -#define CLUSTER_MANAGER_NODEARRAY_SHIFT(array, nodeptr) do {\ - assert(array->nodes < (array->nodes + array->len)); \ - if (*array->nodes != NULL) array->count--; \ - nodeptr = *array->nodes; \ - array->nodes++; \ - array->len--; \ -} while(0) - -#define CLUSTER_MANAGER_NODEARRAY_ADD(array, nodeptr) do { \ - assert(array->nodes < (array->nodes + array->len)); \ - assert(nodeptr != NULL); \ - array->nodes[array->count++] = nodeptr; \ -} while(0) +#define CLUSTER_MANAGER_NODE_ARRAY_FREE(array) zfree(array->alloc) #define CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err) \ clusterManagerLogErr("Node %s:%d replied with error:\n%s\n", \ @@ -190,6 +144,7 @@ typedef struct clusterManagerCommand { int flags; int replicas; } clusterManagerCommand; +static void createClusterManagerCommand(char *cmdname, int argc, char **argv); static redisContext *context; @@ -237,88 +192,6 @@ static struct 
config { clusterManagerCommand cluster_manager_command; } config; -/* Cluster Manager */ - -static struct clusterManager { - list *nodes; - list *errors; -} cluster_manager; - -typedef struct clusterManagerNode { - redisContext *context; - sds name; - char *ip; - int port; - uint64_t current_epoch; - time_t ping_sent; - time_t ping_recv; - int flags; - sds replicate; - list replicas; - int dirty; - uint8_t slots[CLUSTER_MANAGER_SLOTS]; - int slots_count; - int replicas_count; - list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; -} clusterManagerNode; - -typedef struct clusterManagerNodeArray { - clusterManagerNode **nodes; - clusterManagerNode **alloc; - int len; - int count; -} clusterManagerNodeArray; - -static dictType clusterManagerDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - dictSdsDestructor /* val destructor */ -}; - -static clusterManagerNode *clusterManagerNewNode(char *ip, int port); -static clusterManagerNode *clusterManagerNodeByName(const char *name); -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); -static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, - char **err); -static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); -static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); -static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len); -static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len); -static sds clusterManagerNodeInfo(clusterManagerNode *node); -static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); -static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); -static void clusterManagerWaitForClusterJoin(void); 
-static void clusterManagerCheckCluster(int quiet); -static void clusterManagerLog(int level, const char* fmt, ...); - -typedef int clusterManagerCommandProc(int argc, char **argv); -typedef struct clusterManagerCommandDef { - char *name; - clusterManagerCommandProc *proc; - int arity; - char *args; - char *options; -} clusterManagerCommandDef; -static int clusterManagerIsConfigConsistent(void); - -/* Cluster Manager commands. */ - -static int clusterManagerCommandCreate(int argc, char **argv); -static int clusterManagerCommandInfo(int argc, char **argv); -static int clusterManagerCommandCheck(int argc, char **argv); -static int clusterManagerCommandCall(int argc, char **argv); -static int clusterManagerCommandHelp(int argc, char **argv); - /* User preferences. */ static struct pref { int hints; @@ -1291,14 +1164,6 @@ static redisReply *reconnectingRedisCommand(redisContext *c, const char *fmt, .. * User interface *--------------------------------------------------------------------------- */ -static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { - clusterManagerCommand *cmd = &config.cluster_manager_command; - cmd->name = cmdname; - cmd->argc = argc; - cmd->argv = argc ? argv : NULL; - if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; -} - static int parseOptions(int argc, char **argv) { int i; @@ -1828,6 +1693,100 @@ static int evalMode(int argc, char **argv) { * Cluster Manager mode *--------------------------------------------------------------------------- */ +/* The Cluster Manager global structure */ +static struct clusterManager { + list *nodes; /* List of nodes int he configuration. 
*/ + list *errors; +} cluster_manager; + +typedef struct clusterManagerNode { + redisContext *context; + sds name; + char *ip; + int port; + uint64_t current_epoch; + time_t ping_sent; + time_t ping_recv; + int flags; + sds replicate; /* Master ID if node is a slave */ + list replicas; + int dirty; /* Node has changes that can be flushed */ + uint8_t slots[CLUSTER_MANAGER_SLOTS]; + int slots_count; + int replicas_count; + list *friends; + sds *migrating; + sds *importing; + int migrating_count; + int importing_count; +} clusterManagerNode; + +/* Data structure used to represent a sequence of nodes. */ +typedef struct clusterManagerNodeArray { + clusterManagerNode **nodes; /* Actual nodes array */ + clusterManagerNode **alloc; /* Pointer to the allocated memory */ + int len; /* Actual length of the array */ + int count; /* Non-NULL nodes count */ +} clusterManagerNodeArray; + +static dictType clusterManagerDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + dictSdsDestructor /* val destructor */ +}; + +typedef int clusterManagerCommandProc(int argc, char **argv); + +/* Cluster Manager helper functions */ + +static clusterManagerNode *clusterManagerNewNode(char *ip, int port); +static clusterManagerNode *clusterManagerNodeByName(const char *name); +static void clusterManagerNodeResetSlots(clusterManagerNode *node); +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, + char **err); +static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); +static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err); +static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, + int ip_count, clusterManagerNode ***offending, int *offending_len); +static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray 
*ipnodes, + int ip_count); +static sds clusterManagerNodeInfo(clusterManagerNode *node); +static void clusterManagerShowNodes(void); +static void clusterManagerShowInfo(void); +static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); +static void clusterManagerWaitForClusterJoin(void); +static void clusterManagerCheckCluster(int quiet); +static void clusterManagerLog(int level, const char* fmt, ...); +static int clusterManagerIsConfigConsistent(void); +static void clusterManagerOnError(sds err); +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int len); +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array); +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr); +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node); + +/* Cluster Manager commands. */ + +static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandInfo(int argc, char **argv); +static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandCall(int argc, char **argv); +static int clusterManagerCommandHelp(int argc, char **argv); + +typedef struct clusterManagerCommandDef { + char *name; + clusterManagerCommandProc *proc; + int arity; + char *args; + char *options; +} clusterManagerCommandDef; + clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "cluster-replicas"}, @@ -1838,6 +1797,16 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; + +static void createClusterManagerCommand(char *cmdname, int argc, char **argv) { + clusterManagerCommand *cmd = &config.cluster_manager_command; + cmd->name = cmdname; + cmd->argc = argc; + cmd->argv = argc ? 
argv : NULL; + if (isColorTerm()) cmd->flags |= CLUSTER_MANAGER_CMD_FLAG_COLOR; +} + + static clusterManagerCommandProc *validateClusterManagerCommand(void) { int i, commands_count = sizeof(clusterManagerCommands) / sizeof(clusterManagerCommandDef); @@ -1930,7 +1899,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); return node; } @@ -1954,41 +1923,49 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); - int is_err = 0; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { +static void clusterManagerNodeResetSlots(clusterManagerNode *node) { + memset(node->slots, 0, sizeof(node->slots)); + node->slots_count = 0; +} + +static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, + char **err) +{ + redisReply *info = CLUSTER_MANAGER_COMMAND(node, "INFO"); + if (err != NULL) *err = NULL; + if (info == NULL) return NULL; + if (info->type == REDIS_REPLY_ERROR) { + if (err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); strcpy(*err, info->str); } freeReplyObject(info); - return 0; + return NULL; } + return info; +} + +static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); + if (info == NULL) return 0; int is_cluster = (int) getLongInfoField(info->str, "cluster_enabled"); freeReplyObject(info); return is_cluster; } +/* Checks whether the node is empty. 
Node is considered not-empty if it has + * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { - redisReply *info = CLUSTER_MANAGER_NODE_INFO(node); + redisReply *info = clusterManagerGetNodeRedisInfo(node, err); int is_err = 0, is_empty = 1; - *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } - is_empty = 0; - goto result; - } + if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; goto result; } freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); + if (err != NULL) *err = NULL; if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { if (is_err && err != NULL) { *err = zmalloc((info->len + 1) * sizeof(char)); @@ -2004,8 +1981,37 @@ result: return is_empty; } +/* Return the anti-affinity score, which is a measure of the amount of + * violations of anti-affinity in the current cluster layout, that is, how + * badly the masters and slaves are distributed in the different IP + * addresses so that slaves of the same master are not in the master + * host and are also in different hosts. + * + * The score is calculated as follows: + * + * SAME_AS_MASTER = 10000 * each slave in the same IP of its master. + * SAME_AS_SLAVE = 1 * each slave having the same IP as another slave + of the same master. + * FINAL_SCORE = SAME_AS_MASTER + SAME_AS_SLAVE + * + * So a greater score means a worse anti-affinity level, while zero + * means perfect anti-affinity. + * + * The anti affinity optimizator will try to get a score as low as + * possible. Since we do not want to sacrifice the fact that slaves should + * not be in the same host as the master, we assign 10000 times the score + * to this violation, so that we'll optimize for the second factor only + * if it does not impact the first one. 
+ * + * The ipnodes argument is an array of clusterManagerNodeArray, one for + * each IP, while ip_count is the total number of IPs in the configuration. + * + * The function returns the above score, and the list of + * offending slaves can be stored into the 'offending' argument, + * so that the optimizer can try changing the configuration of the + * slaves violating the anti-affinity goals. */ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, - int ip_len, clusterManagerNode ***offending, int *offending_len) + int ip_count, clusterManagerNode ***offending, int *offending_len) { int score = 0, i, j; int node_len = cluster_manager.nodes->len; @@ -2014,7 +2020,10 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, *offending = zcalloc(node_len * sizeof(clusterManagerNode*)); offending_p = *offending; } - for (i = 0; i < ip_len; i++) { + /* For each set of nodes in the same host, split by + * related nodes (masters and slaves which are involved in + * replication of each other) */ + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ipnodes[i]); dict *related = dictCreate(&clusterManagerDictType, NULL); char *ip = NULL; @@ -2038,6 +2047,8 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, else types = sdscat(otypes, "s"); if (types != otypes) dictReplace(related, key, types); } + /* Now it's trivial to check, for each related group having the + * same host, what is their local score. */ dictIterator *iter = dictGetIterator(related); dictEntry *entry; while ((entry = dictNext(iter)) != NULL) { @@ -2048,6 +2059,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, if (types[0] == 'm') score += (10000 * (typeslen - 1)); else score += (1 * typeslen); if (offending == NULL) continue; + /* Populate the list of offending nodes. 
*/ listIter li; listNode *ln; listRewind(cluster_manager.nodes, &li); @@ -2069,15 +2081,16 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, } static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, - int ip_len) + int ip_count) { clusterManagerNode **offenders = NULL; - int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + int score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, + NULL, NULL); if (score == 0) goto cleanup; clusterManagerLogInfo(">>> Trying to optimize slaves allocation " "for anti-affinity\n"); int node_len = cluster_manager.nodes->len; - int maxiter = 500 * node_len; + int maxiter = 500 * node_len; // Effort is proportional to cluster size... srand(time(NULL)); while (maxiter > 0) { int offending_len = 0; @@ -2085,9 +2098,14 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(offenders); offenders = NULL; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, &offenders, + score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, + &offenders, &offending_len); - if (score == 0) break; + if (score == 0) break; // Optimal anti affinity reached + /* We'll try to randomly swap a slave's assigned master causing + * an affinity problem with another random slave, to see if we + * can improve the affinity. */ int rand_idx = rand() % offending_len; clusterManagerNode *first = offenders[rand_idx], *second = NULL; @@ -2112,8 +2130,12 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, *second_master = second->replicate; first->replicate = second_master, first->dirty = 1; second->replicate = first_master, second->dirty = 1; - int new_score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, + int new_score = clusterManagerGetAntiAffinityScore(ipnodes, + ip_count, NULL, NULL); + /* If the change actually makes thing worse, revert. 
Otherwise + * leave as it is becuase the best solution may need a few + * combined swaps. */ if (new_score > score) { first->replicate = first_master; second->replicate = second_master; @@ -2121,7 +2143,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, zfree(other_replicas); maxiter--; } - score = clusterManagerGetAntiAffinityScore(ipnodes, ip_len, NULL, NULL); + score = clusterManagerGetAntiAffinityScore(ipnodes, ip_count, NULL, NULL); char *msg; int perfect = (score == 0); int log_level = (perfect ? CLUSTER_MANAGER_LOG_LVL_SUCCESS : @@ -2136,6 +2158,7 @@ cleanup: zfree(offenders); } +/* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); int first_range_idx = -1, last_slot_idx = -1, i; @@ -2303,11 +2326,13 @@ cleanup: return success; } +/* Flush the dirty node configuration by calling replicate for slaves or + * adding the slots for masters. */ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; int is_err = 0, success = 1; - *err = NULL; + if (err != NULL) *err = NULL; if (node->replicate != NULL) { reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER REPLICATE %s", node->replicate); @@ -2317,14 +2342,15 @@ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { strcpy(*err, reply->str); } success = 0; + /* If the cluster did not already joined it is possible that + * the slave does not know the master node yet. So on errors + * we return ASAP leaving the dirty flag set, to flush the + * config later. 
*/ goto cleanup; } } else { int added = clusterManagerAddSlots(node, err); - if (!added || *err != NULL) { - success = 0; - goto cleanup; - } + if (!added || *err != NULL) success = 0; } node->dirty = 0; cleanup: @@ -2342,6 +2368,11 @@ static void clusterManagerWaitForClusterJoin(void) { printf("\n"); } +/* Load node's cluster configuration by calling "CLUSTER NODES" command. + * Node's configuration (name, replicate, slots, ...) is then updated. + * If CLUSTER_MANAGER_OPT_GETFRIENDS flag is set into 'opts' argument, + * and node already knows other nodes, the node's friends list is populated + * with the other nodes info. */ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { @@ -2391,7 +2422,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (myself) { node->flags |= CLUSTER_MANAGER_FLAG_MYSELF; currentNode = node; - CLUSTER_MANAGER_RESET_SLOTS(node); + clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); //TODO: just while(remaining) && assign p inside the block @@ -2501,7 +2532,6 @@ cleanup: * point. All nodes will be loaded inside the cluster_manager.nodes list. * Warning: if something goes wrong, it will free the starting node before * returning 0. */ - static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { if (node->context == NULL) CLUSTER_MANAGER_NODE_CONNECT(node); @@ -2681,7 +2711,6 @@ static int clusterManagerIsConfigConsistent(void) { if (cluster_manager.nodes == NULL) return 0; int consistent = (listLength(cluster_manager.nodes) <= 1); // If the Cluster has only one node, it's always consistent - // Does it make sense? 
if (consistent) return 1; sds first_cfg = NULL; listIter li; @@ -2705,6 +2734,13 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +static void clusterManagerOnError(sds err) { + if (cluster_manager.errors == NULL) + cluster_manager.errors = listCreate(); + listAddNodeTail(cluster_manager.errors, err); + clusterManagerLogErr("%s\n", (char *) err); +} + static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -2732,7 +2768,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!quiet) clusterManagerShowNodes(); if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -2761,7 +2797,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } if (n->importing != NULL) { if (open_slots == NULL) @@ -2779,7 +2815,7 @@ static void clusterManagerCheckCluster(int quiet) { errstr = sdscatfmt(errstr, fmt, slot); } errstr = sdscat(errstr, "."); - CLUSTER_MANAGER_ERROR(errstr); + clusterManagerOnError(errstr); } } if (open_slots != NULL) { @@ -2808,7 +2844,7 @@ static void clusterManagerCheckCluster(int quiet) { err = sdscatprintf(err, "[ERR] Not all %d slots are " "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); - CLUSTER_MANAGER_ERROR(err); + clusterManagerOnError(err); } } @@ -2832,6 +2868,53 @@ static void clusterManagerLog(int level, const char* fmt, ...) 
{ if (use_colors) printf("\033[" LOG_COLOR_RESET); } +static void clusterManagerNodeArrayInit(clusterManagerNodeArray *array, + int alloc_len) +{ + array->nodes = zcalloc(alloc_len * sizeof(clusterManagerNode*)); + array->alloc = array->nodes; + array->len = alloc_len; + array->count = 0; +} + +/* Reset array->nodes to the original array allocation and re-count non-NULL + * nodes. */ +static void clusterManagerNodeArrayReset(clusterManagerNodeArray *array) { + if (array->nodes > array->alloc) { + array->len = array->nodes - array->alloc; + array->nodes = array->alloc; + array->count = 0; + int i = 0; + for(; i < array->len; i++) { + if (array->nodes[i] != NULL) array->count++; + } + } +} + +/* Shift array->nodes and store the shifted node into 'nodeptr'. */ +static void clusterManagerNodeArrayShift(clusterManagerNodeArray *array, + clusterManagerNode **nodeptr) +{ + assert(array->nodes < (array->nodes + array->len)); + /* If the first node to be shifted is not NULL, decrement count. */ + if (*array->nodes != NULL) array->count--; + /* Store the first node to be shifted into 'nodeptr'. */ + *nodeptr = *array->nodes; + /* Shift the nodes array and decrement length. 
*/ + array->nodes++; + array->len--; +} + +static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, + clusterManagerNode *node) +{ + assert(array->nodes < (array->nodes + array->len)); + assert(node != NULL); + assert(array->count < array->len); + array->nodes[array->count++] = node; +} + +/* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; char **argv = config.cluster_manager_command.argv; @@ -2919,7 +3002,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } clusterManagerLogInfo(">>> Performing hash slots allocation " "on %d nodes...\n", node_len); - int interleaved_len = 0, ips_len = 0; + int interleaved_len = 0, ip_count = 0; clusterManagerNode **interleaved = zcalloc(node_len*sizeof(**interleaved)); char **ips = zcalloc(node_len * sizeof(char*)); clusterManagerNodeArray *ip_nodes = zcalloc(node_len * sizeof(*ip_nodes)); @@ -2929,7 +3012,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; int found = 0; - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { char *ip = ips[i]; if (!strcmp(ip, n->ip)) { found = 1; @@ -2937,19 +3020,19 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } } if (!found) { - ips[ips_len++] = n->ip; + ips[ip_count++] = n->ip; } clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->nodes == NULL) - CLUSTER_MANAGER_NODEARRAY_INIT(node_array, node_len); - CLUSTER_MANAGER_NODEARRAY_ADD(node_array, n); + clusterManagerNodeArrayInit(node_array, node_len); + clusterManagerNodeArrayAdd(node_array, n); } while (interleaved_len < node_len) { - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = &(ip_nodes[i]); if (node_array->count > 0) { - clusterManagerNode *n; - CLUSTER_MANAGER_NODEARRAY_SHIFT(node_array, n); + 
clusterManagerNode *n = NULL; + clusterManagerNodeArrayShift(node_array, &n); interleaved[interleaved_len++] = n; } } @@ -3019,11 +3102,11 @@ assign_replicas: printf("Adding extra replicas...\n"); goto assign_replicas; } - for (i = 0; i < ips_len; i++) { + for (i = 0; i < ip_count; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_RESET(node_array); + clusterManagerNodeArrayReset(node_array); } - clusterManagerOptimizeAntiAffinity(ip_nodes, ips_len); + clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); printf("Can I set the above configuration? %s", "(type 'yes' to accept): "); fflush(stdout); @@ -3031,7 +3114,6 @@ assign_replicas: int nread = read(fileno(stdin),buf,4); buf[3] = '\0'; if (nread != 0 && !strcmp("yes", buf)) { - printf("\nFlushing configuration!\n"); listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3128,7 +3210,7 @@ cleanup: zfree(ips); for (i = 0; i < node_len; i++) { clusterManagerNodeArray *node_array = ip_nodes + i; - CLUSTER_MANAGER_NODEARRAY_FREE(node_array); + CLUSTER_MANAGER_NODE_ARRAY_FREE(node_array); } zfree(ip_nodes); return success; From 99da9c9508afcde3e08c952122e6ac8eb01b17ce Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 10:44:11 +0100 Subject: [PATCH 45/66] Cluster Manager: reshard command, fixed slots parsing bug and other minor bugs. 
--- src/redis-cli.c | 655 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 593 insertions(+), 62 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 66fc4d18..fcf48a47 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -69,6 +69,13 @@ #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" #define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 +#define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 + +#define CLUSTER_MANAGER_INVALID_HOST_ARG \ + "Invalid arguments: you need to pass either a valid " \ + "address (ie. 120.0.0.1:7000) or space separated IP " \ + "and port (ie. 120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) #define CLUSTER_MANAGER_MASTERS_COUNT(nodes, replicas) (nodes/(replicas + 1)) #define CLUSTER_MANAGER_NODE_CONNECT(n) \ @@ -103,9 +110,14 @@ #define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 #define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 #define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 +#define CLUSTER_MANAGER_OPT_COLD 1 << 1 +#define CLUSTER_MANAGER_OPT_UPDATE 1 << 2 +#define CLUSTER_MANAGER_OPT_QUIET 1 << 6 +#define CLUSTER_MANAGER_OPT_VERBOSE 1 << 7 #define CLUSTER_MANAGER_LOG_LVL_INFO 1 #define CLUSTER_MANAGER_LOG_LVL_WARN 2 @@ -143,6 +155,11 @@ typedef struct clusterManagerCommand { char **argv; int flags; int replicas; + char *from; + char *to; + int slots; + int timeout; + int pipeline; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1261,6 +1278,19 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { + config.cluster_manager_command.from = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { + 
config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { + config.cluster_manager_command.slots = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { + config.cluster_manager_command.timeout = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { + config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-yes")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_YES; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1358,7 +1388,7 @@ static void usage(void) { " --ldb-sync-mode Like --ldb but uses the synchronous Lua debugger, in\n" " this mode the server is blocked and script changes are\n" " are not rolled back from the server memory.\n" -" --cluster [args...]\n" +" --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" @@ -1729,6 +1759,12 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; +/* Used for reshard table. 
*/ +typedef struct clusterManagerReshardTableItem { + clusterManagerNode *source; + int slot; +} clusterManagerReshardTableItem; + static dictType clusterManagerDictType = { dictSdsHash, /* hash function */ NULL, /* key dup */ @@ -1754,7 +1790,7 @@ static int clusterManagerGetAntiAffinityScore(clusterManagerNodeArray *ipnodes, int ip_count, clusterManagerNode ***offending, int *offending_len); static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); -static sds clusterManagerNodeInfo(clusterManagerNode *node); +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); @@ -1776,6 +1812,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1789,9 +1826,11 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", - "cluster-replicas"}, - {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"reshard", clusterManagerCommandReshard, -1, "host:port", + "from ,to ,slots ,yes,timeout ,pipeline "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1829,6 +1868,38 @@ static clusterManagerCommandProc *validateClusterManagerCommand(void) { return proc; } +/* Get host ip and port from command arguments. If only one argument has + * been provided it must be in the form of 'ip:port', elsewhere + * the first argument must be the ip and the second one the port. + * If host and port can be detected, it returns 1 and it stores host and + * port into variables referenced by'ip_ptr' and 'port_ptr' pointers, + * elsewhere it returns 0. */ +static int getClusterHostFromCmdArgs(int argc, char **argv, + char **ip_ptr, int *port_ptr) { + int port = 0; + char *ip = NULL; + if (argc == 1) { + char *addr = argv[0]; + char *c = strrchr(addr, '@'); + if (c != NULL) *c = '\0'; + c = strrchr(addr, ':'); + if (c != NULL) { + *c = '\0'; + ip = addr; + port = atoi(++c); + } else return 0; + } else { + ip = argv[0]; + port = atoi(argv[1]); + } + if (!ip || !port) return 0; + else { + *ip_ptr = ip; + *port_ptr = port; + } + return 1; +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2188,8 +2259,12 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } -static sds clusterManagerNodeInfo(clusterManagerNode *node) { +static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); + sds spaces = sdsempty(); + int i; + for (i = 0; i < indent; i++) spaces = sdscat(spaces, " "); + if (indent) info = sdscat(info, spaces); int is_master = !(node->flags & CLUSTER_MANAGER_FLAG_SLAVE); char *role = (is_master ? 
"M" : "S"); sds slots = NULL; @@ -2198,17 +2273,18 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node) { else { slots = clusterManagerNodeSlotsString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" - " slots:%S (%u slots) " + "%s slots:%S (%u slots) " "", //TODO: flags string - role, node->name, node->ip, node->port, + role, node->name, node->ip, node->port, spaces, slots, node->slots_count); sdsfree(slots); } if (node->replicate != NULL) - info = sdscatfmt(info, "\n replicates %S", node->replicate); + info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n %U additional replica(s)", - node->replicas_count); + info = sdscatfmt(info, "\n%s %U additional replica(s)", + spaces, node->replicas_count); + sdsfree(spaces); return info; } @@ -2218,7 +2294,7 @@ static void clusterManagerShowNodes(void) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; - sds info = clusterManagerNodeInfo(node); + sds info = clusterManagerNodeInfo(node, 0); printf("%s\n", info); sdsfree(info); } @@ -2306,7 +2382,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { - success = 1; + success = 0; goto cleanup; } reply = (redisReply*) _reply; @@ -2326,6 +2402,193 @@ cleanup: return success; } +/* Set slot status to "importing" or "migrating" */ +static int clusterManagerSetSlot(clusterManagerNode *node1, + clusterManagerNode *node2, + int slot, const char *mode, char **err) { + redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " + "SETSLOT %d %s %s", + slot, mode, + (char *) node2->name); + if (err != NULL) *err = NULL; + if (!reply) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + 
strcpy(*err, reply->str); + } + return 0; + } + return 1; +} + +static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int timeout, + int pipeline, int verbose, + char **err) +{ + int success = 1; + while (1) { + redisReply *reply = NULL, *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + reply = CLUSTER_MANAGER_COMMAND(source, "CLUSTER " + "GETKEYSINSLOT %d %d", slot, + pipeline); + success = (reply != NULL); + if (!success) return 0; + if (reply->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((reply->len + 1) * sizeof(char)); + strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + assert(reply->type == REDIS_REPLY_ARRAY); + size_t count = reply->elements; + if (count == 0) { + freeReplyObject(reply); + break; + } + char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + /* Calling MIGRATE command. */ + size_t argc = count + 8; + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + argv[6] = "REPLACE"; + argv_len[6] = 7; + argv[7] = "KEYS"; + argv_len[7] = 4; + for (size_t i = 0; i < count; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + 8; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (verbose) dots[i] = '.'; + } + if (verbose) dots[count] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + 
success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); + if (!success) goto next; + migrate_reply = (redisReply *) _reply; + if (migrate_reply->type == REDIS_REPLY_ERROR) { + // TODO: Implement fix. + success = 0; + if (err != NULL) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; + } + if (verbose) { + printf("%s", dots); + fflush(stdout); + } +next: + if (reply != NULL) freeReplyObject(reply); + if (migrate_reply != NULL) freeReplyObject(migrate_reply); + zfree(argv); + zfree(argv_len); + if (!success) break; + } + return success; +} + +/* Move slots between source and target nodes using MIGRATE. + * + * Options: + * CLUSTER_MANAGER_OPT_VERBOSE -- Print a dot for every moved key. + * CLUSTER_MANAGER_OPT_COLD -- Move keys without opening slots / + * reconfiguring the nodes. + * CLUSTER_MANAGER_OPT_UPDATE -- Update node->slots for source/target nodes. + * CLUSTER_MANAGER_OPT_QUIET -- Don't print info messages. 
+*/ +static int clusterManagerMoveSlot(clusterManagerNode *source, + clusterManagerNode *target, + int slot, int opts, char**err) +{ + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) { + printf("Moving slot %d from %s:%d to %s:%d: ", slot, source->ip, + source->port, target->ip, target->port); + fflush(stdout); + } + if (err != NULL) *err = NULL; + int pipeline = config.cluster_manager_command.pipeline, + timeout = config.cluster_manager_command.timeout, + print_dots = (opts & CLUSTER_MANAGER_OPT_VERBOSE), + option_cold = (opts & CLUSTER_MANAGER_OPT_COLD), + success = 1; + if (!option_cold) { + success = clusterManagerSetSlot(target, source, slot, + "importing", err); + if (!success) return 0; + success = clusterManagerSetSlot(source, target, slot, + "migrating", err); + if (!success) return 0; + } + success = clusterManagerMigrateKeysInSlot(source, target, slot, timeout, + pipeline, print_dots, err); + if (!(opts & CLUSTER_MANAGER_OPT_QUIET)) printf("\n"); + if (!success) return 0; + /* Set the new node as the owner of the slot in all the known nodes. */ + if (!option_cold) { + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER " + "SETSLOT %d %s %s", + slot, "node", + target->name); + success = (r != NULL); + if (!success) return 0; + if (r->type == REDIS_REPLY_ERROR) { + success = 0; + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + } + } + freeReplyObject(r); + if (!success) return 0; + } + } + /* Update the node logical config */ + if (opts & CLUSTER_MANAGER_OPT_UPDATE) { + source->slots[slot] = 0; + target->slots[slot] = 1; + } + return 1; +} + /* Flush the dirty node configuration by calling replicate for slaves or * adding the slots for masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { @@ -2425,20 +2688,24 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, clusterManagerNodeResetSlots(node); if (i == 8) { int remaining = strlen(line); - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; remaining -= (p - line); char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds dst = sdsnew(p); node->migrating_count += 2; @@ -2451,6 +2718,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, } else if ((p = strstr(slotsdef, "-<-"))) {//Importing *p = '\0'; p += 3; + char *closing_bracket = strchr(p, ']'); + if (closing_bracket) *closing_bracket = '\0'; sds slot = sdsnew(slotsdef); sds src = sdsnew(p); node->importing_count += 2; @@ -2605,8 +2874,9 @@ invalid_friend: if (n->replicate != NULL) { clusterManagerNode *master = clusterManagerNodeByName(n->replicate); if (master == NULL) { - printf("*** WARNING: %s:%d claims to be slave of unknown " - "node ID %s.\n", n->ip, n->port, n->replicate); + clusterManagerLogWarn("*** WARNING: %s:%d claims to be " + "slave of unknown node ID %s.\n", + n->ip, n->port, n->replicate); } else master->replicas_count++; } } @@ -2619,6 +2889,12 @@ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { return strcmp(*i1, *i2); } +int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode 
**) n2); + return node2->slots_count - node1->slots_count; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -2651,16 +2927,18 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { if (remaining == 0) continue; char **slots = NULL; int c = 0; - //TODO: just while(remaining) && assign p inside the block - while ((p = strchr(line, ' ')) != NULL || remaining) { + while (remaining > 0) { + p = strchr(line, ' '); if (p == NULL) p = line + remaining; int size = (p - line); remaining -= size; tot_size += size; char *slotsdef = line; *p = '\0'; - if (remaining) line = p + 1; - else line = p; + if (remaining) { + line = p + 1; + remaining--; + } else line = p; if (slotsdef[0] != '[') { c++; slots = zrealloc(slots, (c * sizeof(char *))); @@ -2792,7 +3070,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->migrating_count; i += 2) { sds slot = n->migrating[i]; - dictAdd(open_slots, slot, n->migrating[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->migrating[i + 1])); char *fmt = (i > 0 ? ",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2810,7 +3088,7 @@ static void clusterManagerCheckCluster(int quiet) { n->port); for (i = 0; i < n->importing_count; i += 2) { sds slot = n->importing[i]; - dictAdd(open_slots, slot, n->importing[i + 1]); + dictAdd(open_slots, slot, sdsdup(n->importing[i + 1])); char *fmt = (i > 0 ? 
",%S" : "%S"); errstr = sdscatfmt(errstr, fmt, slot); } @@ -2848,6 +3126,76 @@ static void clusterManagerCheckCluster(int quiet) { } } +static clusterManagerNode *clusterNodeForResharding(char *id, + clusterManagerNode *target, + int *raise_err) +{ + clusterManagerNode *node = NULL; + const char *invalid_node_msg = "*** The specified node is not known or " + "not a master, please retry.\n"; + node = clusterManagerNodeByName(id); + *raise_err = 0; + if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { + clusterManagerLogErr(invalid_node_msg); + *raise_err = 1; + return NULL; + } else if (node != NULL && target != NULL) { + if (!strcmp(node->name, target->name)) { + clusterManagerLogErr( "*** It is not possible to use " + "the target node as " + "source node.\n"); + return NULL; + } + } + return node; +} + +static list *clusterManagerComputeReshardTable(list *sources, int numslots) { + list *moved = listCreate(); + int src_count = listLength(sources), i = 0, tot_slots = 0, j; + clusterManagerNode **sorted = zmalloc(src_count * sizeof(**sorted)); + listIter li; + listNode *ln; + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *node = ln->value; + tot_slots += node->slots_count; + sorted[i++] = node; + } + qsort(sorted, src_count, sizeof(clusterManagerNode *), + clusterManagerSlotCountCompareDesc); + for (i = 0; i < src_count; i++) { + clusterManagerNode *node = sorted[i]; + float n = ((float) numslots / tot_slots * node->slots_count); + if (i == 0) n = ceil(n); + else n = floor(n); + int max = (int) n, count = 0; + for (j = 0; j < CLUSTER_MANAGER_SLOTS; j++) { + int slot = node->slots[j]; + if (!slot) continue; + if (count >= max || (int)listLength(moved) >= numslots) break; + clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + item->source = node; + item->slot = j; + listAddNodeTail(moved, item); + count++; + } + } + zfree(sorted); + return moved; +} + +static void clusterManagerShowReshardTable(list *table) { + 
listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + clusterManagerNode *n = item->source; + printf(" Moving slot %d from %s\n", item->slot, (char *) n->name); + } +} + static void clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3219,59 +3567,218 @@ cleanup: static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 
120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } static int clusterManagerCommandCheck(int argc, char **argv) { int port = 0; char *ip = NULL; - if (argc == 1) { - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else goto invalid_args; - } else { - ip = argv[0]; - port = atoi(argv[1]); - } - if (!ip || !port) goto invalid_args; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); clusterManagerCheckCluster(0); return 1; invalid_args: - fprintf(stderr, "Invalid arguments: you need to pass either a valid " - "address (ie. 120.0.0.1:7000) or space separated IP " - "and port (ie. 120.0.0.1 7000)\n"); + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandReshard(int argc, char **argv) { + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + clusterManagerCheckCluster(0); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + fflush(stdout); + fprintf(stderr, + "*** Please fix your cluster problems before resharding\n"); + return 0; + } + int slots = config.cluster_manager_command.slots; + if (!slots) { + while (slots <= 0 || slots > CLUSTER_MANAGER_SLOTS) { + printf("How many slots do you want to move (from 1 to %d)? 
", + CLUSTER_MANAGER_SLOTS); + fflush(stdout); + char buf[6]; + int nread = read(fileno(stdin),buf,6); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + slots = atoi(buf); + } + } + char buf[255]; + char *to = config.cluster_manager_command.to, + *from = config.cluster_manager_command.from; + while (to == NULL) { + printf("What is the receiving node ID? "); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (strlen(buf) > 0) to = buf; + } + int raise_err = 0; + clusterManagerNode *target = clusterNodeForResharding(to, NULL, &raise_err); + if (target == NULL) return 0; + list *sources = listCreate(); + list *table = NULL; + int all = 0, result = 1; + if (from == NULL) { + printf("Please enter all the source node IDs.\n"); + printf(" Type 'all' to use all the nodes as source nodes for " + "the hash slots.\n"); + printf(" Type 'done' once you entered all the source nodes IDs.\n"); + while (1) { + printf("Source node #%lu: ", listLength(sources) + 1); + fflush(stdout); + int nread = read(fileno(stdin),buf,255); + if (!nread) continue; //TODO: nread < 0 + int last_idx = nread - 1; + if (buf[last_idx] != '\n') { + int ch; + while ((ch = getchar()) != '\n' && ch != EOF) {} + } + buf[last_idx] = '\0'; + if (!strcmp(buf, "done")) break; + else if (!strcmp(buf, "all")) { + all = 1; + break; + } else { + clusterManagerNode *src = + clusterNodeForResharding(buf, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + } else { + char *p; + while((p = strchr(from, ',')) != NULL) { + *p = '\0'; + if (!strcmp(from, "all")) { + all = 1; + break; + } else { + 
clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + from = p + 1; + } + /* Check if there's still another source to process. */ + if (!all && strlen(from) > 0) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } + } + } + listIter li; + listNode *ln; + if (all) { + listEmpty(sources); + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!sdscmp(n->name, target->name)) continue; + listAddNodeTail(sources, n); + } + } + if (listLength(sources) == 0) { + fprintf(stderr, "*** No source nodes given, operation aborted.\n"); + result = 0; + goto cleanup; + } + printf("\nReady to move %d slots.\n", slots); + printf(" Source nodes:\n"); + listRewind(sources, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *src = ln->value; + sds info = clusterManagerNodeInfo(src, 4); + printf("%s\n", info); + sdsfree(info); + } + printf(" Destination node:\n"); + sds info = clusterManagerNodeInfo(target, 4); + printf("%s\n", info); + sdsfree(info); + table = clusterManagerComputeReshardTable(sources, slots); + printf(" Resharding plan:\n"); + clusterManagerShowReshardTable(table); + if (!(config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_YES)) + { + printf("Do you want to proceed with the proposed " + "reshard plan (yes/no)? 
"); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + if (nread <= 0 || strcmp("yes", buf) != 0) { + result = 0; + goto cleanup; + } + } + int opts = CLUSTER_MANAGER_OPT_VERBOSE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + char *err = NULL; + result = clusterManagerMoveSlot(item->source, target, item->slot, + opts, &err); + if (!result) { + if (err != NULL) { + clusterManagerLogErr("\n%s\n", err); + zfree(err); + } + goto cleanup; + } + } +cleanup: + listRelease(sources); + if (table) { + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } @@ -3332,13 +3839,32 @@ static int clusterManagerCommandHelp(int argc, char **argv) { sizeof(clusterManagerCommandDef); int i = 0, j; fprintf(stderr, "Cluster Manager Commands:\n"); + int padding = 15; for (; i < commands_count; i++) { clusterManagerCommandDef *def = &(clusterManagerCommands[i]); - int namelen = strlen(def->name), padlen = 15 - namelen; + int namelen = strlen(def->name), padlen = padding - namelen; fprintf(stderr, " %s", def->name); for (j = 0; j < padlen; j++) fprintf(stderr, " "); fprintf(stderr, "%s\n", (def->args ? 
def->args : "")); - //TODO: if (def->options) + if (def->options != NULL) { + int optslen = strlen(def->options); + char *p = def->options, *eos = p + optslen; + char *comma = NULL; + while ((comma = strchr(p, ',')) != NULL) { + int deflen = (int)(comma - p); + char buf[255]; + memcpy(buf, p, deflen); + buf[deflen] = '\0'; + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", buf); + p = comma + 1; + if (p >= eos) break; + } + if (p < eos) { + for (j = 0; j < padding; j++) fprintf(stderr, " "); + fprintf(stderr, " --cluster-%s\n", p); + } + } } return 0; } @@ -4641,6 +5167,11 @@ int main(int argc, char **argv) { config.cluster_manager_command.argv = NULL; config.cluster_manager_command.flags = 0; config.cluster_manager_command.replicas = 0; + config.cluster_manager_command.from = NULL; + config.cluster_manager_command.to = NULL; + config.cluster_manager_command.slots = 0; + config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; + config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 220375b4240b46dbcc9eb2599a090cd0e6ffaecd Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 28 Feb 2018 11:49:10 +0100 Subject: [PATCH 46/66] Fixed memory write error in clusterManagerGetConfigSignature --- src/redis-cli.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index fcf48a47..baaa615c 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2295,7 +2295,7 @@ static void clusterManagerShowNodes(void) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; sds info = clusterManagerNodeInfo(node, 0); - printf("%s\n", info); + printf("%s\n", (char *) info); sdsfree(info); } } @@ -2916,8 +2916,8 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { line = p + 1; if (i == 0) { nodename = token; - tot_size = p - token; - name_len = tot_size; + 
tot_size = (p - token); + name_len = tot_size++; // Make room for ':' in tot_size } else if (i == 8) break; i++; } @@ -2951,6 +2951,7 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { node_count++; node_configs = zrealloc(node_configs, (node_count * sizeof(char *))); + /* Make room for '|' separators. */ tot_size += (sizeof(char) * (c - 1)); char *cfg = zmalloc((sizeof(char) * tot_size) + 1); memcpy(cfg, nodename, name_len); @@ -3760,7 +3761,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { opts, &err); if (!result) { if (err != NULL) { - clusterManagerLogErr("\n%s\n", err); + //clusterManagerLogErr("\n%s\n", err); zfree(err); } goto cleanup; From 5b3d73b3d71fa8805af7677825cd5445f6a217ea Mon Sep 17 00:00:00 2001 From: Artix Date: Wed, 28 Feb 2018 15:21:08 +0100 Subject: [PATCH 47/66] Cluster Manager: fixed some memory error --- src/redis-cli.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index baaa615c..317b1125 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2412,14 +2412,19 @@ static int clusterManagerSetSlot(clusterManagerNode *node1, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; + int success = 1; if (reply->type == REDIS_REPLY_ERROR) { + success = 0; if (err != NULL) { *err = zmalloc((reply->len + 1) * sizeof(char)); strcpy(*err, reply->str); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(node1, err); } - return 0; + goto cleanup; } - return 1; +cleanup: + freeReplyObject(reply); + return success; } static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, @@ -3175,7 +3180,7 @@ static list *clusterManagerComputeReshardTable(list *sources, int numslots) { int slot = node->slots[j]; if (!slot) continue; if (count >= max || (int)listLength(moved) >= numslots) break; - clusterManagerReshardTableItem *item = zmalloc(sizeof(item)); + clusterManagerReshardTableItem *item = zmalloc(sizeof(*item)); item->source = node; 
item->slot = j; listAddNodeTail(moved, item); From a4a1c7bb52dda29a4caaed07ba4222aa7f078f29 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 2 Mar 2018 17:06:50 +0100 Subject: [PATCH 48/66] ClusterManager: fixed --cluster-from 'all' parsing --- src/redis-cli.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 317b1125..8fa2d725 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3137,12 +3137,12 @@ static clusterManagerNode *clusterNodeForResharding(char *id, int *raise_err) { clusterManagerNode *node = NULL; - const char *invalid_node_msg = "*** The specified node is not known or " - "not a master, please retry.\n"; + const char *invalid_node_msg = "*** The specified node (%s) is not known " + "or not a master, please retry.\n"; node = clusterManagerNodeByName(id); *raise_err = 0; if (!node || node->flags & CLUSTER_MANAGER_FLAG_SLAVE) { - clusterManagerLogErr(invalid_node_msg); + clusterManagerLogErr(invalid_node_msg, id); *raise_err = 1; return NULL; } else if (node != NULL && target != NULL) { @@ -3700,12 +3700,15 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } /* Check if there's still another source to process. 
*/ if (!all && strlen(from) > 0) { - clusterManagerNode *src = - clusterNodeForResharding(from, target, &raise_err); - if (src != NULL) listAddNodeTail(sources, src); - else if (raise_err) { - result = 0; - goto cleanup; + if (!strcmp(from, "all")) all = 1; + if (!all) { + clusterManagerNode *src = + clusterNodeForResharding(from, target, &raise_err); + if (src != NULL) listAddNodeTail(sources, src); + else if (raise_err) { + result = 0; + goto cleanup; + } } } } From adebee8adc493f7f768aa23aaeb4bed2f51ae820 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 6 Mar 2018 13:06:04 +0200 Subject: [PATCH 49/66] clusterManagerAddSlots: changed the way ADDSLOTS command is built --- src/redis-cli.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 8fa2d725..4f87f906 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2354,32 +2354,28 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) redisReply *reply = NULL; void *_reply = NULL; int is_err = 0, success = 1; - int argc; - sds *argv = NULL; - size_t *argvlen = NULL; + /* First two args are used for the command itself. */ + int argc = node->slots_count + 2; + sds *argv = zmalloc(argc * sizeof(*argv)); + size_t *argvlen = zmalloc(argc * sizeof(*argvlen)); + argv[0] = "CLUSTER"; + argv[1] = "ADDSLOTS"; + argvlen[0] = 7; + argvlen[1] = 8; *err = NULL; - sds cmd = sdsnew("CLUSTER ADDSLOTS "); - int i, added = 0; + int i, argv_idx = 2; for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { - int last_slot = (i == (CLUSTER_MANAGER_SLOTS - 1)); + if (argv_idx >= argc) break; if (node->slots[i]) { - char *fmt = (!last_slot ? 
"%u " : "%u"); - cmd = sdscatfmt(cmd, fmt, i); - added++; + argv[argv_idx] = sdsfromlonglong((long long) i); + argvlen[argv_idx] = sdslen(argv[argv_idx]); + argv_idx++; } } - if (!added) { + if (!argv_idx) { success = 0; goto cleanup; } - argv = cliSplitArgs(cmd, &argc); - if (argc == 0 || argv == NULL) { - success = 0; - goto cleanup; - } - argvlen = zmalloc(argc*sizeof(size_t)); - for (i = 0; i < argc; i++) - argvlen[i] = sdslen(argv[i]); redisAppendCommandArgv(node->context,argc,(const char**)argv,argvlen); if (redisGetReply(node->context, &_reply) != REDIS_OK) { success = 0; @@ -2395,9 +2391,11 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } cleanup: - sdsfree(cmd); zfree(argvlen); - if (argv != NULL) sdsfreesplitres(argv,argc); + if (argv != NULL) { + for (i = 2; i < argc; i++) sdsfree(argv[i]); + zfree(argv); + } if (reply != NULL) freeReplyObject(reply); return success; } From 6d1a7cec230e5a892be3eeffeec83231a4079b9f Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 23 Mar 2018 16:46:43 +0100 Subject: [PATCH 50/66] Cluster Manager: rebalance command --- src/redis-cli.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 286 insertions(+), 11 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 4f87f906..49ba4125 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -71,6 +71,7 @@ #define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 +#define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ "Invalid arguments: you need to pass either a valid " \ @@ -108,10 +109,13 @@ #define CLUSTER_MANAGER_FLAG_DISCONNECT 1 << 4 #define CLUSTER_MANAGER_FLAG_FAIL 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 -#define CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 -#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_FIX 1 << 0 +#define 
CLUSTER_MANAGER_CMD_FLAG_SLAVE 1 << 1 +#define CLUSTER_MANAGER_CMD_FLAG_YES 1 << 2 +#define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 +#define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 +#define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -157,9 +161,12 @@ typedef struct clusterManagerCommand { int replicas; char *from; char *to; + char **weight; + int weight_argc; int slots; int timeout; int pipeline; + float threshold; } clusterManagerCommand; static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -206,6 +213,7 @@ static struct config { int eval_ldb_end; /* Lua debugging session ended. */ int enable_ldb_on_eval; /* Handle manual SCRIPT DEBUG + EVAL commands. */ int last_cmd_type; + int verbose; clusterManagerCommand cluster_manager_command; } config; @@ -1266,6 +1274,8 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"-d") && !lastarg) { sdsfree(config.mb_delim); config.mb_delim = sdsnew(argv[++i]); + } else if (!strcmp(argv[i],"--verbose")) { + config.verbose = 1; } else if (!strcmp(argv[i],"--cluster") && !lastarg) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; @@ -1282,15 +1292,35 @@ static int parseOptions(int argc, char **argv) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { config.cluster_manager_command.to = argv[++i]; + } else if (!strcmp(argv[i],"--cluster-weight") && !lastarg) { + int widx = i + 1; + char **weight = argv + widx; + int wargc = 0; + for (; widx < argc; widx++) { + if (strstr(argv[widx], "--") == argv[widx]) break; + wargc++; + } + if (wargc > 0) { + config.cluster_manager_command.weight = weight; + config.cluster_manager_command.weight_argc = wargc; + } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); } else 
if (!strcmp(argv[i],"--cluster-timeout") && !lastarg) { config.cluster_manager_command.timeout = atoi(argv[++i]); } else if (!strcmp(argv[i],"--cluster-pipeline") && !lastarg) { config.cluster_manager_command.pipeline = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-threshold") && !lastarg) { + config.cluster_manager_command.threshold = atof(argv[++i]); } else if (!strcmp(argv[i],"--cluster-yes")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_YES; + } else if (!strcmp(argv[i],"--cluster-simulate")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; } else if (!strcmp(argv[i],"-v") || !strcmp(argv[i], "--version")) { sds version = cliVersion(); printf("redis-cli %s\n", version); @@ -1390,6 +1420,7 @@ static void usage(void) { " are not rolled back from the server memory.\n" " --cluster [args...] [opts...]\n" " Cluster Manager command and arguments (see below).\n" +" --verbose Verbose mode.\n" " --help Output this help and exit.\n" " --version Output version and exit.\n" "\n" @@ -1749,6 +1780,8 @@ typedef struct clusterManagerNode { sds *importing; int migrating_count; int importing_count; + float weight; /* Weight used by rebalance */ + int balance; /* Used by rebalance */ } clusterManagerNode; /* Data structure used to represent a sequence of nodes. 
*/ @@ -1780,6 +1813,7 @@ typedef int clusterManagerCommandProc(int argc, char **argv); static clusterManagerNode *clusterManagerNewNode(char *ip, int port); static clusterManagerNode *clusterManagerNodeByName(const char *name); +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, @@ -1813,6 +1847,7 @@ static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); +static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1831,6 +1866,9 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, + {"rebalance", clusterManagerCommandRebalance, -1, "host:port", + "weight ,use-empty-masters," + "timeout ,simulate,pipeline ,threshold "}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -1970,10 +2008,13 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->migrating_count = 0; node->importing_count = 0; node->replicas_count = 0; + node->weight = 1.0f; + node->balance = 0; clusterManagerNodeResetSlots(node); return node; } +/* Return the node with the specified ID or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -1994,6 +2035,32 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } +/* Like get_node_by_name but the specified name can be just the first + * part of the node ID as long as the prefix in unique across the + * cluster. + */ +static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char*name) +{ + if (cluster_manager.nodes == NULL) return NULL; + clusterManagerNode *found = NULL; + sds lcname = sdsempty(); + lcname = sdscpy(lcname, name); + sdstolower(lcname); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->name && + strstr(n->name, lcname) == n->name) { + found = n; + break; + } + } + sdsfree(lcname); + return found; +} + static void clusterManagerNodeResetSlots(clusterManagerNode *node) { memset(node->slots, 0, sizeof(node->slots)); node->slots_count = 0; @@ -2898,6 +2965,12 @@ int clusterManagerSlotCountCompareDesc(const void *n1, const void *n2) { return node2->slots_count - node1->slots_count; } +int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { + clusterManagerNode *node1 = *((clusterManagerNode **) n1); + clusterManagerNode *node2 = *((clusterManagerNode **) n2); + return node1->balance - node2->balance; +} + static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; @@ -3200,6 +3273,19 @@ static void clusterManagerShowReshardTable(list *table) { } } +static void clusterManagerReleaseReshardTable(list *table) { + if (table != NULL) { + listIter li; + listNode *ln; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + zfree(item); + } + listRelease(table); + } +} + static void 
clusterManagerLog(int level, const char* fmt, ...) { int use_colors = (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COLOR); @@ -3775,14 +3861,199 @@ static int clusterManagerCommandReshard(int argc, char **argv) { } cleanup: listRelease(sources); - if (table) { - listRewind(table, &li); - while ((ln = listNext(&li)) != NULL) { - clusterManagerReshardTableItem *item = ln->value; - zfree(item); - } - listRelease(table); + clusterManagerReleaseReshardTable(table); + return result; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + +static int clusterManagerCommandRebalance(int argc, char **argv) { + int port = 0; + char *ip = NULL; + clusterManagerNode **weightedNodes = NULL; + list *involved = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int result = 1, i; + if (config.cluster_manager_command.weight != NULL) { + for (i = 0; i < config.cluster_manager_command.weight_argc; i++) { + char *name = config.cluster_manager_command.weight[i]; + char *p = strchr(name, '='); + if (p == NULL) { + result = 0; + goto cleanup; + } + *p = '\0'; + float w = atof(++p); + clusterManagerNode *n = clusterManagerNodeByAbbreviatedName(name); + if (n == NULL) { + clusterManagerLogErr("*** No such master node %s\n", name); + result = 0; + goto cleanup; + } + n->weight = w; + } } + float total_weight = 0; + int nodes_involved = 0; + int use_empty = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; + + involved = listCreate(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + /* Compute the total cluster weight. 
*/ + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + if (!use_empty && n->slots_count == 0) { + n->weight = 0; + continue; + } + total_weight += n->weight; + nodes_involved++; + listAddNodeTail(involved, n); + } + weightedNodes = zmalloc(nodes_involved * + sizeof(clusterManagerNode *)); + if (weightedNodes == NULL) goto cleanup; + /* Check cluster, only proceed if it looks sane. */ + clusterManagerCheckCluster(1); + if (cluster_manager.errors && listLength(cluster_manager.errors) > 0) { + clusterManagerLogErr("*** Please fix your cluster problems " + "before rebalancing" ); + result = 0; + goto cleanup; + } + /* Calculate the slots balance for each node. It's the number of + * slots the node should lose (if positive) or gain (if negative) + * in order to be balanced. */ + int threshold_reached = 0, total_balance = 0; + float threshold = config.cluster_manager_command.threshold; + i = 0; + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + weightedNodes[i++] = n; + int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + (int) n->weight); + n->balance = n->slots_count - expected; + total_balance += n->balance; + /* Compute the percentage of difference between the + * expected number of slots and the real one, to see + * if it's over the threshold specified by the user. */ + int over_threshold = 0; + if (config.cluster_manager_command.threshold > 0) { + if (n->slots_count > 0) { + float err_perc = fabs((100-(100.0*expected/n->slots_count))); + if (err_perc > threshold) over_threshold = 1; + } else if (expected > 1) { + over_threshold = 1; + } + } + if (over_threshold) threshold_reached = 1; + } + if (!threshold_reached) { + clusterManagerLogErr("*** No rebalancing needed! 
" + "All nodes are within the %.2f%% threshold.\n", + config.cluster_manager_command.threshold); + result = 0; + goto cleanup; + } + /* Because of rounding, it is possible that the balance of all nodes + * summed does not give 0. Make sure that nodes that have to provide + * slots are always matched by nodes receiving slots. */ + while (total_balance > 0) { + listRewind(involved, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->balance < 0 && total_balance > 0) { + n->balance--; + total_balance--; + } + } + } + /* Sort nodes by their slots balance. */ + qsort(weightedNodes, nodes_involved, sizeof(clusterManagerNode *), + clusterManagerCompareNodeBalance); + clusterManagerLogInfo(">>> Rebalancing across %d nodes. " + "Total weight = %.2f\n", + nodes_involved, total_weight); + if (config.verbose) { + for (i = 0; i < nodes_involved; i++) { + clusterManagerNode *n = weightedNodes[i]; + printf("%s:%d balance is %d slots\n", n->ip, n->port, n->balance); + } + } + /* Now we have at the start of the 'sn' array nodes that should get + * slots, at the end nodes that must give slots. + * We take two indexes, one at the start, and one at the end, + * incrementing or decrementing the indexes accordingly til we + * find nodes that need to get/provide slots. */ + int dst_idx = 0; + int src_idx = nodes_involved - 1; + int simulate = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + while (dst_idx < src_idx) { + clusterManagerNode *dst = weightedNodes[dst_idx]; + clusterManagerNode *src = weightedNodes[src_idx]; + int db = abs(dst->balance); + int sb = abs(src->balance); + int numslots = (db < sb ? db : sb); + if (numslots > 0) { + printf("Moving %d slots from %s:%d to %s:%d\n", numslots, + src->ip, + src->port, + dst->ip, + dst->port); + /* Actaully move the slots. 
*/ + list *lsrc = listCreate(), *table = NULL; + listAddNodeTail(lsrc, src); + table = clusterManagerComputeReshardTable(lsrc, numslots); + listRelease(lsrc); + int table_len = (int) listLength(table); + if (!table || table_len != numslots) { + clusterManagerLogErr("*** Assertio failed: Reshard table " + "!= number of slots"); + result = 0; + goto end_move; + } + if (simulate) { + for (i = 0; i < table_len; i++) printf("#"); + } else { + int opts = CLUSTER_MANAGER_OPT_QUIET | + CLUSTER_MANAGER_OPT_UPDATE; + listRewind(table, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerReshardTableItem *item = ln->value; + result = clusterManagerMoveSlot(item->source, + dst, + item->slot, + opts, NULL); + if (!result) goto end_move; + printf("#"); + fflush(stdout); + } + + } + printf("\n"); +end_move: + clusterManagerReleaseReshardTable(table); + if (!result) goto cleanup; + } + /* Update nodes balance. */ + dst->balance += numslots; + src->balance -= numslots; + if (dst->balance == 0) dst_idx++; + if (src->balance == 0) src_idx --; + } +cleanup: + if (involved != NULL) listRelease(involved); + if (weightedNodes != NULL) zfree(weightedNodes); return result; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5169,6 +5440,7 @@ int main(int argc, char **argv) { config.eval_ldb_sync = 0; config.enable_ldb_on_eval = 0; config.last_cmd_type = -1; + config.verbose = 0; config.cluster_manager_command.name = NULL; config.cluster_manager_command.argc = 0; config.cluster_manager_command.argv = NULL; @@ -5176,9 +5448,12 @@ int main(int argc, char **argv) { config.cluster_manager_command.replicas = 0; config.cluster_manager_command.from = NULL; config.cluster_manager_command.to = NULL; + config.cluster_manager_command.weight = NULL; config.cluster_manager_command.slots = 0; config.cluster_manager_command.timeout = CLUSTER_MANAGER_MIGRATE_TIMEOUT; config.cluster_manager_command.pipeline = CLUSTER_MANAGER_MIGRATE_PIPELINE; + 
config.cluster_manager_command.threshold = + CLUSTER_MANAGER_REBALANCE_THRESHOLD; pref.hints = 1; spectrum_palette = spectrum_palette_color; From 3f8a4adb49c29fbc2778aded97089fc6fa770afc Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 6 Apr 2018 18:02:40 +0200 Subject: [PATCH 51/66] Cluster Manager: fix command. --- src/redis-cli.c | 715 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 642 insertions(+), 73 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 49ba4125..8d5732c2 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -151,6 +151,7 @@ static uint64_t dictSdsHash(const void *key); static int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2); static void dictSdsDestructor(void *privdata, void *val); +static void dictListDestructor(void *privdata, void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -406,6 +407,12 @@ static void dictSdsDestructor(void *privdata, void *val) sdsfree(val); } +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} + /* _serverAssert is needed by dict */ void _serverAssert(const char *estr, const char *file, int line) { fprintf(stderr, "=== ASSERTION FAILED ==="); @@ -1446,6 +1453,15 @@ static void usage(void) { exit(1); } +static int confirmWithYes(char *msg) { + printf("%s (type 'yes' to accept): ", msg); + fflush(stdout); + char buf[4]; + int nread = read(fileno(stdin),buf,4); + buf[3] = '\0'; + return (nread != 0 && !strcmp("yes", buf)); +} + /* Turn the plain C strings into Sds strings */ static char **convertToSds(int count, char** args) { int j; @@ -1751,7 +1767,7 @@ static int evalMode(int argc, char **argv) { } /*------------------------------------------------------------------------------ - * Cluster Manager mode + * Cluster Manager *--------------------------------------------------------------------------- */ /* The Cluster Manager global structure */ @@ -1760,6 +1776,9 @@ 
static struct clusterManager { list *errors; } cluster_manager; +/* Used by clusterManagerFixSlotsCoverage */ +dict *clusterManagerUncoveredSlots = NULL; + typedef struct clusterManagerNode { redisContext *context; sds name; @@ -1776,10 +1795,12 @@ typedef struct clusterManagerNode { int slots_count; int replicas_count; list *friends; - sds *migrating; - sds *importing; - int migrating_count; - int importing_count; + sds *migrating; /* An array of sds where even strings are slots and odd + * strings are the destination node IDs. */ + sds *importing; /* An array of sds where even strings are slots and odd + * strings are the source node IDs. */ + int migrating_count; /* Length of the migrating array (migrating slots*2) */ + int importing_count; /* Length of the importing array (importing slots*2) */ float weight; /* Weight used by rebalance */ int balance; /* Used by rebalance */ } clusterManagerNode; @@ -1829,7 +1850,7 @@ static void clusterManagerShowNodes(void); static void clusterManagerShowInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); -static void clusterManagerCheckCluster(int quiet); +static int clusterManagerCheckCluster(int quiet); static void clusterManagerLog(int level, const char* fmt, ...); static int clusterManagerIsConfigConsistent(void); static void clusterManagerOnError(sds err); @@ -1846,6 +1867,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); +static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); @@ -1863,6 +1885,7 @@ clusterManagerCommandDef 
clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, @@ -1988,6 +2011,8 @@ static void freeClusterManager(void) { listRelease(cluster_manager.errors); cluster_manager.errors = NULL; } + if (clusterManagerUncoveredSlots != NULL) + dictRelease(clusterManagerUncoveredSlots); } static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { @@ -2013,6 +2038,38 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } +/* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the + * latter case, if 'err' arg is not NULL, it gets allocated with a copy + * of reply error (it's up to the caller function to free it), otherwise + * the error is directly printed. */ +static int clusterManagerCheckRedisReply(clusterManagerNode *n, + redisReply *r, char **err) +{ + int is_err = 0; + if (!r || (is_err = (r->type == REDIS_REPLY_ERROR))) { + if (is_err) { + if (err != NULL) { + *err = zmalloc((r->len + 1) * sizeof(char)); + strcpy(*err, r->str); + } else CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, r->str); + } + return 0; + } + return 1; +} + +static void clusterManagerRemoveNodeFromList(list *nodelist, + clusterManagerNode *node) { + listIter li; + listNode *ln; + listRewind(nodelist, &li); + while ((ln = listNext(&li)) != NULL) { + if (node == ln->value) { + listDelNode(nodelist, ln); + break; + } + } +} /* Return the node with the specified ID or NULL.
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { @@ -2470,10 +2527,10 @@ cleanup: /* Set slot status to "importing" or "migrating" */ static int clusterManagerSetSlot(clusterManagerNode *node1, clusterManagerNode *node2, - int slot, const char *mode, char **err) { + int slot, const char *status, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node1, "CLUSTER " "SETSLOT %d %s %s", - slot, mode, + slot, status, (char *) node2->name); if (err != NULL) *err = NULL; if (!reply) return 0; @@ -2492,6 +2549,70 @@ cleanup: return success; } +/* Migrate keys taken from reply->elements. It returns the reply from the + * MIGRATE command, or NULL if something goes wrong. If the argument 'dots' + * is not NULL, a dot will be printed for every migrated key. */ +static redisReply *clusterManagerMigrateKeysInReply(clusterManagerNode *source, + clusterManagerNode *target, + redisReply *reply, + int replace, int timeout, + char *dots) +{ + redisReply *migrate_reply = NULL; + char **argv = NULL; + size_t *argv_len = NULL; + int c = (replace ? 
8 : 7); + size_t argc = c + reply->elements; + size_t i, offset = 6; // Keys Offset + argv = zcalloc(argc * sizeof(char *)); + argv_len = zcalloc(argc * sizeof(size_t)); + char portstr[255]; + char timeoutstr[255]; + snprintf(portstr, 10, "%d", target->port); + snprintf(timeoutstr, 10, "%d", timeout); + argv[0] = "MIGRATE"; + argv_len[0] = 7; + argv[1] = target->ip; + argv_len[1] = strlen(target->ip); + argv[2] = portstr; + argv_len[2] = strlen(portstr); + argv[3] = ""; + argv_len[3] = 0; + argv[4] = "0"; + argv_len[4] = 1; + argv[5] = timeoutstr; + argv_len[5] = strlen(timeoutstr); + if (replace) { + argv[offset] = "REPLACE"; + argv_len[offset] = 7; + offset++; + } + argv[offset] = "KEYS"; + argv_len[offset] = 4; + offset++; + for (i = 0; i < reply->elements; i++) { + redisReply *entry = reply->element[i]; + size_t idx = i + offset; + assert(entry->type == REDIS_REPLY_STRING); + argv[idx] = (char *) sdsnew(entry->str); + argv_len[idx] = entry->len; + if (dots) dots[i] = '.'; + } + if (dots) dots[reply->elements] = '\0'; + void *_reply = NULL; + redisAppendCommandArgv(source->context,argc, + (const char**)argv,argv_len); + int success = (redisGetReply(source->context, &_reply) == REDIS_OK); + for (i = 0; i < reply->elements; i++) sdsfree(argv[i + offset]); + if (!success) goto cleanup; + migrate_reply = (redisReply *) _reply; +cleanup: + zfree(argv); + zfree(argv_len); + return migrate_reply; +} + +/* Migrate all keys in the given slot from source to target.*/ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, clusterManagerNode *target, int slot, int timeout, @@ -2499,10 +2620,11 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, char **err) { int success = 1; + int do_fix = (config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX); while (1) { + char *dots = NULL; redisReply *reply = NULL, *migrate_reply = NULL; - char **argv = NULL; - size_t *argv_len = NULL; reply = CLUSTER_MANAGER_COMMAND(source, 
"CLUSTER " "GETKEYSINSLOT %d %d", slot, pipeline); @@ -2523,57 +2645,37 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, freeReplyObject(reply); break; } - char *dots = (verbose ? zmalloc((count+1) * sizeof(char)) : NULL); + if (verbose) dots = zmalloc((count+1) * sizeof(char)); /* Calling MIGRATE command. */ - size_t argc = count + 8; - argv = zcalloc(argc * sizeof(char *)); - argv_len = zcalloc(argc * sizeof(size_t)); - char portstr[255]; - char timeoutstr[255]; - snprintf(portstr, 10, "%d", target->port); - snprintf(timeoutstr, 10, "%d", timeout); - argv[0] = "MIGRATE"; - argv_len[0] = 7; - argv[1] = target->ip; - argv_len[1] = strlen(target->ip); - argv[2] = portstr; - argv_len[2] = strlen(portstr); - argv[3] = ""; - argv_len[3] = 0; - argv[4] = "0"; - argv_len[4] = 1; - argv[5] = timeoutstr; - argv_len[5] = strlen(timeoutstr); - argv[6] = "REPLACE"; - argv_len[6] = 7; - argv[7] = "KEYS"; - argv_len[7] = 4; - for (size_t i = 0; i < count; i++) { - redisReply *entry = reply->element[i]; - size_t idx = i + 8; - assert(entry->type == REDIS_REPLY_STRING); - argv[idx] = (char *) sdsnew(entry->str); - argv_len[idx] = entry->len; - if (verbose) dots[i] = '.'; - } - if (verbose) dots[count] = '\0'; - void *_reply = NULL; - redisAppendCommandArgv(source->context,argc, - (const char**)argv,argv_len); - success = (redisGetReply(source->context, &_reply) == REDIS_OK); - for (size_t i = 0; i < count; i++) sdsfree(argv[i + 8]); - if (!success) goto next; - migrate_reply = (redisReply *) _reply; + migrate_reply = clusterManagerMigrateKeysInReply(source, target, + reply, 0, timeout, + dots); + if (migrate_reply == NULL) goto next; if (migrate_reply->type == REDIS_REPLY_ERROR) { - // TODO: Implement fix. 
- success = 0; - if (err != NULL) { - *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); - strcpy(*err, migrate_reply->str); - printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + if (do_fix && strstr(migrate_reply->str, "BUSYKEY")) { + clusterManagerLogWarn("*** Target key exists. " + "Replacing it for FIX.\n"); + freeReplyObject(migrate_reply); + /* Try to migrate keys adding REPLACE option. */ + migrate_reply = clusterManagerMigrateKeysInReply(source, + target, + reply, + 1, timeout, + NULL); + success = (migrate_reply != NULL && + migrate_reply->type != REDIS_REPLY_ERROR); + } else success = 0; + if (!success) { + if (migrate_reply != NULL) { + if (err) { + *err = zmalloc((migrate_reply->len + 1) * sizeof(char)); + strcpy(*err, migrate_reply->str); + } + printf("\n"); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + } + goto next; } - goto next; } if (verbose) { printf("%s", dots); @@ -2582,8 +2684,7 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, next: if (reply != NULL) freeReplyObject(reply); if (migrate_reply != NULL) freeReplyObject(migrate_reply); - zfree(argv); - zfree(argv_len); + if (dots) zfree(dots); if (!success) break; } return success; @@ -2729,6 +2830,7 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char *name = NULL, *addr = NULL, *flags = NULL, *master_id = NULL, *ping_sent = NULL, *ping_recv = NULL, *config_epoch = NULL, *link_status = NULL; + UNUSED(link_status); int i = 0; while ((p = strchr(line, ' ')) != NULL) { *p = '\0'; @@ -2974,11 +3076,11 @@ int clusterManagerCompareNodeBalance(const void *n1, const void *n2) { static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { sds signature = NULL; int node_count = 0, i = 0, name_len = 0; + char **node_configs = NULL; redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); if (reply == NULL || reply->type == REDIS_REPLY_ERROR) goto cleanup; char *lines = reply->str, *p, *line; - char 
**node_configs = NULL; while ((p = strstr(lines, "\n")) != NULL) { i = 0; *p = '\0'; @@ -3057,8 +3159,10 @@ static sds clusterManagerGetConfigSignature(clusterManagerNode *node) { } cleanup: if (reply != NULL) freeReplyObject(reply); - for (i = 0; i < node_count; i++) zfree(node_configs[i]); - zfree(node_configs); + if (node_configs != NULL) { + for (i = 0; i < node_count; i++) zfree(node_configs[i]); + zfree(node_configs); + } return signature; } @@ -3114,9 +3218,453 @@ static int clusterManagerGetCoveredSlots(char *all_slots) { return totslots; } -static void clusterManagerCheckCluster(int quiet) { +static void clusterManagerPrintSlotsList(list *slots) { + listIter li; + listNode *ln; + listRewind(slots, &li); + sds first = NULL; + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + if (!first) first = slot; + else printf(", "); + printf("%s", slot); + } + printf("\n"); +} + +/* Return the node, among 'nodes' with the greatest number of keys + * in the specified slot. */ +static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, + int slot, + char **err) +{ + clusterManagerNode *node = NULL; + int numkeys = 0; + listIter li; + listNode *ln; + listRewind(nodes, &li); + if (err) *err = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *r = + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + int success = clusterManagerCheckRedisReply(n, r, err); + if (success) { + if (r->integer > numkeys || node == NULL) { + numkeys = r->integer; + node = n; + } + } + if (r != NULL) freeReplyObject(r); + /* If the reply contains errors */ + if (!success) { + if (err != NULL && *err != NULL) + CLUSTER_MANAGER_PRINT_REPLY_ERROR(n, err); + node = NULL; + break; + } + } + return node; +} + +static int clusterManagerFixSlotsCoverage(char *all_slots) { + int i, fixed = 0; + list *none = NULL, *single = NULL, *multi = 
NULL; + clusterManagerLogInfo(">>> Fixing slots coverage...\n"); + printf("List of not covered slots: \n"); + int uncovered_count = 0; + sds log = sdsempty(); + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + int covered = all_slots[i]; + if (!covered) { + sds key = sdsfromlonglong((long long) i); + if (uncovered_count++ > 0) printf(","); + printf("%s", (char *) key); + list *slot_nodes = listCreate(); + sds slot_nodes_str = sdsempty(); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) + continue; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", i, 1); + if (!clusterManagerCheckRedisReply(n, reply, NULL)) { + fixed = -1; + if (reply) freeReplyObject(reply); + goto cleanup; + } + assert(reply->type == REDIS_REPLY_ARRAY); + if (reply->elements > 0) { + listAddNodeTail(slot_nodes, n); + if (listLength(slot_nodes) > 1) + slot_nodes_str = sdscat(slot_nodes_str, ", "); + slot_nodes_str = sdscatfmt(slot_nodes_str, + "%s:%u", n->ip, n->port); + } + freeReplyObject(reply); + } + log = sdscatfmt(log, "\nSlot %S has keys in %u nodes: %S", + key, listLength(slot_nodes), slot_nodes_str); + sdsfree(slot_nodes_str); + dictAdd(clusterManagerUncoveredSlots, key, slot_nodes); + } + } + printf("\n%s\n", log); + /* For every slot, take action depending on the actual condition: + * 1) No node has keys for this slot. + * 2) A single node has keys for this slot. + * 3) Multiple nodes have keys for this slot. 
*/ + none = listCreate(); + single = listCreate(); + multi = listCreate(); + dictIterator *iter = dictGetIterator(clusterManagerUncoveredSlots); + dictEntry *entry; + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + list *nodes = (list *) dictGetVal(entry); + switch (listLength(nodes)){ + case 0: listAddNodeTail(none, slot); break; + case 1: listAddNodeTail(single, slot); break; + default: listAddNodeTail(multi, slot); break; + } + } + dictReleaseIterator(iter); + + /* Handle case "1": keys in no node. */ + if (listLength(none) > 0) { + printf("The following uncovered slots have no keys " + "across the cluster:\n"); + clusterManagerPrintSlotsList(none); + if (confirmWithYes("Fix these slots by covering with a random node?")){ + srand(time(NULL)); + listIter li; + listNode *ln; + listRewind(none, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + long idx = (long) (rand() % listLength(cluster_manager.nodes)); + listNode *node_n = listIndex(cluster_manager.nodes, idx); + assert(node_n != NULL); + clusterManagerNode *n = node_n->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "2": keys only in one node. 
*/ + if (listLength(single) > 0) { + printf("The following uncovered slots have keys in just one node:\n"); + clusterManagerPrintSlotsList(single); + if (confirmWithYes("Fix these slots by covering with those nodes?")){ + listIter li; + listNode *ln; + listRewind(single, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + listNode *fn = listFirst(nodes); + assert(fn != NULL); + clusterManagerNode *n = fn->value; + clusterManagerLogInfo(">>> Covering slot %s with %s:%d\n", + slot, n->ip, n->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + fixed++; + } + } + } + + /* Handle case "3": keys in multiple nodes. */ + if (listLength(multi) > 0) { + printf("The folowing uncovered slots have keys in multiple nodes:\n"); + clusterManagerPrintSlotsList(multi); + if (confirmWithYes("Fix these slots by moving keys " + "into a single node?")) { + listIter li; + listNode *ln; + listRewind(multi, &li); + while ((ln = listNext(&li)) != NULL) { + sds slot = ln->value; + dictEntry *entry = dictFind(clusterManagerUncoveredSlots, slot); + assert(entry != NULL); + list *nodes = (list *) dictGetVal(entry); + int s = atoi(slot); + clusterManagerNode *target = + clusterManagerGetNodeWithMostKeysInSlot(nodes, s, NULL); + if (target == NULL) { + fixed = -1; + goto cleanup; + } + clusterManagerLogInfo(">>> Covering slot %s moving keys " + "to %s:%d\n", slot, + target->ip, target->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER ADDSLOTS %s", slot); + if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + r = CLUSTER_MANAGER_COMMAND(target, + "CLUSTER SETSLOT %s %s", slot, "STABLE"); + if 
(!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + listIter nli; + listNode *nln; + listRewind(nodes, &nli); + while ((nln = listNext(&nli)) != NULL) { + clusterManagerNode *src = nln->value; + if (src == target) continue; + /* Set the source node in 'importing' state + * (even if we will actually migrate keys away) + * in order to avoid receiving redirections + * for MIGRATE. */ + redisReply *r = CLUSTER_MANAGER_COMMAND(src, + "CLUSTER SETSLOT %s %s %s", slot, + "IMPORTING", target->name); + if (!clusterManagerCheckRedisReply(target, r, NULL)) + fixed = -1; + if (r) freeReplyObject(r); + if (fixed < 0) goto cleanup; + int opts = CLUSTER_MANAGER_OPT_VERBOSE | + CLUSTER_MANAGER_OPT_COLD; + if (!clusterManagerMoveSlot(src, target, s, opts, NULL)) { + fixed = -1; + goto cleanup; + } + } + fixed++; + } + } + } +cleanup: + sdsfree(log); + if (none) listRelease(none); + if (single) listRelease(single); + if (multi) listRelease(multi); + return fixed; +} + +/* Slot 'slot' was found to be in importing or migrating state in one or + * more nodes. This function fixes this condition by migrating keys where + * it seems more sensible. */ +static int clusterManagerFixOpenSlot(int slot) { + clusterManagerLogInfo(">>> Fixing open slot %d\n", slot); + /* Try to obtain the current slot owner, according to the current + * nodes configuration. 
*/ + int success = 1; + list *owners = listCreate(); + list *migrating = listCreate(); + list *importing = listCreate(); + sds migrating_str = sdsempty(); + sds importing_str = sdsempty(); + clusterManagerNode *owner = NULL; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots[slot]) { + if (owner == NULL) owner = n; + listAddNodeTail(owners, n); + } + } + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->migrating) { + for (int i = 0; i < n->migrating_count; i += 2) { + sds migrating_slot = n->migrating[i]; + if (atoi(migrating_slot) == slot) { + char *sep = (listLength(migrating) == 0 ? "" : ","); + migrating_str = sdscatfmt(migrating_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(migrating, n); + break; + } + } + } + if (n->importing) { + for (int i = 0; i < n->importing_count; i += 2) { + sds importing_slot = n->importing[i]; + if (atoi(importing_slot) == slot) { + char *sep = (listLength(importing) == 0 ? "" : ","); + importing_str = sdscatfmt(importing_str, "%s%S:%u", + sep, n->ip, n->port); + listAddNodeTail(importing, n); + break; + } + } + } + } + printf("Set as migrating in: %s\n", migrating_str); + printf("Set as importing in: %s\n", importing_str); + /* If there is no slot owner, set as owner the slot with the biggest + * number of keys, among the set of migrating / importing nodes. */ + if (owner == NULL) { + clusterManagerLogInfo(">>> Nobody claims ownership, " + "selecting an owner...\n"); + owner = clusterManagerGetNodeWithMostKeysInSlot(cluster_manager.nodes, + slot, NULL); + // If we still don't have an owner, we can't fix it. + if (owner == NULL) { + clusterManagerLogErr("[ERR] Can't select a slot owner. 
" + "Impossible to fix.\n"); + success = 0; + goto cleanup; + } + + // Use ADDSLOTS to assign the slot. + printf("*** Configuring %s:%d as the slot owner\n", owner->ip, + owner->port); + redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " + "SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER ADDSLOTS %d", slot); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Make sure this information will propagate. Not strictly needed + * since there is no past owner, so all the other nodes will accept + * whatever epoch this node will claim the slot with. */ + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + /* Remove the owner from the list of migrating/importing + * nodes. */ + clusterManagerRemoveNodeFromList(migrating, owner); + clusterManagerRemoveNodeFromList(importing, owner); + } + /* If there are multiple owners of the slot, we need to fix it + * so that a single node is the owner and all the other nodes + * are in importing state. Later the fix can be handled by one + * of the base cases above. + * + * Note that this case also covers multiple nodes having the slot + * in migrating state, since migrating is a valid state only for + * slot owners. 
*/ + if (listLength(owners) > 1) { + owner = clusterManagerGetNodeWithMostKeysInSlot(owners, slot, NULL); + listRewind(owners, &li); + redisReply *reply = NULL; + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + reply = CLUSTER_MANAGER_COMMAND(n, "CLUSTER DELSLOT %d", slot); + success = clusterManagerCheckRedisReply(n, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + success = clusterManagerSetSlot(n, owner, slot, "importing", NULL); + if (!success) goto cleanup; + clusterManagerRemoveNodeFromList(importing, n); //Avoid duplicates + listAddNodeTail(importing, n); + } + reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER BUMPEPOCH"); + success = clusterManagerCheckRedisReply(owner, reply, NULL); + if (reply) freeReplyObject(reply); + if (!success) goto cleanup; + } + int move_opts = CLUSTER_MANAGER_OPT_VERBOSE; + /* Case 1: The slot is in migrating state in one slot, and in + * importing state in 1 slot. That's trivial to address. */ + if (listLength(migrating) == 1 && listLength(importing) == 1) { + clusterManagerNode *src = listFirst(migrating)->value; + clusterManagerNode *dst = listFirst(importing)->value; + success = clusterManagerMoveSlot(src, dst, slot, move_opts, NULL); + } + /* Case 2: There are multiple nodes that claim the slot as importing, + * they probably got keys about the slot after a restart so opened + * the slot. In this case we just move all the keys to the owner + * according to the configuration. 
*/ + else if (listLength(migrating) == 0 && listLength(importing) > 0) { + clusterManagerLogInfo(">>> Moving all the %d slot keys to its " + "owner %s:%d\n", slot, owner->ip, owner->port); + move_opts |= CLUSTER_MANAGER_OPT_COLD; + listRewind(importing, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == owner) continue; + success = clusterManagerMoveSlot(n, owner, slot, move_opts, NULL); + if (!success) goto cleanup; + clusterManagerLogInfo(">>> Setting %d as STABLE in " + "%s:%d\n", slot, n->ip, n->port); + + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } + } else { + int try_to_close_slot = (listLength(importing) == 0 && + listLength(migrating) == 1); + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, + "CLUSTER GETKEYSINSLOT %d %d", slot, 10); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) { + if (success) try_to_close_slot = (r->elements == 0); + freeReplyObject(r); + } + if (!success) goto cleanup; + } + /* Case 3: There are no slots claiming to be in importing state, but + * there is a migrating node that actually don't have any key. We + * can just close the slot, probably a reshard interrupted in the middle. */ + if (try_to_close_slot) { + clusterManagerNode *n = listFirst(migrating)->value; + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER SETSLOT %d %s", + slot, "STABLE"); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) goto cleanup; + } else { + success = 0; + clusterManagerLogErr("[ERR] Sorry, redis-cli can't fix this slot " + "yet (work in progress). 
Slot is set as " + "migrating in %s, as importing in %s, " + "owner is %s:%d\n", migrating_str, + importing_str, owner->ip, owner->port); + } + } +cleanup: + listRelease(owners); + listRelease(migrating); + listRelease(importing); + sdsfree(migrating_str); + sdsfree(importing_str); + return success; +} + +static int clusterManagerCheckCluster(int quiet) { listNode *ln = listFirst(cluster_manager.nodes); - if (!ln) return; + if (!ln) return 0; + int result = 1; + int do_fix = config.cluster_manager_command.flags & + CLUSTER_MANAGER_CMD_FLAG_FIX; clusterManagerNode *node = ln->value; clusterManagerLogInfo(">>> Performing Cluster Check (using node %s:%d)\n", node->ip, node->port); @@ -3124,6 +3672,7 @@ static void clusterManagerCheckCluster(int quiet) { if (!clusterManagerIsConfigConsistent()) { sds err = sdsnew("[ERR] Nodes don't agree about configuration!"); clusterManagerOnError(err); + result = 0; } else { clusterManagerLogOk("[OK] All nodes agree about slots " "configuration.\n"); @@ -3174,6 +3723,7 @@ static void clusterManagerCheckCluster(int quiet) { } } if (open_slots != NULL) { + result = 0; dictIterator *iter = dictGetIterator(open_slots); dictEntry *entry; sds errstr = sdsnew("[WARNING] The following slots are open: "); @@ -3185,6 +3735,17 @@ static void clusterManagerCheckCluster(int quiet) { } clusterManagerLogErr("%s.\n", (char *) errstr); sdsfree(errstr); + if (do_fix) { + // Fix open slots. 
+ dictReleaseIterator(iter); + iter = dictGetIterator(open_slots); + while ((entry = dictNext(iter)) != NULL) { + sds slot = (sds) dictGetKey(entry); + result = clusterManagerFixOpenSlot(atoi(slot)); + if (!result) break; + } + } + dictReleaseIterator(iter); dictRelease(open_slots); } clusterManagerLogInfo(">>> Check slots coverage...\n"); @@ -3200,7 +3761,16 @@ static void clusterManagerCheckCluster(int quiet) { "covered by nodes.\n", CLUSTER_MANAGER_SLOTS); clusterManagerOnError(err); + result = 0; + if (do_fix/* && result*/) { + dictType dtype = clusterManagerDictType; + dtype.valDestructor = dictListDestructor; + clusterManagerUncoveredSlots = dictCreate(&dtype, NULL); + int fixed = clusterManagerFixSlotsCoverage(slots); + if (fixed > 0) result = 1; + } } + return result; } static clusterManagerNode *clusterNodeForResharding(char *id, @@ -3546,12 +4116,7 @@ assign_replicas: } clusterManagerOptimizeAntiAffinity(ip_nodes, ip_count); clusterManagerShowNodes(); - printf("Can I set the above configuration? 
%s", "(type 'yes' to accept): "); - fflush(stdout); - char buf[4]; - int nread = read(fileno(stdin),buf,4); - buf[3] = '\0'; - if (nread != 0 && !strcmp("yes", buf)) { + if (confirmWithYes("Can I set the above configuration?")) { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -3674,13 +4239,17 @@ static int clusterManagerCommandCheck(int argc, char **argv) { clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; clusterManagerShowInfo(); - clusterManagerCheckCluster(0); - return 1; + return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); return 0; } +static int clusterManagerCommandFix(int argc, char **argv) { + config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_FIX; + return clusterManagerCommandCheck(argc, argv); +} + static int clusterManagerCommandReshard(int argc, char **argv) { int port = 0; char *ip = NULL; From 1f548359cba410f8423d8aba101c43bd9280e489 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:25:25 +0200 Subject: [PATCH 52/66] Cluster Manager: import command --- src/Makefile | 2 +- src/redis-cli.c | 216 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 195 insertions(+), 23 deletions(-) diff --git a/src/Makefile b/src/Makefile index 14112aa1..269a7093 100644 --- a/src/Makefile +++ b/src/Makefile @@ -146,7 +146,7 @@ REDIS_SERVER_NAME=redis-server REDIS_SENTINEL_NAME=redis-sentinel REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o 
hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o REDIS_CLI_NAME=redis-cli -REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o +REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o siphash.o crc16.o REDIS_BENCHMARK_NAME=redis-benchmark REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o zmalloc.o redis-benchmark.o REDIS_CHECK_RDB_NAME=redis-check-rdb diff --git a/src/redis-cli.c b/src/redis-cli.c index 8d5732c2..08a356eb 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -74,7 +74,7 @@ #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 #define CLUSTER_MANAGER_INVALID_HOST_ARG \ - "Invalid arguments: you need to pass either a valid " \ + "[ERR] Invalid arguments: you need to pass either a valid " \ "address (ie. 120.0.0.1:7000) or space separated IP " \ "and port (ie. 
120.0.0.1 7000)\n" #define CLUSTER_MANAGER_MODE() (config.cluster_manager_command.name != NULL) @@ -115,7 +115,9 @@ #define CLUSTER_MANAGER_CMD_FLAG_AUTOWEIGHTS 1 << 3 #define CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER 1 << 4 #define CLUSTER_MANAGER_CMD_FLAG_SIMULATE 1 << 5 -#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_REPLACE 1 << 6 +#define CLUSTER_MANAGER_CMD_FLAG_COPY 1 << 7 +#define CLUSTER_MANAGER_CMD_FLAG_COLOR 1 << 8 #define CLUSTER_MANAGER_OPT_GETFRIENDS 1 << 0 #define CLUSTER_MANAGER_OPT_COLD 1 << 1 @@ -237,6 +239,8 @@ static long getLongInfoField(char *info, char *field); * Utility functions *--------------------------------------------------------------------------- */ +uint16_t crc16(const char *buf, int len); + static long long ustime(void) { struct timeval tv; long long ust; @@ -1325,6 +1329,12 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-simulate")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_SIMULATE; + } else if (!strcmp(argv[i],"--cluster-replace")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_REPLACE; + } else if (!strcmp(argv[i],"--cluster-copy")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_COPY; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1870,6 +1880,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1892,6 +1903,8 @@ clusterManagerCommandDef clusterManagerCommands[] = { {"rebalance", 
clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. arg", NULL}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} @@ -2383,6 +2396,37 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { return slots; } +/* ----------------------------------------------------------------------------- + * Key space handling + * -------------------------------------------------------------------------- */ + +/* We have 16384 hash slots. The hash slot of a given key is obtained + * as the least significant 14 bits of the crc16 of the key. + * + * However if the key contains the {...} pattern, only the part between + * { and } is hashed. This may be useful in the future to force certain + * keys to be in the same node (assuming no resharding is in progress). */ +static unsigned int keyHashSlot(char *key, int keylen) { + int s, e; /* start-end indexes of { and } */ + + for (s = 0; s < keylen; s++) + if (key[s] == '{') break; + + /* No '{' ? Hash the whole key. This is the base case. */ + if (s == keylen) return crc16(key,keylen) & 0x3FFF; + + /* '{' found? Check if we have the corresponding '}'. */ + for (e = s+1; e < keylen; e++) + if (key[e] == '}') break; + + /* No '}' or nothing between {} ? Hash the whole key. */ + if (e == keylen || e == s+1) return crc16(key,keylen) & 0x3FFF; + + /* If we are here there is both a { and a } on its right. Hash + * what is in the middle between { and }. */ + return crc16(key+s+1,e-s-1) & 0x3FFF; +} + static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -3533,8 +3577,8 @@ static int clusterManagerFixOpenSlot(int slot) { } // Use ADDSLOTS to assign the slot. 
- printf("*** Configuring %s:%d as the slot owner\n", owner->ip, - owner->port); + clusterManagerLogWarn("*** Configuring %s:%d as the slot owner\n", + owner->ip, owner->port); redisReply *reply = CLUSTER_MANAGER_COMMAND(owner, "CLUSTER " "SETSLOT %d %s", slot, "STABLE"); @@ -4527,7 +4571,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { if (over_threshold) threshold_reached = 1; } if (!threshold_reached) { - clusterManagerLogErr("*** No rebalancing needed! " + clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); result = 0; @@ -4586,7 +4630,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { listRelease(lsrc); int table_len = (int) listLength(table); if (!table || table_len != numslots) { - clusterManagerLogErr("*** Assertio failed: Reshard table " + clusterManagerLogErr("*** Assertion failed: Reshard table " "!= number of slots"); result = 0; goto end_move; @@ -4629,23 +4673,148 @@ invalid_args: return 0; } -static int clusterManagerCommandCall(int argc, char **argv) { - int port = 0; - char *ip = NULL; - char *addr = argv[0]; - char *c = strrchr(addr, '@'); - int i; - if (c != NULL) *c = '\0'; - c = strrchr(addr, ':'); - if (c != NULL) { - *c = '\0'; - ip = addr; - port = atoi(++c); - } else { - fprintf(stderr, - "Invalid arguments: first agrumnt must be host:port.\n"); - return 0; +static int clusterManagerCommandImport(int argc, char **argv) { + int success = 1; + int port = 0, src_port = 0; + char *ip = NULL, *src_ip = NULL; + char *invalid_args_msg = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) { + invalid_args_msg = CLUSTER_MANAGER_INVALID_HOST_ARG; + goto invalid_args; } + if (config.cluster_manager_command.from == NULL) { + invalid_args_msg = "[ERR] Option '--cluster-from' is required for " + "subcommand 'import'.\n"; + goto invalid_args; + } + char *src_host[] = {config.cluster_manager_command.from}; + if 
(!getClusterHostFromCmdArgs(1, src_host, &src_ip, &src_port)) { + invalid_args_msg = "[ERR] Invalid --cluster-from host. You need to " + "pass a valid address (ie. 120.0.0.1:7000).\n"; + goto invalid_args; + } + clusterManagerLogInfo(">>> Importing data from %s:%d to cluster %s:%d\n", + src_ip, src_port, ip, port); + + clusterManagerNode *refnode = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + char *reply_err = NULL; + redisReply *src_reply = NULL; + // Connect to the source node. + redisContext *src_ctx = redisConnect(src_ip, src_port); + if (src_ctx->err) { + success = 0; + fprintf(stderr,"Could not connect to Redis at %s:%d: %s.\n", src_ip, + src_port, src_ctx->errstr); + goto cleanup; + } + src_reply = reconnectingRedisCommand(src_ctx, "INFO"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + if (getLongInfoField(src_reply->str, "cluster_enabled")) { + clusterManagerLogErr("[ERR] The source node should not be a " + "cluster node.\n"); + success = 0; + goto cleanup; + } + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "DBSIZE"); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + int size = src_reply->integer, i; + clusterManagerLogWarn("*** Importing %d keys from DB 0\n", size); + + // Build a slot -> node map + clusterManagerNode *slots_map[CLUSTER_MANAGER_SLOTS]; + memset(slots_map, 0, sizeof(slots_map) / sizeof(clusterManagerNode *)); + listIter li; + listNode *ln; + for (i = 0; i < CLUSTER_MANAGER_SLOTS; i++) { + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->slots_count == 0) continue; + if (n->slots[i]) { + slots_map[i] = n; + 
break; + } + } + } + + char cmdfmt[50] = "MIGRATE %s %d %s %d %d"; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_COPY) + strcat(cmdfmt, " %s"); + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_REPLACE) + strcat(cmdfmt, " %s"); + + /* Use SCAN to iterate over the keys, migrating to the + * right node as needed. */ + int cursor = -999, timeout = config.cluster_manager_command.timeout; + while (cursor != 0) { + if (cursor < 0) cursor = 0; + freeReplyObject(src_reply); + src_reply = reconnectingRedisCommand(src_ctx, "SCAN %d COUNT %d", + cursor, 1000); + if (!src_reply || src_reply->type == REDIS_REPLY_ERROR) { + if (src_reply && src_reply->str) reply_err = src_reply->str; + success = 0; + goto cleanup; + } + assert(src_reply->type == REDIS_REPLY_ARRAY); + assert(src_reply->elements >= 2); + assert(src_reply->element[1]->type == REDIS_REPLY_ARRAY); + if (src_reply->element[0]->type == REDIS_REPLY_STRING) + cursor = atoi(src_reply->element[0]->str); + else if (src_reply->element[0]->type == REDIS_REPLY_INTEGER) + cursor = src_reply->element[0]->integer; + int keycount = src_reply->element[1]->elements; + for (i = 0; i < keycount; i++) { + redisReply *kr = src_reply->element[1]->element[i]; + assert(kr->type == REDIS_REPLY_STRING); + char *key = kr->str; + uint16_t slot = keyHashSlot(key, kr->len); + clusterManagerNode *target = slots_map[slot]; + printf("Migrating %s to %s:%d: ", key, target->ip, target->port); + redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, + target->ip, target->port, + key, 0, timeout, + "COPY", "REPLACE"); + if (!r || r->type == REDIS_REPLY_ERROR) { + if (r && r->str) { + clusterManagerLogErr("Source %s:%d replied with " + "error:\n%s\n", src_ip, src_port, + r->str); + } + success = 0; + } + freeReplyObject(r); + if (!success) goto cleanup; + clusterManagerLogOk("OK\n"); + } + } +cleanup: + if (reply_err) + clusterManagerLogErr("Source %s:%d replied with error:\n%s\n", + src_ip, src_port, 
reply_err); + if (src_ctx) redisFree(src_ctx); + if (src_reply) freeReplyObject(src_reply); + return success; +invalid_args: + fprintf(stderr, "%s", invalid_args_msg); + return 0; +} + +static int clusterManagerCommandCall(int argc, char **argv) { + int port = 0, i; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; @@ -4677,6 +4846,9 @@ static int clusterManagerCommandCall(int argc, char **argv) { } zfree(argvlen); return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; } static int clusterManagerCommandHelp(int argc, char **argv) { From efa51f161726d7992e6c2a90710faabf9312b2d1 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 10 Apr 2018 16:53:24 +0200 Subject: [PATCH 53/66] Cluster Manager: added clusterManagerCheckCluster to import command --- src/redis-cli.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/redis-cli.c b/src/redis-cli.c index 08a356eb..9d93f29b 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4698,6 +4698,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; char *reply_err = NULL; redisReply *src_reply = NULL; // Connect to the source node. From aeaf6ee1c32686e2623ae0551a86edc202579b9a Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 17:08:53 +0200 Subject: [PATCH 54/66] Cluster Manager: add-node command. 
--- src/redis-cli.c | 168 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 154 insertions(+), 14 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9d93f29b..da2421c7 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -165,6 +165,7 @@ typedef struct clusterManagerCommand { char *from; char *to; char **weight; + char *master_id; int weight_argc; int slots; int timeout; @@ -1299,6 +1300,8 @@ static int parseOptions(int argc, char **argv) { usage(); } else if (!strcmp(argv[i],"--cluster-replicas") && !lastarg) { config.cluster_manager_command.replicas = atoi(argv[++i]); + } else if (!strcmp(argv[i],"--cluster-master-id") && !lastarg) { + config.cluster_manager_command.master_id = argv[++i]; } else if (!strcmp(argv[i],"--cluster-from") && !lastarg) { config.cluster_manager_command.from = argv[++i]; } else if (!strcmp(argv[i],"--cluster-to") && !lastarg) { @@ -1335,6 +1338,9 @@ static int parseOptions(int argc, char **argv) { } else if (!strcmp(argv[i],"--cluster-copy")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_COPY; + } else if (!strcmp(argv[i],"--cluster-slave")) { + config.cluster_manager_command.flags |= + CLUSTER_MANAGER_CMD_FLAG_SLAVE; } else if (!strcmp(argv[i],"--cluster-use-empty-masters")) { config.cluster_manager_command.flags |= CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; @@ -1847,6 +1853,8 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name); static clusterManagerNode *clusterManagerNodeByAbbreviatedName(const char *n); static void clusterManagerNodeResetSlots(clusterManagerNode *node); static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err); +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err); static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err); static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts); @@ -1875,6 +1883,7 @@ static void 
clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, /* Cluster Manager commands. */ static int clusterManagerCommandCreate(int argc, char **argv); +static int clusterManagerCommandAddNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1895,6 +1904,8 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... hostN:portN", "replicas "}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3030,8 +3041,7 @@ static int clusterManagerLoadInfoFromNode(clusterManagerNode *node, int opts) { opts |= CLUSTER_MANAGER_OPT_GETFRIENDS; char *e = NULL; if (!clusterManagerNodeIsCluster(node, &e)) { - char *msg = (e ? e : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n",node->ip,node->port,msg); + clusterManagerPrintNotClusterNodeError(node, e); if (e) zfree(e); freeClusterManagerNode(node); return 0; @@ -3313,6 +3323,27 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, return node; } +/* This function returns the master that has the least number of replicas + * in the cluster. If there are multiple masters with the same smaller + * number of replicas, one at random is returned. 
*/ + +static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { + clusterManagerNode *node = NULL; + int lowest_count = 0; + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (node == NULL || n->replicas_count < lowest_count) { + node = n; + lowest_count = n->replicas_count; + } + } + return node; +} + static int clusterManagerFixSlotsCoverage(char *all_slots) { int i, fixed = 0; list *none = NULL, *single = NULL, *multi = NULL; @@ -3966,6 +3997,26 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, array->nodes[array->count++] = node; } +static void clusterManagerPrintNotEmptyNodeError(clusterManagerNode *node, + char *err) +{ + char *msg; + if (err) msg = err; + else { + msg = "is not empty. Either the node already knows other " + "nodes (check with CLUSTER NODES) or contains some " + "key in database 0."; + } + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + +static void clusterManagerPrintNotClusterNodeError(clusterManagerNode *node, + char *err) +{ + char *msg = (err ? err : "is not configured as a cluster node."); + clusterManagerLogErr("[ERR] Node %s:%d %s\n", node->ip, node->port, msg); +} + /* Execute redis-cli in Cluster Manager mode */ static void clusterManagerMode(clusterManagerCommandProc *proc) { int argc = config.cluster_manager_command.argc; @@ -4008,8 +4059,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } char *err = NULL; if (!clusterManagerNodeIsCluster(node, &err)) { - char *msg = (err ? 
err : "is not configured as a cluster node."); - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotClusterNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4025,14 +4075,7 @@ static int clusterManagerCommandCreate(int argc, char **argv) { } err = NULL; if (!clusterManagerNodeIsEmpty(node, &err)) { - char *msg; - if (err) msg = err; - else { - msg = "is not empty. Either the node already knows other " - "nodes (check with CLUSTER NODES) or contains some " - "key in database 0."; - } - clusterManagerLogErr("[ERR] Node %s:%d %s\n", ip, port, msg); + clusterManagerPrintNotEmptyNodeError(node, err); if (err) zfree(err); freeClusterManagerNode(node); return 0; @@ -4263,6 +4306,104 @@ cleanup: return success; } +static int clusterManagerCommandAddNode(int argc, char **argv) { + int success = 1; + redisReply *reply = NULL; + char *ref_ip = NULL, *ip = NULL; + int ref_port = 0, port = 0; + if (!getClusterHostFromCmdArgs(argc - 1, argv + 1, &ref_ip, &ref_port)) + goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) + goto invalid_args; + clusterManagerLogInfo(">>> Adding node %s:%d to cluster %s:%d\n", ip, port, + ref_ip, ref_port); + // Check the existing cluster + clusterManagerNode *refnode = clusterManagerNewNode(ref_ip, ref_port); + if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; + if (!clusterManagerCheckCluster(0)) return 0; + + /* If --cluster-master-id was specified, try to resolve it now so that we + * abort before starting with the node configuration. 
*/ + clusterManagerNode *master_node = NULL; + if (config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_SLAVE) { + char *master_id = config.cluster_manager_command.master_id; + if (master_id != NULL) { + master_node = clusterManagerNodeByName(master_id); + if (master_node == NULL) { + clusterManagerLogErr("[ERR] No such master ID %s\n", master_id); + return 0; + } + } else { + master_node = clusterManagerNodeWithLeastReplicas(); + assert(master_node != NULL); + printf("Automatically selected master %s:%d\n", master_node->ip, + master_node->port); + } + } + + // Add the new node + clusterManagerNode *new_node = clusterManagerNewNode(ip, port); + int added = 0; + CLUSTER_MANAGER_NODE_CONNECT(new_node); + if (new_node->context->err) { + clusterManagerLogErr("[ERR] Sorry, can't connect to node %s:%d\n", + ip, port); + success = 0; + goto cleanup; + } + char *err = NULL; + if (!(success = clusterManagerNodeIsCluster(new_node, &err))) { + clusterManagerPrintNotClusterNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + if (!clusterManagerNodeLoadInfo(new_node, 0, &err)) { + if (err) { + CLUSTER_MANAGER_PRINT_REPLY_ERROR(new_node, err); + zfree(err); + } + success = 0; + goto cleanup; + } + if (!(success = clusterManagerNodeIsEmpty(new_node, &err))) { + clusterManagerPrintNotEmptyNodeError(new_node, err); + if (err) zfree(err); + goto cleanup; + } + clusterManagerNode *first = listFirst(cluster_manager.nodes)->value; + listAddNodeTail(cluster_manager.nodes, new_node); + added = 1; + + // Send CLUSTER MEET command to the new node + clusterManagerLogInfo(">>> Send CLUSTER MEET to node %s:%d to make it " + "join the cluster.\n", ip, port); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER MEET %s %d", + first->ip, first->port); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + + /* Additional configuration is needed if the node is added as a slave. 
*/ + if (master_node) { + sleep(1); + clusterManagerWaitForClusterJoin(); + clusterManagerLogInfo(">>> Configure node as replica of %s:%d.\n", + master_node->ip, master_node->port); + freeReplyObject(reply); + reply = CLUSTER_MANAGER_COMMAND(new_node, "CLUSTER REPLICATE %s", + master_node->name); + if (!(success = clusterManagerCheckRedisReply(new_node, reply, NULL))) + goto cleanup; + } + clusterManagerLogOk("[OK] New node added correctly.\n"); +cleanup: + if (!added && new_node) freeClusterManagerNode(new_node); + if (reply) freeReplyObject(reply); + return success; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -4531,8 +4672,7 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { nodes_involved++; listAddNodeTail(involved, n); } - weightedNodes = zmalloc(nodes_involved * - sizeof(clusterManagerNode *)); + weightedNodes = zmalloc(nodes_involved * sizeof(clusterManagerNode *)); if (weightedNodes == NULL) goto cleanup; /* Check cluster, only proceed if it looks sane. */ clusterManagerCheckCluster(1); From 52f17f6f8ed828a211e468ef6a745a6c6cb846a9 Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 11 Apr 2018 18:22:44 +0200 Subject: [PATCH 55/66] - Cluster Manager: del-node command. 
- Cluster Manager: fixed bug in clusterManagerNodeWithLeastReplicas --- src/redis-cli.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index da2421c7..9a1ab0fd 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1884,6 +1884,7 @@ static void clusterManagerNodeArrayAdd(clusterManagerNodeArray *array, static int clusterManagerCommandCreate(int argc, char **argv); static int clusterManagerCommandAddNode(int argc, char **argv); +static int clusterManagerCommandDeleteNode(int argc, char **argv); static int clusterManagerCommandInfo(int argc, char **argv); static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); @@ -1906,6 +1907,7 @@ clusterManagerCommandDef clusterManagerCommands[] = { "replicas "}, {"add-node", clusterManagerCommandAddNode, 2, "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, @@ -3335,7 +3337,7 @@ static clusterManagerNode *clusterManagerNodeWithLeastReplicas() { listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; - if (node->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; + if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE) continue; if (node == NULL || n->replicas_count < lowest_count) { node = n; lowest_count = n->replicas_count; @@ -4404,6 +4406,73 @@ invalid_args: return 0; } +static int clusterManagerCommandDeleteNode(int argc, char **argv) { + UNUSED(argc); + int success = 1; + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + char *node_id = argv[1]; + clusterManagerLogInfo(">>> Removing node %s 
from cluster %s:%d\n", + node_id, ip, port); + clusterManagerNode *ref_node = clusterManagerNewNode(ip, port); + clusterManagerNode *node = NULL; + + // Load cluster information + if (!clusterManagerLoadInfoFromNode(ref_node, 0)) return 0; + + // Check if the node exists and is not empty + node = clusterManagerNodeByName(node_id); + if (node == NULL) { + clusterManagerLogErr("[ERR] No such node ID %s\n", node_id); + return 0; + } + if (node->slots_count != 0) { + clusterManagerLogErr("[ERR] Node %s:%d is not empty! Reshard data " + "away and try again.\n", node->ip, node->port); + return 0; + } + + // Send CLUSTER FORGET to all the nodes but the node to remove + clusterManagerLogInfo(">>> Sending CLUSTER FORGET messages to the " + "cluster...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + if (n == node) continue; + if (n->replicate && !strcasecmp(n->replicate, node_id)) { + // Reconfigure the slave to replicate with some other node + clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); + //TODO: check whether master could be the same as node + assert(master != NULL); + clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", + n->ip, n->port, master->ip, master->port); + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER REPLICATE %s", + master->name); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + redisReply *r = CLUSTER_MANAGER_COMMAND(n, "CLUSTER FORGET %s", + node_id); + success = clusterManagerCheckRedisReply(n, r, NULL); + if (r) freeReplyObject(r); + if (!success) return 0; + } + + // Finally shutdown the node + clusterManagerLogInfo(">>> SHUTDOWN the node.\n"); + redisReply *r = redisCommand(node->context, "SHUTDOWN"); + success = clusterManagerCheckRedisReply(node, r, NULL); + if (r) freeReplyObject(r); + return success; +invalid_args: + fprintf(stderr, 
CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandInfo(int argc, char **argv) { int port = 0; char *ip = NULL; @@ -5026,6 +5095,9 @@ static int clusterManagerCommandHelp(int argc, char **argv) { } } } + fprintf(stderr, "\nFor check, fix, reshard, del-node, set-timeout you " + "can specify the host and port of any working node in " + "the cluster.\n\n"); return 0; } From 5f358dae337c78ed14a5080d991e83d3b68588f6 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 13 Apr 2018 16:09:22 +0200 Subject: [PATCH 56/66] Cluster Manager: set-timeout command --- src/redis-cli.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9a1ab0fd..dba8781f 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1890,6 +1890,7 @@ static int clusterManagerCommandCheck(int argc, char **argv); static int clusterManagerCommandFix(int argc, char **argv); static int clusterManagerCommandReshard(int argc, char **argv); static int clusterManagerCommandRebalance(int argc, char **argv); +static int clusterManagerCommandSetTimeout(int argc, char **argv); static int clusterManagerCommandImport(int argc, char **argv); static int clusterManagerCommandCall(int argc, char **argv); static int clusterManagerCommandHelp(int argc, char **argv); @@ -1905,21 +1906,23 @@ typedef struct clusterManagerCommandDef { clusterManagerCommandDef clusterManagerCommands[] = { {"create", clusterManagerCommandCreate, -2, "host1:port1 ... 
hostN:portN", "replicas "}, - {"add-node", clusterManagerCommandAddNode, 2, - "new_host:new_port existing_host:existing_port", "slave,master-id "}, - {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"check", clusterManagerCommandCheck, -1, "host:port", NULL}, - {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"info", clusterManagerCommandInfo, -1, "host:port", NULL}, + {"fix", clusterManagerCommandFix, -1, "host:port", NULL}, {"reshard", clusterManagerCommandReshard, -1, "host:port", "from ,to ,slots ,yes,timeout ,pipeline "}, {"rebalance", clusterManagerCommandRebalance, -1, "host:port", "weight ,use-empty-masters," "timeout ,simulate,pipeline ,threshold "}, - {"import", clusterManagerCommandImport, 1, "host:port", - "from ,copy,replace"}, + {"add-node", clusterManagerCommandAddNode, 2, + "new_host:new_port existing_host:existing_port", "slave,master-id "}, + {"del-node", clusterManagerCommandDeleteNode, 2, "host:port node_id",NULL}, {"call", clusterManagerCommandCall, -2, "host:port command arg arg .. 
arg", NULL}, + {"set-timeout", clusterManagerCommandSetTimeout, 2, + "host:port milliseconds", NULL}, + {"import", clusterManagerCommandImport, 1, "host:port", + "from ,copy,replace"}, {"help", clusterManagerCommandHelp, 0, NULL, NULL} }; @@ -4882,6 +4885,61 @@ invalid_args: return 0; } +static int clusterManagerCommandSetTimeout(int argc, char **argv) { + UNUSED(argc); + int port = 0; + char *ip = NULL; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; + int timeout = atoi(argv[1]); + if (timeout < 100) { + fprintf(stderr, "Setting a node timeout of less than 100 " + "milliseconds is a bad idea.\n"); + return 0; + } + // Load cluster information + clusterManagerNode *node = clusterManagerNewNode(ip, port); + if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; + int ok_count = 0, err_count = 0; + + clusterManagerLogInfo(">>> Reconfiguring node timeout in every " + "cluster node...\n"); + listIter li; + listNode *ln; + listRewind(cluster_manager.nodes, &li); + while ((ln = listNext(&li)) != NULL) { + clusterManagerNode *n = ln->value; + char *err = NULL; + redisReply *reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s %s %d", + "SET", + "cluster-node-timeout", + timeout); + if (reply == NULL) goto reply_err; + int ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + reply = CLUSTER_MANAGER_COMMAND(n, "CONFIG %s", "REWRITE"); + if (reply == NULL) goto reply_err; + ok = clusterManagerCheckRedisReply(n, reply, &err); + freeReplyObject(reply); + if (!ok) goto reply_err; + clusterManagerLogWarn("*** New timeout set for %s:%d\n", n->ip, + n->port); + ok_count++; + continue; +reply_err: + if (err == NULL) err = ""; + clusterManagerLogErr("ERR setting node-timeot for %s:%d: %s\n", n->ip, + n->port, err); + err_count++; + } + clusterManagerLogInfo(">>> New node timeout set. 
%d OK, %d ERR.\n", + ok_count, err_count); + return 1; +invalid_args: + fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); + return 0; +} + static int clusterManagerCommandImport(int argc, char **argv) { int success = 1; int port = 0, src_port = 0; From 5bc2c98789905a75322f83648dd104b8458ac477 Mon Sep 17 00:00:00 2001 From: artix Date: Thu, 19 Apr 2018 18:52:01 +0200 Subject: [PATCH 57/66] Cluster Manager: code improvements and more comments added. --- src/redis-cli.c | 66 +++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index dba8781f..07732367 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -68,7 +68,7 @@ #define REDIS_CLI_RCFILE_ENV "REDISCLI_RCFILE" #define REDIS_CLI_RCFILE_DEFAULT ".redisclirc" -#define CLUSTER_MANAGER_SLOTS 16384 +#define CLUSTER_MANAGER_SLOTS 16384 #define CLUSTER_MANAGER_MIGRATE_TIMEOUT 60000 #define CLUSTER_MANAGER_MIGRATE_PIPELINE 10 #define CLUSTER_MANAGER_REBALANCE_THRESHOLD 2 @@ -172,6 +172,7 @@ typedef struct clusterManagerCommand { int pipeline; float threshold; } clusterManagerCommand; + static void createClusterManagerCommand(char *cmdname, int argc, char **argv); @@ -1788,7 +1789,7 @@ static int evalMode(int argc, char **argv) { /* The Cluster Manager global structure */ static struct clusterManager { - list *nodes; /* List of nodes int he configuration. */ + list *nodes; /* List of nodes in the configuration. */ list *errors; } cluster_manager; @@ -1821,7 +1822,7 @@ typedef struct clusterManagerNode { int balance; /* Used by rebalance */ } clusterManagerNode; -/* Data structure used to represent a sequence of nodes. */ +/* Data structure used to represent a sequence of cluster nodes. 
*/ typedef struct clusterManagerNodeArray { clusterManagerNode **nodes; /* Actual nodes array */ clusterManagerNode **alloc; /* Pointer to the allocated memory */ @@ -1829,7 +1830,7 @@ typedef struct clusterManagerNodeArray { int count; /* Non-NULL nodes count */ } clusterManagerNodeArray; -/* Used for reshard table. */ +/* Used for the reshard table. */ typedef struct clusterManagerReshardTableItem { clusterManagerNode *source; int slot; @@ -1865,7 +1866,7 @@ static void clusterManagerOptimizeAntiAffinity(clusterManagerNodeArray *ipnodes, int ip_count); static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent); static void clusterManagerShowNodes(void); -static void clusterManagerShowInfo(void); +static void clusterManagerShowClusterInfo(void); static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err); static void clusterManagerWaitForClusterJoin(void); static int clusterManagerCheckCluster(int quiet); @@ -2067,8 +2068,9 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { clusterManagerNodeResetSlots(node); return node; } + /* Check whether reply is NULL or its type is REDIS_REPLY_ERROR. In the - * latest case, if 'err' arg is not NULL, it gets allocated with a copy + * latest case, if the 'err' arg is not NULL, it gets allocated with a copy * of reply error (it's up to the caller function to free it), elsewhere * the error is directly printed. */ static int clusterManagerCheckRedisReply(clusterManagerNode *n, @@ -2100,7 +2102,7 @@ static void clusterManagerRemoveNodeFromList(list *nodelist, } } -/* Return the node with the specified ID or NULL. */ +/* Return the node with the specified name (ID) or NULL. 
*/ static clusterManagerNode *clusterManagerNodeByName(const char *name) { if (cluster_manager.nodes == NULL) return NULL; clusterManagerNode *found = NULL; @@ -2121,7 +2123,7 @@ static clusterManagerNode *clusterManagerNodeByName(const char *name) { return found; } -/* Like get_node_by_name but the specified name can be just the first +/* Like clusterManagerNodeByName but the specified name can be just the first * part of the node ID as long as the prefix in unique across the * cluster. */ @@ -2152,6 +2154,7 @@ static void clusterManagerNodeResetSlots(clusterManagerNode *node) { node->slots_count = 0; } +/* Call "INFO" redis command on the specified node and return the reply. */ static redisReply *clusterManagerGetNodeRedisInfo(clusterManagerNode *node, char **err) { @@ -2181,7 +2184,7 @@ static int clusterManagerNodeIsCluster(clusterManagerNode *node, char **err) { * some key or if it already knows other nodes */ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { redisReply *info = clusterManagerGetNodeRedisInfo(node, err); - int is_err = 0, is_empty = 1; + int is_empty = 1; if (info == NULL) return 0; if (strstr(info->str, "db0:") != NULL) { is_empty = 0; @@ -2190,11 +2193,7 @@ static int clusterManagerNodeIsEmpty(clusterManagerNode *node, char **err) { freeReplyObject(info); info = CLUSTER_MANAGER_COMMAND(node, "CLUSTER INFO"); if (err != NULL) *err = NULL; - if (info == NULL || (is_err = (info->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((info->len + 1) * sizeof(char)); - strcpy(*err, info->str); - } + if (!clusterManagerCheckRedisReply(node, info, err)) { is_empty = 0; goto result; } @@ -2422,7 +2421,7 @@ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { * However if the key contains the {...} pattern, only the part between * { and } is hashed. This may be useful in the future to force certain * keys to be in the same node (assuming no resharding is in progress). 
*/ -static unsigned int keyHashSlot(char *key, int keylen) { +static unsigned int clusterManagerKeyHashSlot(char *key, int keylen) { int s, e; /* start-end indexes of { and } */ for (s = 0; s < keylen; s++) @@ -2443,6 +2442,7 @@ static unsigned int keyHashSlot(char *key, int keylen) { return crc16(key+s+1,e-s-1) & 0x3FFF; } +/* Return a string representation of the cluster node. */ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { sds info = sdsempty(); sds spaces = sdsempty(); @@ -2484,7 +2484,7 @@ static void clusterManagerShowNodes(void) { } } -static void clusterManagerShowInfo(void) { +static void clusterManagerShowClusterInfo(void) { int masters = 0; int keys = 0; listIter li; @@ -2533,11 +2533,12 @@ static void clusterManagerShowInfo(void) { printf("%.2f keys per slot on average.\n", keys_per_slot); } +/* Flush dirty slots configuration of the node by calling CLUSTER ADDSLOTS */ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) { redisReply *reply = NULL; void *_reply = NULL; - int is_err = 0, success = 1; + int success = 1; /* First two args are used for the command itself. */ int argc = node->slots_count + 2; sds *argv = zmalloc(argc * sizeof(*argv)); @@ -2566,14 +2567,7 @@ static int clusterManagerAddSlots(clusterManagerNode *node, char**err) goto cleanup; } reply = (redisReply*) _reply; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } - success = 0; - goto cleanup; - } + success = clusterManagerCheckRedisReply(node, reply, err); cleanup: zfree(argvlen); if (argv != NULL) { @@ -2821,7 +2815,7 @@ static int clusterManagerMoveSlot(clusterManagerNode *source, } /* Flush the dirty node configuration by calling replicate for slaves or - * adding the slots for masters. */ + * adding the slots defined in the masters. 
*/ static int clusterManagerFlushNodeConfig(clusterManagerNode *node, char **err) { if (!node->dirty) return 0; redisReply *reply = NULL; @@ -2852,6 +2846,7 @@ cleanup: return success; } +/* Wait until the cluster configuration is consistent. */ static void clusterManagerWaitForClusterJoin(void) { printf("Waiting for the cluster to join\n"); while(!clusterManagerIsConfigConsistent()) { @@ -2871,13 +2866,9 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, char **err) { redisReply *reply = CLUSTER_MANAGER_COMMAND(node, "CLUSTER NODES"); - int is_err = 0, success = 1; + int success = 1; *err = NULL; - if (reply == NULL || (is_err = (reply->type == REDIS_REPLY_ERROR))) { - if (is_err && err != NULL) { - *err = zmalloc((reply->len + 1) * sizeof(char)); - strcpy(*err, reply->str); - } + if (!clusterManagerCheckRedisReply(node, reply, err)) { success = 0; goto cleanup; } @@ -3114,6 +3105,7 @@ invalid_friend: return 1; } +/* Compare functions used by various sorting operations. */ int clusterManagerSlotCompare(const void *slot1, const void *slot2) { const char **i1 = (const char **)slot1; const char **i2 = (const char **)slot2; @@ -3252,6 +3244,7 @@ static int clusterManagerIsConfigConsistent(void) { return consistent; } +/* Add the error string to cluster_manager.errors and print it. */ static void clusterManagerOnError(sds err) { if (cluster_manager.errors == NULL) cluster_manager.errors = listCreate(); @@ -3259,6 +3252,9 @@ static void clusterManagerOnError(sds err) { clusterManagerLogErr("%s\n", (char *) err); } +/* Check the slots coverage of the cluster. The 'all_slots' argument must be + * and array of 16384 bytes. Every covered slot will be set to 1 in the + * 'all_slots' array. 
The function returns the total number of covered slots.*/ static int clusterManagerGetCoveredSlots(char *all_slots) { if (cluster_manager.nodes == NULL) return 0; listIter li; @@ -4482,7 +4478,7 @@ static int clusterManagerCommandInfo(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return 1; invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -4495,7 +4491,7 @@ static int clusterManagerCommandCheck(int argc, char **argv) { if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; clusterManagerNode *node = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(node, 0)) return 0; - clusterManagerShowInfo(); + clusterManagerShowClusterInfo(); return clusterManagerCheckCluster(0); invalid_args: fprintf(stderr, CLUSTER_MANAGER_INVALID_HOST_ARG); @@ -5047,7 +5043,7 @@ static int clusterManagerCommandImport(int argc, char **argv) { redisReply *kr = src_reply->element[1]->element[i]; assert(kr->type == REDIS_REPLY_STRING); char *key = kr->str; - uint16_t slot = keyHashSlot(key, kr->len); + uint16_t slot = clusterManagerKeyHashSlot(key, kr->len); clusterManagerNode *target = slots_map[slot]; printf("Migrating %s to %s:%d: ", key, target->ip, target->port); redisReply *r = reconnectingRedisCommand(src_ctx, cmdfmt, From 2f31545beb939f23b89acfa2a188c1393cdbbd2a Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 18:08:30 +0200 Subject: [PATCH 58/66] Cluster Manager: fixed bug when parsing CLUSTER NODES reply (clusterManagerNodeLoadInfo) --- src/redis-cli.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 07732367..adb2095e 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2922,6 +2922,7 @@ static int
clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, line = p + 1; remaining--; } else line = p; + char *dash = NULL; if (slotsdef[0] == '[') { slotsdef++; if ((p = strstr(slotsdef, "->-"))) { // Migrating @@ -2953,7 +2954,8 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, node->importing[node->importing_count - 1] = src; } - } else if ((p = strchr(slotsdef, '-')) != NULL) { + } else if ((dash = strchr(slotsdef, '-')) != NULL) { + p = dash; int start, stop; *p = '\0'; start = atoi(slotsdef); @@ -5078,7 +5080,7 @@ invalid_args: static int clusterManagerCommandCall(int argc, char **argv) { int port = 0, i; char *ip = NULL; - if (!getClusterHostFromCmdArgs(argc, argv, &ip, &port)) goto invalid_args; + if (!getClusterHostFromCmdArgs(1, argv, &ip, &port)) goto invalid_args; clusterManagerNode *refnode = clusterManagerNewNode(ip, port); if (!clusterManagerLoadInfoFromNode(refnode, 0)) return 0; argc--; From be94e890319d7abaea462922d57222df0ca345f0 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:25:08 +0200 Subject: [PATCH 59/66] Cluster Manager: fixed expected slots calculation (rebalance) Cluster Manager: fixed argument parsing after --cluster-weight --- src/redis-cli.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index adb2095e..bdc4b7b4 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1318,6 +1318,7 @@ static int parseOptions(int argc, char **argv) { if (wargc > 0) { config.cluster_manager_command.weight = weight; config.cluster_manager_command.weight_argc = wargc; + i += wargc; } } else if (!strcmp(argv[i],"--cluster-slots") && !lastarg) { config.cluster_manager_command.slots = atoi(argv[++i]); @@ -4724,7 +4725,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { int nodes_involved = 0; int use_empty = config.cluster_manager_command.flags & CLUSTER_MANAGER_CMD_FLAG_EMPTYMASTER; - involved = listCreate(); listIter li; listNode 
*ln; @@ -4762,15 +4762,15 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { while ((ln = listNext(&li)) != NULL) { clusterManagerNode *n = ln->value; weightedNodes[i++] = n; - int expected = (((float)CLUSTER_MANAGER_SLOTS / total_weight) * - (int) n->weight); + int expected = (int) (((float)CLUSTER_MANAGER_SLOTS / total_weight) * + n->weight); n->balance = n->slots_count - expected; total_balance += n->balance; /* Compute the percentage of difference between the * expected number of slots and the real one, to see * if it's over the threshold specified by the user. */ int over_threshold = 0; - if (config.cluster_manager_command.threshold > 0) { + if (threshold > 0) { if (n->slots_count > 0) { float err_perc = fabs((100-(100.0*expected/n->slots_count))); if (err_perc > threshold) over_threshold = 1; @@ -4784,7 +4784,6 @@ static int clusterManagerCommandRebalance(int argc, char **argv) { clusterManagerLogWarn("*** No rebalancing needed! " "All nodes are within the %.2f%% threshold.\n", config.cluster_manager_command.threshold); - result = 0; goto cleanup; } /* Because of rounding, it is possible that the balance of all nodes From af4584d608beb12c8f9fb33d209b507c4d8ea944 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 20 Apr 2018 19:29:42 +0200 Subject: [PATCH 60/66] Cluster tests now using redis-cli instead of redis-trib --- tests/cluster/tests/04-resharding.tcl | 10 +++++----- tests/cluster/tests/12-replica-migration-2.tcl | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/cluster/tests/04-resharding.tcl b/tests/cluster/tests/04-resharding.tcl index 0ccbf717..68fba135 100644 --- a/tests/cluster/tests/04-resharding.tcl +++ b/tests/cluster/tests/04-resharding.tcl @@ -73,12 +73,12 @@ test "Cluster consistency during live resharding" { flush stdout set target [dict get [get_myself [randomInt 5]] id] set tribpid [lindex [exec \ - ../../../src/redis-trib.rb reshard \ - --from all \ - --to $target \ - --slots 100 \ 
- --yes \ + ../../../src/redis-cli --cluster reshard \ 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-from all \ + --cluster-to $target \ + --cluster-slots 100 \ + --cluster-yes \ | [info nameofexecutable] \ ../tests/helpers/onlydots.tcl \ &] 0] diff --git a/tests/cluster/tests/12-replica-migration-2.tcl b/tests/cluster/tests/12-replica-migration-2.tcl index 48ecd1d5..3d8b7b04 100644 --- a/tests/cluster/tests/12-replica-migration-2.tcl +++ b/tests/cluster/tests/12-replica-migration-2.tcl @@ -31,9 +31,9 @@ test "Each master should have at least two replicas attached" { set master0_id [dict get [get_myself 0] id] test "Resharding all the master #0 slots away from it" { set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=0 \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=0 >@ stdout ] } test "Master #0 should lose its replicas" { @@ -49,10 +49,10 @@ test "Resharding back some slot to master #0" { # new resharding. 
after 10000 set output [exec \ - ../../../src/redis-trib.rb rebalance \ - --weight ${master0_id}=.01 \ - --use-empty-masters \ - 127.0.0.1:[get_instance_attrib redis 0 port] >@ stdout] + ../../../src/redis-cli --cluster rebalance \ + 127.0.0.1:[get_instance_attrib redis 0 port] \ + --cluster-weight ${master0_id}=.01 \ + --cluster-use-empty-masters >@ stdout] } test "Master #0 should re-acquire one or more replicas" { From 35b3a8e1ee145d40a51c586ca937bc86a4230f41 Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 15:56:12 +0200 Subject: [PATCH 61/66] - Updated create-cluster with redis-cli - Updated README --- utils/create-cluster/README | 2 +- utils/create-cluster/create-cluster | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/create-cluster/README b/utils/create-cluster/README index f2a89839..e682f6dc 100644 --- a/utils/create-cluster/README +++ b/utils/create-cluster/README @@ -15,7 +15,7 @@ To create a cluster, follow these steps: 1. Edit create-cluster and change the start / end port, depending on the number of instances you want to create. 2. Use "./create-cluster start" in order to run the instances. -3. Use "./create-cluster create" in order to execute redis-trib create, so that +3. Use "./create-cluster create" in order to execute redis-cli --cluster create, so that an actual Redis cluster will be created. 4. Now you are ready to play with the cluster. AOF files and logs for each instances are created in the current directory. 
diff --git a/utils/create-cluster/create-cluster b/utils/create-cluster/create-cluster index d821683f..468f924a 100755 --- a/utils/create-cluster/create-cluster +++ b/utils/create-cluster/create-cluster @@ -34,7 +34,7 @@ then PORT=$((PORT+1)) HOSTS="$HOSTS 127.0.0.1:$PORT" done - ../../src/redis-trib.rb create --replicas $REPLICAS $HOSTS + ../../src/redis-cli --cluster create $HOSTS --cluster-replicas $REPLICAS exit 0 fi @@ -94,7 +94,7 @@ fi echo "Usage: $0 [start|create|stop|watch|tail|clean]" echo "start -- Launch Redis Cluster instances." -echo "create -- Create a cluster using redis-trib create." +echo "create -- Create a cluster using redis-cli --cluster create." echo "stop -- Stop Redis Cluster instances." echo "watch -- Show CLUSTER NODES output (first 30 lines) of first node." echo "tail -- Run tail -f of instance at base port + ID." From 3312de067cfe78a63401059defa1c0c4a8274b10 Mon Sep 17 00:00:00 2001 From: artix Date: Mon, 7 May 2018 17:31:34 +0200 Subject: [PATCH 62/66] Cluster Manager: --cluster options can now be placed everywhere --- src/redis-cli.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index bdc4b7b4..85588fe4 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1293,8 +1293,8 @@ static int parseOptions(int argc, char **argv) { if (CLUSTER_MANAGER_MODE()) usage(); char *cmd = argv[++i]; int j = i; - for (; j < argc; j++) if (argv[j][0] == '-') break; - j--; + while (j < argc && argv[j][0] != '-') j++; + if (j > i) j--; createClusterManagerCommand(cmd, j - i, argv + i + 1); i = j; } else if (!strcmp(argv[i],"--cluster") && lastarg) { @@ -1351,6 +1351,15 @@ static int parseOptions(int argc, char **argv) { printf("redis-cli %s\n", version); sdsfree(version); exit(0); + } else if (CLUSTER_MANAGER_MODE() && argv[i][0] != '-') { + if (config.cluster_manager_command.argc == 0) { + int j = i + 1; + while (j < argc && argv[j][0] != '-') j++; + int cmd_argc = j - i; + 
config.cluster_manager_command.argc = cmd_argc; + config.cluster_manager_command.argv = argv + i; + if (cmd_argc > 1) i = j - 1; + } } else { if (argv[i][0] == '-') { fprintf(stderr, From 3c039996b533c48b5de8e54e90441b2eda632393 Mon Sep 17 00:00:00 2001 From: artix Date: Fri, 11 May 2018 18:28:10 +0200 Subject: [PATCH 63/66] - Fixed mistyped redis command (clusterManagerGetNodeWithMostKeysInSlot) - Cluster node structure is now updated after ADDSLOTS --- src/redis-cli.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 85588fe4..d591bcd0 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -3316,7 +3316,7 @@ static clusterManagerNode * clusterManagerGetNodeWithMostKeysInSlot(list *nodes, if (n->flags & CLUSTER_MANAGER_FLAG_SLAVE || n->replicate) continue; redisReply *r = - CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOTi %d", slot); + CLUSTER_MANAGER_COMMAND(n, "CLUSTER COUNTKEYSINSLOT %d", slot); int success = clusterManagerCheckRedisReply(n, r, err); if (success) { if (r->integer > numkeys || node == NULL) { @@ -3446,6 +3446,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3474,6 +3477,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(n, r, NULL)) fixed = -1; if (r) freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeeded, we also update the slot + * info into the node struct, in order to keep it synced */ + n->slots[atoi(slot)] = 1; fixed++; } } @@ -3513,6 +3519,9 @@ static int clusterManagerFixSlotsCoverage(char *all_slots) { if (!clusterManagerCheckRedisReply(target, r, NULL)) fixed = -1; if (r)
freeReplyObject(r); if (fixed < 0) goto cleanup; + /* Since CLUSTER ADDSLOTS succeeded, we also update the slot + * info into the node struct, in order to keep it synced */ + target->slots[atoi(slot)] = 1; listIter nli; listNode *nln; listRewind(nodes, &nli); @@ -3633,6 +3642,9 @@ static int clusterManagerFixOpenSlot(int slot) { success = clusterManagerCheckRedisReply(owner, reply, NULL); if (reply) freeReplyObject(reply); if (!success) goto cleanup; + /* Since CLUSTER ADDSLOTS succeeded, we also update the slot + * info into the node struct, in order to keep it synced */ + owner->slots[slot] = 1; /* Make sure this information will propagate. Not strictly needed * since there is no past owner, so all the other nodes will accept * whatever epoch this node will claim the slot with. */ From 2f9c032a136cd564d915d4db65a4d95f101a8940 Mon Sep 17 00:00:00 2001 From: artix Date: Tue, 15 May 2018 18:41:46 +0200 Subject: [PATCH 64/66] Cluster Manager: print flags as strings. --- src/redis-cli.c | 90 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 18 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index d591bcd0..c108e673 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -1815,6 +1815,7 @@ typedef struct clusterManagerNode { time_t ping_sent; time_t ping_recv; int flags; + list *flags_str; /* Flags string representations */ sds replicate; /* Master ID if node is a slave */ list replicas; int dirty; /* Node has changes that can be flushed */ @@ -2001,6 +2002,17 @@ static int getClusterHostFromCmdArgs(int argc, char **argv, return 1; } +static void freeClusterManagerNodeFlags(list *flags) { + listIter li; + listNode *ln; + listRewind(flags, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + sdsfree(flag); + } + listRelease(flags); +} + static void freeClusterManagerNode(clusterManagerNode *node) { if (node->context != NULL) redisFree(node->context); if (node->friends != NULL) { @@ -2027,6 +2039,10 @@ static
void freeClusterManagerNode(clusterManagerNode *node) { for (i = 0; i < node->importing_count; i++) sdsfree(node->importing[i]); zfree(node->importing); } + if (node->flags_str != NULL) { + freeClusterManagerNodeFlags(node->flags_str); + node->flags_str = NULL; + } zfree(node); } @@ -2065,6 +2081,7 @@ static clusterManagerNode *clusterManagerNewNode(char *ip, int port) { node->ping_sent = 0; node->ping_recv = 0; node->flags = 0; + node->flags_str = NULL; node->replicate = NULL; node->dirty = 0; node->friends = NULL; @@ -2391,6 +2408,24 @@ cleanup: zfree(offenders); } +/* Return the node's flags as a comma-separated sds, skipping "myself" (empty if the node has no flags list). The caller must sdsfree() it. */ +static sds clusterManagerNodeFlagString(clusterManagerNode *node) { + sds flags = sdsempty(); + if (!node->flags_str) return flags; + int empty = 1; + listIter li; + listNode *ln; + listRewind(node->flags_str, &li); + while ((ln = listNext(&li)) != NULL) { + sds flag = ln->value; + if (strcmp(flag, "myself") == 0) continue; + if (!empty) flags = sdscat(flags, ","); + flags = sdscatfmt(flags, "%S", flag); + empty = 0; + } + return flags; +} + /* Return a representable string of the node's slots */ static sds clusterManagerNodeSlotsString(clusterManagerNode *node) { sds slots = sdsempty(); @@ -2466,12 +2501,14 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { info = sdscatfmt(info, "S: %S %s:%u", node->name, node->ip, node->port); else { slots = clusterManagerNodeSlotsString(node); + sds flags = clusterManagerNodeFlagString(node); info = sdscatfmt(info, "%s: %S %s:%u\n" "%s slots:%S (%u slots) " - "", //TODO: flags string + "%S", role, node->name, node->ip, node->port, spaces, - slots, node->slots_count); + slots, node->slots_count, flags); sdsfree(slots); + sdsfree(flags); } if (node->replicate != NULL) info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); @@ -3008,18 +3045,35 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, if (currentNode->name) sdsfree(currentNode->name);
currentNode->name = sdsnew(name); } - if (strstr(flags, "noaddr") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; - if (strstr(flags, "disconnected") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; - if (strstr(flags, "fail") != NULL) - currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; - if (strstr(flags, "slave") != NULL) { - currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; - if (master_id != NULL) { - if (currentNode->replicate) sdsfree(currentNode->replicate); - currentNode->replicate = sdsnew(master_id); + if (currentNode->flags_str != NULL) + freeClusterManagerNodeFlags(currentNode->flags_str); + currentNode->flags_str = listCreate(); + int flag_len; + while ((flag_len = strlen(flags)) > 0) { + sds flag = NULL; + char *fp = strchr(flags, ','); + if (fp) { + *fp = '\0'; + flag = sdsnew(flags); + flags = fp + 1; + } else { + flag = sdsnew(flags); + flags += flag_len; } + if (strcmp(flag, "noaddr") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_NOADDR; + else if (strcmp(flag, "disconnected") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_DISCONNECT; + else if (strcmp(flag, "fail") == 0) + currentNode->flags |= CLUSTER_MANAGER_FLAG_FAIL; + else if (strcmp(flag, "slave") == 0) { + currentNode->flags |= CLUSTER_MANAGER_FLAG_SLAVE; + if (master_id != NULL) { + if (currentNode->replicate) sdsfree(currentNode->replicate); + currentNode->replicate = sdsnew(master_id); + } + } + listAddNodeTail(currentNode->flags_str, flag); } if (config_epoch != NULL) currentNode->current_epoch = atoll(config_epoch); @@ -4283,12 +4337,12 @@ assign_replicas: goto cleanup; } } - // Give one second for the join to start, in order to avoid that - // waiting for cluster join will find all the nodes agree about - // the config as they are still empty with unassigned slots.
+ /* Give one second for the join to start, in order to avoid that + * waiting for cluster join will find all the nodes agree about + * the config as they are still empty with unassigned slots. */ sleep(1); clusterManagerWaitForClusterJoin(); - // Useful for the replicas //TODO: create a function for this? + /* Useful for the replicas */ listRewind(cluster_manager.nodes, &li); while ((ln = listNext(&li)) != NULL) { clusterManagerNode *node = ln->value; @@ -4315,7 +4369,7 @@ assign_replicas: listEmpty(cluster_manager.nodes); if (!clusterManagerLoadInfoFromNode(first_node, 0)) { success = 0; - goto cleanup; //TODO: msg? + goto cleanup; } clusterManagerCheckCluster(0); } From 1e4fb1b33a92e85c189510a46acdf37937a325cd Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 17:49:18 +0200 Subject: [PATCH 65/66] Cluster Manager: fixed unprinted reply error --- src/redis-cli.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index c108e673..9ea47ab0 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -2773,7 +2773,8 @@ static int clusterManagerMigrateKeysInSlot(clusterManagerNode *source, strcpy(*err, migrate_reply->str); } printf("\n"); - CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, err); + CLUSTER_MANAGER_PRINT_REPLY_ERROR(source, + migrate_reply->str); } goto next; } @@ -3021,7 +3022,6 @@ static int clusterManagerNodeLoadInfo(clusterManagerNode *node, int opts, else break; } else { if (addr == NULL) { - // TODO: find a better err message fprintf(stderr, "Error: invalid CLUSTER NODES reply\n"); success = 0; goto cleanup; @@ -4602,7 +4602,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { fflush(stdout); char buf[6]; int nread = read(fileno(stdin),buf,6); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4619,7 +4619,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("What is 
the receiving node ID? "); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -4643,7 +4643,7 @@ static int clusterManagerCommandReshard(int argc, char **argv) { printf("Source node #%lu: ", listLength(sources) + 1); fflush(stdout); int nread = read(fileno(stdin),buf,255); - if (!nread) continue; //TODO: nread < 0 + if (nread <= 0) continue; int last_idx = nread - 1; if (buf[last_idx] != '\n') { int ch; @@ -5176,7 +5176,7 @@ static int clusterManagerCommandCall(int argc, char **argv) { redisAppendCommandArgv(n->context, argc, (const char **) argv, argvlen); int status = redisGetReply(n->context, (void **)(&reply)); if (status != REDIS_OK || reply == NULL ) - printf("%s:%d: Failed!\n", n->ip, n->port); //TODO: better message? + printf("%s:%d: Failed!\n", n->ip, n->port); else { sds formatted_reply = cliFormatReplyTTY(reply, ""); printf("%s:%d: %s\n", n->ip, n->port, (char *) formatted_reply); From e47c751c7414c2747c7c2d934656978ecddbe9bf Mon Sep 17 00:00:00 2001 From: artix Date: Wed, 16 May 2018 18:04:13 +0200 Subject: [PATCH 66/66] Removed TODO in redis-cli --- src/redis-cli.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/redis-cli.c b/src/redis-cli.c index 9ea47ab0..850b1024 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -4522,7 +4522,6 @@ static int clusterManagerCommandDeleteNode(int argc, char **argv) { if (n->replicate && !strcasecmp(n->replicate, node_id)) { // Reconfigure the slave to replicate with some other node clusterManagerNode *master = clusterManagerNodeWithLeastReplicas(); - //TODO: check whether master could be the same as node assert(master != NULL); clusterManagerLogInfo(">>> %s:%d as replica of %s:%d\n", n->ip, n->port, master->ip, master->port);