From 1a02b7440aa02cb24c845d8bda48b98c4a60eb4c Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 5 Mar 2013 13:15:05 +0100 Subject: [PATCH] Cluster: new node field fail_time. This is the unix time at which we set the FAIL flag for the node. It is only valid if FAIL is set. The idea is to use it in order to make the cluster more robust, for instance in order to revert a FAIL state if it is long-standing but slots are still assigned to this node, that is, apparently no one is going to fix these slots. --- src/cluster.c | 4 ++++ src/redis.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/cluster.c b/src/cluster.c index 1bc4cc0c..65de12fb 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -107,6 +107,7 @@ int clusterLoadConfig(char *filename) { n->flags |= REDIS_NODE_PFAIL; } else if (!strcasecmp(s,"fail")) { n->flags |= REDIS_NODE_FAIL; + n->fail_time = time(NULL); } else if (!strcasecmp(s,"handshake")) { n->flags |= REDIS_NODE_HANDSHAKE; } else if (!strcasecmp(s,"noaddr")) { @@ -341,6 +342,7 @@ clusterNode *createClusterNode(char *nodename, int flags) { node->slaves = NULL; node->slaveof = NULL; node->ping_sent = node->pong_received = 0; + node->fail_time = 0; node->configdigest = NULL; node->configdigest_ts = 0; node->link = NULL; @@ -594,6 +596,7 @@ void markNodeAsFailingIfNeeded(clusterNode *node) { /* Mark the node as failing. 
*/ node->flags &= ~REDIS_NODE_PFAIL; node->flags |= REDIS_NODE_FAIL; + node->fail_time = time(NULL); /* Broadcast the failing node name to everybody */ clusterSendFail(node->name); @@ -907,6 +910,7 @@ int clusterProcessPacket(clusterLink *link) { "FAIL message received from %.40s about %.40s", hdr->sender, hdr->data.fail.about.nodename); failing->flags |= REDIS_NODE_FAIL; + failing->fail_time = time(NULL); failing->flags &= ~REDIS_NODE_PFAIL; clusterUpdateState(); clusterSaveConfigOrDie(); diff --git a/src/redis.h b/src/redis.h index d93455ba..940b4f26 100644 --- a/src/redis.h +++ b/src/redis.h @@ -559,6 +559,7 @@ struct clusterNode { struct clusterNode *slaveof; /* pointer to the master node */ time_t ping_sent; /* Unix time we sent latest ping */ time_t pong_received; /* Unix time we received the pong */ + time_t fail_time; /* Unix time when FAIL flag was set */ char *configdigest; /* Configuration digest of this node */ time_t configdigest_ts; /* Configuration digest timestamp */ char ip[16]; /* Latest known IP address of this node */