Cluster: more chatty slaves when failover is stalled.

This commit is contained in:
antirez 2014-10-07 09:51:55 +02:00
parent e4b0c8ec50
commit edb3987a06
2 changed files with 94 additions and 4 deletions

View File

@ -417,6 +417,7 @@ void clusterInit(void) {
server.cluster->failover_auth_count = 0;
server.cluster->failover_auth_rank = 0;
server.cluster->failover_auth_epoch = 0;
server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
server.cluster->lastVoteEpoch = 0;
server.cluster->stats_bus_messages_sent = 0;
server.cluster->stats_bus_messages_received = 0;
@ -2430,6 +2431,68 @@ int clusterGetSlaveRank(void) {
return rank;
}
/* This function is called by clusterHandleSlaveFailover() in order to
* let the slave log why it is not able to failover. Sometimes there are
* not the conditions, but since the failover function is called again and
* again, we can't log the same things continuously.
*
* This function works by logging only if a given set of conditions are
* true:
*
* 1) The reason for which the failover can't be initiated changed.
* The reasons also include a NONE reason we reset the state to
* when the slave finds that its master is fine (no FAIL flag).
* 2) Also, the log is emitted again if the master is still down and
* the reason for not failing over is still the same, but more than
* REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
* 3) Finally, the function only logs if the slave is down for more than
* five seconds + NODE_TIMEOUT. This way nothing is logged when a
* failover starts in a reasonable time.
*
* The function is called with the reason why the slave can't failover
* which is one of the integer macros REDIS_CLUSTER_CANT_FAILOVER_*.
*
* The function is guaranteed to be called only if 'myself' is a slave. */
void clusterLogCantFailover(int reason) {
char *msg;
static time_t lastlog_time = 0;
mstime_t nolog_fail_time = server.cluster_node_timeout + 5000;
/* Don't log if we have the same reason for some time. */
if (reason == server.cluster->cant_failover_reason &&
time(NULL)-lastlog_time < REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
return;
server.cluster->cant_failover_reason = reason;
/* We also don't emit any log if the master failed no long ago, the
* goal of this function is to log slaves in a stalled condition for
* a long time. */
if (myself->slaveof &&
nodeFailed(myself->slaveof) &&
(mstime() - myself->slaveof->fail_time) < nolog_fail_time) return;
switch(reason) {
case REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE:
msg = "Disconnected from master for longer than allowed.";
break;
case REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY:
msg = "Waiting the delay before I can start a new failover.";
break;
case REDIS_CLUSTER_CANT_FAILOVER_EXPIRED:
msg = "Failover attempt expired.";
break;
case REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES:
msg = "Waiting for votes, but majority still not reached.";
break;
default:
msg = "Unknown reason code.";
break;
}
lastlog_time = time(NULL);
redisLog(REDIS_WARNING,"Currently unable to failover: %s", msg);
}
/* This function is called if we are a slave node and our master serving
* a non-zero amount of hash slots is in FAIL state.
*
@ -2468,7 +2531,13 @@ void clusterHandleSlaveFailover(void) {
if (nodeIsMaster(myself) ||
myself->slaveof == NULL ||
(!nodeFailed(myself->slaveof) && !manual_failover) ||
myself->slaveof->numslots == 0) return;
myself->slaveof->numslots == 0)
{
/* There are no reasons to failover, so we set the reason why we
* are returning without failing over to NONE. */
server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
return;
}
/* Set data_age to the number of seconds we are disconnected from
* the master. */
@ -2494,7 +2563,10 @@ void clusterHandleSlaveFailover(void) {
(((mstime_t)server.repl_ping_slave_period * 1000) +
(server.cluster_node_timeout * server.cluster_slave_validity_factor)))
{
if (!manual_failover) return;
if (!manual_failover) {
clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE);
return;
}
}
/* If the previous failover attempt timedout and the retry time has
@ -2550,10 +2622,16 @@ void clusterHandleSlaveFailover(void) {
}
/* Return ASAP if we can't still start the election. */
if (mstime() < server.cluster->failover_auth_time) return;
if (mstime() < server.cluster->failover_auth_time) {
clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY);
return;
}
/* Return ASAP if the election is too old to be valid. */
if (auth_age > auth_timeout) return;
if (auth_age > auth_timeout) {
clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_EXPIRED);
return;
}
/* Ask for votes if needed. */
if (server.cluster->failover_auth_sent == 0) {
@ -2608,6 +2686,8 @@ void clusterHandleSlaveFailover(void) {
/* 6) If there was a manual failover in progress, clear the state. */
resetManualFailover();
} else {
clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES);
}
}

View File

@ -62,6 +62,14 @@ typedef struct clusterLink {
#define nodeTimedOut(n) ((n)->flags & REDIS_NODE_PFAIL)
#define nodeFailed(n) ((n)->flags & REDIS_NODE_FAIL)
/* Reasons why a slave is not able to failover. */
#define REDIS_CLUSTER_CANT_FAILOVER_NONE 0
#define REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE 1
#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
#define REDIS_CLUSTER_CANT_FAILOVER_EXPIRED 3
#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
#define REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */
/* This structure represent elements of node->fail_reports. */
typedef struct clusterNodeFailReport {
struct clusterNode *node; /* Node reporting the failure condition. */
@ -107,6 +115,8 @@ typedef struct clusterState {
int failover_auth_sent; /* True if we already asked for votes. */
int failover_auth_rank; /* This slave rank for current auth request. */
uint64_t failover_auth_epoch; /* Epoch of the current election. */
int cant_failover_reason; /* Why a slave is currently not able to
failover. See the CANT_FAILOVER_* macros. */
/* Manual failover state in common. */
mstime_t mf_end; /* Manual failover time limit (ms unixtime).
It is zero if there is no MF in progress. */