Cluster: more chatty slaves when failover is stalled.

2025-05-05 07:22:15 +00:00 · 2014-10-07 09:51:55 +02:00 · 2014-10-07 09:51:55 +02:00 · edb3987a06
commit edb3987a06
parent e4b0c8ec50
2 changed files with 94 additions and 4 deletions
--- a/src/cluster.c
+++ b/src/cluster.c
@ -417,6 +417,7 @@ void clusterInit(void) {
    server.cluster->failover_auth_count = 0;
    server.cluster->failover_auth_rank = 0;
    server.cluster->failover_auth_epoch = 0;
+    server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
    server.cluster->lastVoteEpoch = 0;
    server.cluster->stats_bus_messages_sent = 0;
    server.cluster->stats_bus_messages_received = 0;
@ -2430,6 +2431,68 @@ int clusterGetSlaveRank(void) {
    return rank;
 }

+/* This function is called by clusterHandleSlaveFailover() in order to
+ * let the slave log why it is not able to failover. Sometimes there are
+ * not the conditions, but since the failover function is called again and
+ * again, we can't log the same things continuously.
+ *
+ * This function works by logging only if a given set of conditions are
+ * true:
+ *
+ * 1) The reason for which the failover can't be initiated changed.
+ *    The reasons also include a NONE reason we reset the state to
+ *    when the slave finds that its master is fine (no FAIL flag).
+ * 2) Also, the log is emitted again if the master is still down and
+ *    the reason for not failing over is still the same, but more than
+ *    REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD seconds elapsed.
+ * 3) Finally, the function only logs if the slave is down for more than
+ *    five seconds + NODE_TIMEOUT. This way nothing is logged when a
+ *    failover starts in a reasonable time.
+ *
+ * The function is called with the reason why the slave can't failover
+ * which is one of the integer macros REDIS_CLUSTER_CANT_FAILOVER_*.
+ *
+ * The function is guaranteed to be called only if 'myself' is a slave. */
+void clusterLogCantFailover(int reason) {
+    char *msg;
+    static time_t lastlog_time = 0;
+    mstime_t nolog_fail_time = server.cluster_node_timeout + 5000;
+
+    /* Don't log if we have the same reason for some time. */
+    if (reason == server.cluster->cant_failover_reason &&
+        time(NULL)-lastlog_time < REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD)
+        return;
+
+    server.cluster->cant_failover_reason = reason;
+
+    /* We also don't emit any log if the master failed no long ago, the
+     * goal of this function is to log slaves in a stalled condition for
+     * a long time. */
+    if (myself->slaveof &&
+        nodeFailed(myself->slaveof) &&
+        (mstime() - myself->slaveof->fail_time) < nolog_fail_time) return;
+
+    switch(reason) {
+    case REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE:
+        msg = "Disconnected from master for longer than allowed.";
+        break;
+    case REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY:
+        msg = "Waiting the delay before I can start a new failover.";
+        break;
+    case REDIS_CLUSTER_CANT_FAILOVER_EXPIRED:
+        msg = "Failover attempt expired.";
+        break;
+    case REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES:
+        msg = "Waiting for votes, but majority still not reached.";
+        break;
+    default:
+        msg = "Unknown reason code.";
+        break;
+    }
+    lastlog_time = time(NULL);
+    redisLog(REDIS_WARNING,"Currently unable to failover: %s", msg);
+}
+
 /* This function is called if we are a slave node and our master serving
 * a non-zero amount of hash slots is in FAIL state.
 *
@ -2468,7 +2531,13 @@ void clusterHandleSlaveFailover(void) {
    if (nodeIsMaster(myself) ||
        myself->slaveof == NULL ||
        (!nodeFailed(myself->slaveof) && !manual_failover) ||
-        myself->slaveof->numslots == 0) return;
+        myself->slaveof->numslots == 0)
+    {
+        /* There are no reasons to failover, so we set the reason why we
+         * are returning without failing over to NONE. */
+        server.cluster->cant_failover_reason = REDIS_CLUSTER_CANT_FAILOVER_NONE;
+        return;
+    }

    /* Set data_age to the number of seconds we are disconnected from
     * the master. */
@ -2494,7 +2563,10 @@ void clusterHandleSlaveFailover(void) {
        (((mstime_t)server.repl_ping_slave_period * 1000) +
         (server.cluster_node_timeout * server.cluster_slave_validity_factor)))
    {
-        if (!manual_failover) return;
+        if (!manual_failover) {
+            clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE);
+            return;
+        }
    }

    /* If the previous failover attempt timedout and the retry time has
@ -2550,10 +2622,16 @@ void clusterHandleSlaveFailover(void) {
    }

    /* Return ASAP if we can't still start the election. */
-    if (mstime() < server.cluster->failover_auth_time) return;
+    if (mstime() < server.cluster->failover_auth_time) {
+        clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY);
+        return;
+    }

    /* Return ASAP if the election is too old to be valid. */
-    if (auth_age > auth_timeout) return;
+    if (auth_age > auth_timeout) {
+        clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_EXPIRED);
+        return;
+    }

    /* Ask for votes if needed. */
    if (server.cluster->failover_auth_sent == 0) {
@ -2608,6 +2686,8 @@ void clusterHandleSlaveFailover(void) {

        /* 6) If there was a manual failover in progress, clear the state. */
        resetManualFailover();
+    } else {
+        clusterLogCantFailover(REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES);
    }
 }

--- a/src/cluster.h
+++ b/src/cluster.h
@ -62,6 +62,14 @@ typedef struct clusterLink {
 #define nodeTimedOut(n) ((n)->flags & REDIS_NODE_PFAIL)
 #define nodeFailed(n) ((n)->flags & REDIS_NODE_FAIL)

+/* Reasons why a slave is not able to failover. */
+#define REDIS_CLUSTER_CANT_FAILOVER_NONE 0
+#define REDIS_CLUSTER_CANT_FAILOVER_DATA_AGE 1
+#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_DELAY 2
+#define REDIS_CLUSTER_CANT_FAILOVER_EXPIRED 3
+#define REDIS_CLUSTER_CANT_FAILOVER_WAITING_VOTES 4
+#define REDIS_CLUSTER_CANT_FAILOVER_RELOG_PERIOD (60*5) /* seconds. */
+
 /* This structure represent elements of node->fail_reports. */
 typedef struct clusterNodeFailReport {
    struct clusterNode *node;  /* Node reporting the failure condition. */
@ -107,6 +115,8 @@ typedef struct clusterState {
    int failover_auth_sent;     /* True if we already asked for votes. */
    int failover_auth_rank;     /* This slave rank for current auth request. */
    uint64_t failover_auth_epoch; /* Epoch of the current election. */
+    int cant_failover_reason;   /* Why a slave is currently not able to
+                                   failover. See the CANT_FAILOVER_* macros. */
    /* Manual failover state in common. */
    mstime_t mf_end;            /* Manual failover time limit (ms unixtime).
                                   It is zero if there is no MF in progress. */