Sentinel failure detection implementation improved.

Failure detection in Sentinel is ping-pong based. It used to work by
remembering the last time a valid PONG reply was received, and checking
if the reception time was too old compared to the current current time.

PINGs were sent at a fixed interval of 1 second.

This works in a decent way, but does not scale well when we want to set
very small values of "down-after-milliseconds" (this is the node
timeout basically).

This commit reiplements the failure detection making a number of
changes. Some changes are inspired to Redis Cluster failure detection
code:

* A new last_ping_time field is added in representation of instances.
  If non zero, we have an active ping that was sent at the specified
  time. When a valid reply to ping is received, the field is zeroed
  again.
* last_ping_time is not reset when we reconnect the link or send a new
  ping, so from our point of view it represents the time we started
  waiting for the instance to reply to our pings without receiving a
  reply.
* last_ping_time is now used in order to check if the instance is
  timed out. This means that we can have a node timeout of 100
  milliseconds and yet the system will work well since the new check is
  not bound to the period used to send pings.
* Pings are now sent every second, or often if the value of
  down-after-milliseconds is less than one second. With a lower limit of
  10 HZ ping frequency.
* Link reconnection code was improved. This is used in order to try to
  reconnect the link when we are at 50% of the node timeout without a
  valid reply received yet. However the old code triggered unnecessary
  reconnections when the node timeout was very small. Now that should be
  ok.

The new code passes the tests but more testing is needed and more unit
tests stressing the failure detector, so currently this is merged only
in the unstable branch.
This commit is contained in:
antirez 2014-03-17 17:20:44 +01:00
parent 3a2ff55617
commit ae0b7680b3

View File

@ -129,6 +129,10 @@ typedef struct sentinelRedisInstance {
mstime_t pc_last_activity; /* Last time we received any message. */
mstime_t last_avail_time; /* Last time the instance replied to ping with
a reply we consider valid. */
mstime_t last_ping_time; /* Last time a pending ping was sent in the
context of the current command connection
with the instance. 0 if still not sent or
if pong already received. */
mstime_t last_pong_time; /* Last time the instance replied to ping,
whatever the reply was. That's used to check
if the link is idle and must be reconnected. */
@ -329,6 +333,7 @@ int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port);
char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char *req_runid, uint64_t *leader_epoch);
void sentinelFlushConfig(void);
void sentinelGenerateInitialMonitorEvents(void);
int sentinelSendPing(sentinelRedisInstance *ri);
/* ========================= Dictionary types =============================== */
@ -925,6 +930,11 @@ sentinelRedisInstance *createSentinelRedisInstance(char *name, int flags, char *
ri->cc_conn_time = 0;
ri->pc_conn_time = 0;
ri->pc_last_activity = 0;
/* We set the last_ping_time to "now" even if we actually don't have yet
* a connection with the node, nor we sent a ping.
* This is useful to detect a timeout in case we'll not be able to connect
* with the node at all. */
ri->last_ping_time = mstime();
ri->last_avail_time = mstime();
ri->last_pong_time = mstime();
ri->last_pub_time = mstime();
@ -1161,6 +1171,7 @@ void sentinelResetMaster(sentinelRedisInstance *ri, int flags) {
sdsfree(ri->slave_master_host);
ri->runid = NULL;
ri->slave_master_host = NULL;
ri->last_ping_time = mstime();
ri->last_avail_time = mstime();
ri->last_pong_time = mstime();
ri->role_reported_time = mstime();
@ -1655,6 +1666,9 @@ void sentinelReconnectInstance(sentinelRedisInstance *ri) {
sentinelDisconnectCallback);
sentinelSendAuthIfNeeded(ri,ri->cc);
sentinelSetClientName(ri,ri->cc,"cmd");
/* Send a PING ASAP when reconnecting. */
sentinelSendPing(ri);
}
}
/* Pub / Sub */
@ -1990,6 +2004,7 @@ void sentinelPingReplyCallback(redisAsyncContext *c, void *reply, void *privdata
strncmp(r->str,"MASTERDOWN",10) == 0)
{
ri->last_avail_time = mstime();
ri->last_ping_time = 0; /* Flag the pong as received. */
} else {
/* Send a SCRIPT KILL command if the instance appears to be
* down because of a busy script. */
@ -2186,11 +2201,31 @@ int sentinelSendHello(sentinelRedisInstance *ri) {
return REDIS_OK;
}
/* Send a PING to the specified instance and refresh the last_ping_time
* if it is zero (that is, if we received a pong for the previous ping).
*
* On error zero is returned, and we can't consider the PING command
* queued in the connection. */
int sentinelSendPing(sentinelRedisInstance *ri) {
int retval = redisAsyncCommand(ri->cc,
sentinelPingReplyCallback, NULL, "PING");
if (retval == REDIS_OK) {
ri->pending_commands++;
/* We update the ping time only if we received the pong for
* the previous ping, otherwise we are technically waiting
* since the first ping that did not received a reply. */
if (ri->last_ping_time == 0) ri->last_ping_time = mstime();
return 1;
} else {
return 0;
}
}
/* Send periodic PING, INFO, and PUBLISH to the Hello channel to
* the specified master or slave instance. */
void sentinelPingInstance(sentinelRedisInstance *ri) {
void sentinelSendPeriodicCommands(sentinelRedisInstance *ri) {
mstime_t now = mstime();
mstime_t info_period;
mstime_t info_period, ping_period;
int retval;
/* Return ASAP if we have already a PING or INFO already pending, or
@ -2216,6 +2251,12 @@ void sentinelPingInstance(sentinelRedisInstance *ri) {
info_period = SENTINEL_INFO_PERIOD;
}
/* We ping instances every time the last received pong is older than
* the configured 'down-after-milliseconds' time, but every second
* anyway if 'down-after-milliseconds' is greater than 1 second. */
ping_period = ri->down_after_period;
if (ping_period > SENTINEL_PING_PERIOD) ping_period = SENTINEL_PING_PERIOD;
if ((ri->flags & SRI_SENTINEL) == 0 &&
(ri->info_refresh == 0 ||
(now - ri->info_refresh) > info_period))
@ -2223,14 +2264,10 @@ void sentinelPingInstance(sentinelRedisInstance *ri) {
/* Send INFO to masters and slaves, not sentinels. */
retval = redisAsyncCommand(ri->cc,
sentinelInfoReplyCallback, NULL, "INFO");
if (retval != REDIS_OK) return;
ri->pending_commands++;
} else if ((now - ri->last_pong_time) > SENTINEL_PING_PERIOD) {
if (retval == REDIS_OK) ri->pending_commands++;
} else if ((now - ri->last_pong_time) > ping_period) {
/* Send PING to all the three kinds of instances. */
retval = redisAsyncCommand(ri->cc,
sentinelPingReplyCallback, NULL, "PING");
if (retval != REDIS_OK) return;
ri->pending_commands++;
sentinelSendPing(ri);
} else if ((now - ri->last_pub_time) > SENTINEL_PUBLISH_PERIOD) {
/* PUBLISH hello messages to all the three kinds of instances. */
sentinelSendHello(ri);
@ -2306,6 +2343,11 @@ void addReplySentinelRedisInstance(redisClient *c, sentinelRedisInstance *ri) {
fields++;
}
addReplyBulkCString(c,"last-ping-sent");
addReplyBulkLongLong(c,
ri->last_ping_time ? (mstime() - ri->last_ping_time) : 0);
fields++;
addReplyBulkCString(c,"last-ok-ping-reply");
addReplyBulkLongLong(c,mstime() - ri->last_avail_time);
fields++;
@ -2805,16 +2847,23 @@ void sentinelPublishCommand(redisClient *c) {
/* Is this instance down from our point of view? */
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
mstime_t elapsed = mstime() - ri->last_avail_time;
mstime_t elapsed = 0;
if (ri->last_ping_time)
elapsed = mstime() - ri->last_ping_time;
/* Check if we are in need for a reconnection of one of the
* links, because we are detecting low activity.
*
* 1) Check if the command link seems connected, was connected not less
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have an
* idle time that is greater than down_after_period / 2 seconds. */
* than SENTINEL_MIN_LINK_RECONNECT_PERIOD, but still we have a
* pending ping for more than half the timeout. */
if (ri->cc &&
(mstime() - ri->cc_conn_time) > SENTINEL_MIN_LINK_RECONNECT_PERIOD &&
ri->last_ping_time != 0 && /* Ther is a pending ping... */
/* The pending ping is delayed, and we did not received
* error replies as well. */
(mstime() - ri->last_ping_time) > (ri->down_after_period/2) &&
(mstime() - ri->last_pong_time) > (ri->down_after_period/2))
{
sentinelKillLink(ri,ri->cc);
@ -3570,7 +3619,7 @@ void sentinelHandleRedisInstance(sentinelRedisInstance *ri) {
/* ========== MONITORING HALF ============ */
/* Every kind of instance */
sentinelReconnectInstance(ri);
sentinelPingInstance(ri);
sentinelSendPeriodicCommands(ri);
/* ============== ACTING HALF ============= */
/* We don't proceed with the acting half if we are in TILT mode.