mirror of
https://github.com/fluencelabs/redis
synced 2025-03-18 16:40:50 +00:00
Cluster: slave node now uses the new protocol to get elected.
This commit is contained in:
parent
656c3ffe4a
commit
fb9b76fe14
@ -257,6 +257,7 @@ void clusterInit(void) {
|
||||
server.cluster->nodes = dictCreate(&clusterNodesDictType,NULL);
|
||||
server.cluster->failover_auth_time = 0;
|
||||
server.cluster->failover_auth_count = 0;
|
||||
server.cluster->failover_auth_epoch = 0;
|
||||
memset(server.cluster->migrating_slots_to,0,
|
||||
sizeof(server.cluster->migrating_slots_to));
|
||||
memset(server.cluster->importing_slots_from,0,
|
||||
@ -1581,16 +1582,22 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) {
|
||||
*
|
||||
* The goal of this function is:
|
||||
* 1) To check if we are able to perform a failover, is our data updated?
|
||||
* 2) Ask reachable masters the authorization to perform the failover.
|
||||
* 2) Try to get elected by masters.
|
||||
* 3) Check if there is the majority of masters agreeing we should failover.
|
||||
* 4) Perform the failover informing all the other nodes.
|
||||
*/
|
||||
void clusterHandleSlaveFailover(void) {
|
||||
time_t data_age = server.unixtime - server.repl_down_since;
|
||||
time_t auth_age = server.unixtime - server.cluster->failover_auth_time;
|
||||
mstime_t auth_age = mstime() - server.cluster->failover_auth_time;
|
||||
int needed_quorum = (server.cluster->size / 2) + 1;
|
||||
int j;
|
||||
|
||||
/* Remove the node timeout from the data age as it is fine that we are
|
||||
* disconnected from our master at least for the time it was down to be
|
||||
* flagged as FAIL, that's the baseline. */
|
||||
if (data_age > server.cluster_node_timeout)
|
||||
data_age -= server.cluster_node_timeout;
|
||||
|
||||
/* Check if our data is recent enough. For now we just use a fixed
|
||||
* constant of ten times the node timeout since the cluster should
|
||||
* react much faster to a master down. */
|
||||
@ -1598,19 +1605,37 @@ void clusterHandleSlaveFailover(void) {
|
||||
server.cluster_node_timeout * REDIS_CLUSTER_SLAVE_VALIDITY_MULT)
|
||||
return;
|
||||
|
||||
/* TODO: check if we are the first slave as well? Or just rely on the
|
||||
* master authorization? */
|
||||
|
||||
/* Ask masters if we are authorized to perform the failover. If there
|
||||
* is a pending auth request that's too old, reset it. */
|
||||
/* Compute the time at which we can start an election. */
|
||||
if (server.cluster->failover_auth_time == 0 ||
|
||||
auth_age >
|
||||
server.cluster_node_timeout * REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT)
|
||||
server.cluster_node_timeout * 1000 * REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT)
|
||||
{
|
||||
redisLog(REDIS_WARNING,"Asking masters if I can failover...");
|
||||
server.cluster->failover_auth_time = time(NULL);
|
||||
server.cluster->failover_auth_time = mstime() +
|
||||
500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */
|
||||
data_age * 100 + /* Add 100 milliseconds for every second of age. */
|
||||
random() % 500; /* Random delay between 0 and 500 milliseconds. */
|
||||
server.cluster->failover_auth_count = 0;
|
||||
server.cluster->failover_auth_sent = 0;
|
||||
redisLog(REDIS_WARNING,"Start of election delayed for %lld milliseconds.",
|
||||
server.cluster->failover_auth_time - mstime());
|
||||
return;
|
||||
}
|
||||
|
||||
/* Return ASAP if we can't start the election yet. */
|
||||
if (mstime() < server.cluster->failover_auth_time) return;
|
||||
|
||||
/* Return ASAP if the election is too old to be valid. */
|
||||
if (mstime() - server.cluster->failover_auth_time > server.cluster_node_timeout)
|
||||
return;
|
||||
|
||||
/* Ask for votes if needed. */
|
||||
if (server.cluster->failover_auth_sent == 0) {
|
||||
server.cluster->currentEpoch++;
|
||||
server.cluster->failover_auth_epoch = server.cluster->currentEpoch;
|
||||
redisLog(REDIS_WARNING,"Starting a failover election for epoch %llu.",
|
||||
server.cluster->currentEpoch);
|
||||
clusterRequestFailoverAuth();
|
||||
server.cluster->failover_auth_sent = 1;
|
||||
return; /* Wait for replies. */
|
||||
}
|
||||
|
||||
@ -1619,7 +1644,7 @@ void clusterHandleSlaveFailover(void) {
|
||||
clusterNode *oldmaster = server.cluster->myself->slaveof;
|
||||
|
||||
redisLog(REDIS_WARNING,
|
||||
"Masters quorum reached: failing over my (failing) master.");
|
||||
"Failover election won: failing over my (failing) master.");
|
||||
/* We have the quorum, perform all the steps to correctly promote
|
||||
* this slave to a master.
|
||||
*
|
||||
@ -1644,7 +1669,10 @@ void clusterHandleSlaveFailover(void) {
|
||||
* accordingly and detect that we switched to master role. */
|
||||
clusterBroadcastPong();
|
||||
|
||||
/* 4) Update state and save config. */
|
||||
/* 4) Update my configEpoch to the epoch of the election. */
|
||||
server.cluster->myself->configEpoch = server.cluster->failover_auth_epoch;
|
||||
|
||||
/* 5) Update state and save config. */
|
||||
clusterUpdateState();
|
||||
clusterSaveConfigOrDie();
|
||||
}
|
||||
|
11
src/redis.h
11
src/redis.h
@ -368,6 +368,8 @@
|
||||
* Data types
|
||||
*----------------------------------------------------------------------------*/
|
||||
|
||||
typedef long long mstime_t; /* millisecond time type. */
|
||||
|
||||
/* A redis object, that is a type able to hold a string / list / set */
|
||||
|
||||
/* The actual Redis Object */
|
||||
@ -581,7 +583,7 @@ typedef struct redisOpArray {
|
||||
#define REDIS_CLUSTER_FAIL_UNDO_TIME_MULT 2 /* Undo fail if master is back. */
|
||||
#define REDIS_CLUSTER_FAIL_UNDO_TIME_ADD 10 /* Some additional time. */
|
||||
#define REDIS_CLUSTER_SLAVE_VALIDITY_MULT 10 /* Slave data validity. */
|
||||
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 1 /* Auth request retry time. */
|
||||
#define REDIS_CLUSTER_FAILOVER_AUTH_RETRY_MULT 4 /* Auth request retry time. */
|
||||
#define REDIS_CLUSTER_FAILOVER_DELAY 5 /* Seconds */
|
||||
|
||||
struct clusterNode;
|
||||
@ -643,8 +645,11 @@ typedef struct {
|
||||
clusterNode *importing_slots_from[REDIS_CLUSTER_SLOTS];
|
||||
clusterNode *slots[REDIS_CLUSTER_SLOTS];
|
||||
zskiplist *slots_to_keys;
|
||||
int failover_auth_time; /* Time at which we sent the AUTH request. */
|
||||
int failover_auth_count; /* Number of authorizations received. */
|
||||
/* The following fields are used to track the slave state during elections. */
|
||||
mstime_t failover_auth_time;/* Time at which we'll try to get elected in ms. */
|
||||
int failover_auth_count; /* Number of votes received so far. */
|
||||
int failover_auth_sent; /* True if we already asked for votes. */
|
||||
uint64_t failover_auth_epoch; /* Epoch of the current election. */
|
||||
} clusterState;
|
||||
|
||||
/* Redis cluster messages header */
|
||||
|
@ -43,8 +43,6 @@ extern char **environ;
|
||||
|
||||
/* ======================== Sentinel global state =========================== */
|
||||
|
||||
typedef long long mstime_t; /* millisecond time type. */
|
||||
|
||||
/* Address object, used to describe an ip:port pair. */
|
||||
typedef struct sentinelAddr {
|
||||
char *ip;
|
||||
|
Loading…
x
Reference in New Issue
Block a user