Sentinel: always send CONFIG REWRITE when changing instance role.

This change makes Sentinel less fragile about a number of failure modes.

This commit also fixes a different bug as a side effect, SLAVEOF command
was sent multiple times without incrementing the pending commands count.
This commit is contained in:
antirez 2013-11-05 17:23:11 +01:00
parent ebcb6251e6
commit 671c1dfb56
2 changed files with 41 additions and 25 deletions

View File

@ -350,7 +350,7 @@ void scanCallback(void *privdata, const dictEntry *de) {
if (val) listAddNodeTail(keys, val); if (val) listAddNodeTail(keys, val);
} }
/* Try to parse a SCAN cursor stored ad object 'o': /* Try to parse a SCAN cursor stored at object 'o':
* if the cursor is valid, store it as unsigned integer into *cursor and * if the cursor is valid, store it as unsigned integer into *cursor and
* returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the * returns REDIS_OK. Otherwise return REDIS_ERR and send an error to the
* client. */ * client. */

View File

@ -1518,8 +1518,7 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
(mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2) (mstime() - ri->master->info_refresh) < SENTINEL_INFO_PERIOD*2)
{ {
int retval; int retval;
retval = redisAsyncCommand(ri->cc, retval = sentinelSendSlaveOf(ri,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %d",
ri->master->addr->ip, ri->master->addr->ip,
ri->master->addr->port); ri->master->addr->port);
if (retval == REDIS_OK) if (retval == REDIS_OK)
@ -2521,6 +2520,39 @@ char *sentinelGetObjectiveLeader(sentinelRedisInstance *master) {
return winner; return winner;
} }
/* Send SLAVEOF to the specified instance, always followed by a
* CONFIG REWRITE command in order to store the new configuration on disk
* when possible (that is, if the Redis instance is recent enough to support
* config rewriting, and if the server was started with a configuration file).
*
* If Host is NULL the function sends "SLAVEOF NO ONE".
*
* The command returns REDIS_OK if the SLAVEOF command was accepted for
* (later) delivery otherwise REDIS_ERR. The command replies are just
* discarded. */
int sentinelSendSlaveOf(sentinelRedisInstance *ri, char *host, int port) {
char portstr[32];
ll2string(portstr,sizeof(portstr),port);
if (host == NULL) {
host = "NO";
memcpy(portstr,"ONE",4);
}
retval = redisAsyncCommand(ri->cc,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s", host, portstr);
if (retval == REDIS_ERR) return retval;
ri->pending_commands++;
if (redisAsyncCommand(ri->cc,
sentinelDiscardReplyCallback, NULL, "CONFIG REWRITE") == REDIS_OK)
{
ri->pending_commands++;
}
return REDIS_OK;
}
/* Setup the master state to start a failover as a leader. /* Setup the master state to start a failover as a leader.
* *
* State can be either: * State can be either:
@ -2750,10 +2782,8 @@ void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
* We actually register a generic callback for this command as we don't * We actually register a generic callback for this command as we don't
* really care about the reply. We check if it worked indirectly observing * really care about the reply. We check if it worked indirectly observing
* if INFO returns a different role (master instead of slave). */ * if INFO returns a different role (master instead of slave). */
retval = redisAsyncCommand(ri->promoted_slave->cc, retval = sentinelSendSlaveOf(ri->promoted_slave,NULL,0);
sentinelDiscardReplyCallback, NULL, "SLAVEOF NO ONE");
if (retval != REDIS_OK) return; if (retval != REDIS_OK) return;
ri->promoted_slave->pending_commands++;
sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion", sentinelEvent(REDIS_NOTICE, "+failover-state-wait-promotion",
ri->promoted_slave,"%@"); ri->promoted_slave,"%@");
ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION; ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
@ -2823,10 +2853,6 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) { if (timeout && (master->flags & SRI_I_AM_THE_LEADER)) {
dictIterator *di; dictIterator *di;
dictEntry *de; dictEntry *de;
char master_port[32];
ll2string(master_port,sizeof(master_port),
master->promoted_slave->addr->port);
di = dictGetIterator(master->slaves); di = dictGetIterator(master->slaves);
while((de = dictNext(di)) != NULL) { while((de = dictNext(di)) != NULL) {
@ -2836,10 +2862,9 @@ void sentinelFailoverDetectEnd(sentinelRedisInstance *master) {
if (slave->flags & if (slave->flags &
(SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue; (SRI_RECONF_DONE|SRI_RECONF_SENT|SRI_DISCONNECTED)) continue;
retval = redisAsyncCommand(slave->cc, retval = sentinelSendSlaveOf(slave,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
master->promoted_slave->addr->ip, master->promoted_slave->addr->ip,
master_port); master->promoted_slave->addr->port);
if (retval == REDIS_OK) { if (retval == REDIS_OK) {
sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@"); sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent-be",slave,"%@");
slave->flags |= SRI_RECONF_SENT; slave->flags |= SRI_RECONF_SENT;
@ -2892,15 +2917,11 @@ void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
continue; continue;
/* Send SLAVEOF <new master>. */ /* Send SLAVEOF <new master>. */
ll2string(master_port,sizeof(master_port), retval = sentinelSendSlaveOf(slave,
master->promoted_slave->addr->port);
retval = redisAsyncCommand(slave->cc,
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
master->promoted_slave->addr->ip, master->promoted_slave->addr->ip,
master_port); master->promoted_slave->addr->port);
if (retval == REDIS_OK) { if (retval == REDIS_OK) {
slave->flags |= SRI_RECONF_SENT; slave->flags |= SRI_RECONF_SENT;
slave->pending_commands++;
slave->slave_reconf_sent_time = mstime(); slave->slave_reconf_sent_time = mstime();
sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@"); sentinelEvent(REDIS_NOTICE,"+slave-reconf-sent",slave,"%@");
in_progress++; in_progress++;
@ -2982,13 +3003,11 @@ void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
* back to the master as well, sending a best effort SLAVEOF command. * back to the master as well, sending a best effort SLAVEOF command.
*/ */
void sentinelAbortFailover(sentinelRedisInstance *ri) { void sentinelAbortFailover(sentinelRedisInstance *ri) {
char master_port[32];
dictIterator *di; dictIterator *di;
dictEntry *de; dictEntry *de;
int sentinel_role; int sentinel_role;
redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS); redisAssert(ri->flags & SRI_FAILOVER_IN_PROGRESS);
ll2string(master_port,sizeof(master_port),ri->addr->port);
/* Clear failover related flags from slaves. /* Clear failover related flags from slaves.
* Also if we are the leader make sure to send SLAVEOF commands to all the * Also if we are the leader make sure to send SLAVEOF commands to all the
@ -3004,10 +3023,7 @@ void sentinelAbortFailover(sentinelRedisInstance *ri) {
{ {
int retval; int retval;
retval = redisAsyncCommand(slave->cc, retval = sentinelSendSlaveOf(slave,ri->addr->ip,ri->addr->port);
sentinelDiscardReplyCallback, NULL, "SLAVEOF %s %s",
ri->addr->ip,
master_port);
if (retval == REDIS_OK) if (retval == REDIS_OK)
sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@"); sentinelEvent(REDIS_NOTICE,"-slave-reconf-undo",slave,"%@");
} }