mirror of
https://github.com/fluencelabs/redis
synced 2025-03-19 17:10:50 +00:00
Avoid simultaneous RDB and AOF child processes.
This patch, written in collaboration with Oran Agra (@oranagra), is a companion to 780a8b1. Together the two patches should prevent the AOF and RDB saving processes from being spawned at the same time. Previously, the conditions that could lead to two saving processes running at the same time were: 1. When AOF is enabled via CONFIG SET and an RDB saving process is already active. 2. When the SYNC command decides to start an RDB saving process ASAP in order to serve a new slave that cannot partially resynchronize (but only if we have a disk target for replication; for diskless replication there is no such problem). Condition "1" is not very severe, but "2" can happen often and is definitely good at degrading Redis performance in an unexpected way. The two commits have the effect of always spawning RDB saves for replication in replicationCron() instead of attempting to start an RDB save synchronously. Moreover, when a BGSAVE or AOF rewrite must be performed, they are instead just postponed using flags that will try to perform such operations ASAP. Finally, the BGSAVE command was modified to accept a SCHEDULE option so that if an AOF rewrite is in progress and this option is given, the command no longer returns an error, but instead schedules an RDB rewrite operation for when it will be possible to start it.
This commit is contained in:
parent
780a8b1d76
commit
0a628e5102
@ -251,7 +251,10 @@ int startAppendOnly(void) {
|
|||||||
strerror(errno));
|
strerror(errno));
|
||||||
return C_ERR;
|
return C_ERR;
|
||||||
}
|
}
|
||||||
if (rewriteAppendOnlyFileBackground() == C_ERR) {
|
if (server.rdb_child_pid != -1) {
|
||||||
|
server.aof_rewrite_scheduled = 1;
|
||||||
|
serverLog(LL_WARNING,"AOF was enabled but there is already a child process saving an RDB file on disk. An AOF background was scheduled to start when possible.");
|
||||||
|
} else if (rewriteAppendOnlyFileBackground() == C_ERR) {
|
||||||
close(server.aof_fd);
|
close(server.aof_fd);
|
||||||
serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
|
serverLog(LL_WARNING,"Redis needs to enable the AOF but can't trigger a background AOF rewrite operation. Check the above logs for more info about the error.");
|
||||||
return C_ERR;
|
return C_ERR;
|
||||||
@ -1273,7 +1276,7 @@ int rewriteAppendOnlyFileBackground(void) {
|
|||||||
pid_t childpid;
|
pid_t childpid;
|
||||||
long long start;
|
long long start;
|
||||||
|
|
||||||
if (server.aof_child_pid != -1) return C_ERR;
|
if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
|
||||||
if (aofCreatePipes() != C_OK) return C_ERR;
|
if (aofCreatePipes() != C_OK) return C_ERR;
|
||||||
start = ustime();
|
start = ustime();
|
||||||
if ((childpid = fork()) == 0) {
|
if ((childpid = fork()) == 0) {
|
||||||
|
28
src/rdb.c
28
src/rdb.c
@ -997,7 +997,7 @@ int rdbSaveBackground(char *filename) {
|
|||||||
pid_t childpid;
|
pid_t childpid;
|
||||||
long long start;
|
long long start;
|
||||||
|
|
||||||
if (server.rdb_child_pid != -1) return C_ERR;
|
if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
|
||||||
|
|
||||||
server.dirty_before_bgsave = server.dirty;
|
server.dirty_before_bgsave = server.dirty;
|
||||||
server.lastbgsave_try = time(NULL);
|
server.lastbgsave_try = time(NULL);
|
||||||
@ -1687,7 +1687,7 @@ int rdbSaveToSlavesSockets(void) {
|
|||||||
long long start;
|
long long start;
|
||||||
int pipefds[2];
|
int pipefds[2];
|
||||||
|
|
||||||
if (server.rdb_child_pid != -1) return C_ERR;
|
if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;
|
||||||
|
|
||||||
/* Before to fork, create a pipe that will be used in order to
|
/* Before to fork, create a pipe that will be used in order to
|
||||||
* send back to the parent the IDs of the slaves that successfully
|
* send back to the parent the IDs of the slaves that successfully
|
||||||
@ -1842,11 +1842,33 @@ void saveCommand(client *c) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* BGSAVE [SCHEDULE] */
|
||||||
void bgsaveCommand(client *c) {
|
void bgsaveCommand(client *c) {
|
||||||
|
int schedule = 0;
|
||||||
|
|
||||||
|
/* The SCHEDULE option changes the behavior of BGSAVE when an AOF rewrite
|
||||||
|
* is in progress. Instead of returning an error a BGSAVE gets scheduled. */
|
||||||
|
if (c->argc > 1) {
|
||||||
|
if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"schedule")) {
|
||||||
|
schedule = 1;
|
||||||
|
} else {
|
||||||
|
addReply(c,shared.syntaxerr);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (server.rdb_child_pid != -1) {
|
if (server.rdb_child_pid != -1) {
|
||||||
addReplyError(c,"Background save already in progress");
|
addReplyError(c,"Background save already in progress");
|
||||||
} else if (server.aof_child_pid != -1) {
|
} else if (server.aof_child_pid != -1) {
|
||||||
addReplyError(c,"Can't BGSAVE while AOF log rewriting is in progress");
|
if (schedule) {
|
||||||
|
server.rdb_bgsave_scheduled = 1;
|
||||||
|
addReplyStatus(c,"Background saving scheduled");
|
||||||
|
} else {
|
||||||
|
addReplyError(c,
|
||||||
|
"An AOF log rewriting in progress: can't BGSAVE right now. "
|
||||||
|
"Use BGSAVE SCHEDULE in order to schedule a BGSAVE whenver "
|
||||||
|
"possible.");
|
||||||
|
}
|
||||||
} else if (rdbSaveBackground(server.rdb_filename) == C_OK) {
|
} else if (rdbSaveBackground(server.rdb_filename) == C_OK) {
|
||||||
addReplyStatus(c,"Background saving started");
|
addReplyStatus(c,"Background saving started");
|
||||||
} else {
|
} else {
|
||||||
|
23
src/server.c
23
src/server.c
@ -233,7 +233,7 @@ struct redisCommand redisCommandTable[] = {
|
|||||||
{"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
|
{"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
|
||||||
{"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
|
{"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
|
||||||
{"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
|
{"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
|
||||||
{"bgsave",bgsaveCommand,1,"a",0,NULL,0,0,0,0,0},
|
{"bgsave",bgsaveCommand,-1,"a",0,NULL,0,0,0,0,0},
|
||||||
{"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0},
|
{"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0},
|
||||||
{"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0},
|
{"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0},
|
||||||
{"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
|
{"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
|
||||||
@ -1113,8 +1113,8 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
|||||||
/* Clear the paused clients flag if needed. */
|
/* Clear the paused clients flag if needed. */
|
||||||
clientsArePaused(); /* Don't check return value, just use the side effect. */
|
clientsArePaused(); /* Don't check return value, just use the side effect. */
|
||||||
|
|
||||||
/* Replication cron function -- used to reconnect to master and
|
/* Replication cron function -- used to reconnect to master,
|
||||||
* to detect transfer failures. */
|
* detect transfer failures, start background RDB transfers and so forth. */
|
||||||
run_with_period(1000) replicationCron();
|
run_with_period(1000) replicationCron();
|
||||||
|
|
||||||
/* Run the Redis Cluster cron. */
|
/* Run the Redis Cluster cron. */
|
||||||
@ -1132,6 +1132,22 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
|||||||
migrateCloseTimedoutSockets();
|
migrateCloseTimedoutSockets();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Start a scheduled BGSAVE if the corresponding flag is set. This is
|
||||||
|
* useful when we are forced to postpone a BGSAVE because an AOF
|
||||||
|
* rewrite is in progress.
|
||||||
|
*
|
||||||
|
* Note: this code must be after the replicationCron() call above so
|
||||||
|
* make sure when refactoring this file to keep this order. This is useful
|
||||||
|
* because we want to give priority to RDB savings for replication. */
|
||||||
|
if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
|
||||||
|
server.rdb_bgsave_scheduled &&
|
||||||
|
(server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
|
||||||
|
server.lastbgsave_status == C_OK))
|
||||||
|
{
|
||||||
|
if (rdbSaveBackground(server.rdb_filename) == C_OK)
|
||||||
|
server.rdb_bgsave_scheduled = 0;
|
||||||
|
}
|
||||||
|
|
||||||
server.cronloops++;
|
server.cronloops++;
|
||||||
return 1000/server.hz;
|
return 1000/server.hz;
|
||||||
}
|
}
|
||||||
@ -1762,6 +1778,7 @@ void initServer(void) {
|
|||||||
server.rdb_child_pid = -1;
|
server.rdb_child_pid = -1;
|
||||||
server.aof_child_pid = -1;
|
server.aof_child_pid = -1;
|
||||||
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
|
server.rdb_child_type = RDB_CHILD_TYPE_NONE;
|
||||||
|
server.rdb_bgsave_scheduled = 0;
|
||||||
aofRewriteBufferReset();
|
aofRewriteBufferReset();
|
||||||
server.aof_buf = sdsempty();
|
server.aof_buf = sdsempty();
|
||||||
server.lastsave = time(NULL); /* At startup we consider the DB saved. */
|
server.lastsave = time(NULL); /* At startup we consider the DB saved. */
|
||||||
|
@ -918,6 +918,7 @@ struct redisServer {
|
|||||||
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
|
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
|
||||||
time_t rdb_save_time_last; /* Time used by last RDB save run. */
|
time_t rdb_save_time_last; /* Time used by last RDB save run. */
|
||||||
time_t rdb_save_time_start; /* Current RDB save start time. */
|
time_t rdb_save_time_start; /* Current RDB save start time. */
|
||||||
|
int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
|
||||||
int rdb_child_type; /* Type of save by active child. */
|
int rdb_child_type; /* Type of save by active child. */
|
||||||
int lastbgsave_status; /* C_OK or C_ERR */
|
int lastbgsave_status; /* C_OK or C_ERR */
|
||||||
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
|
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user