diff --git a/redis.c b/redis.c index 92ae07b7..949bb58e 100644 --- a/redis.c +++ b/redis.c @@ -369,6 +369,7 @@ struct redisServer { int daemonize; int appendonly; int appendfsync; + int no_appendfsync_on_rewrite; int shutdown_asap; time_t lastfsync; int appendfd; @@ -1385,7 +1386,7 @@ void backgroundRewriteDoneHandler(int statloc) { /* If append only is actually enabled... */ close(server.appendfd); server.appendfd = fd; - fsync(fd); + if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd); server.appendseldb = -1; /* Make sure it will issue SELECT */ redisLog(REDIS_NOTICE,"The new append only file was selected for future appends."); } else { @@ -1685,6 +1686,7 @@ static void initServerConfig() { server.daemonize = 0; server.appendonly = 0; server.appendfsync = APPENDFSYNC_EVERYSEC; + server.no_appendfsync_on_rewrite = 0; server.lastfsync = time(NULL); server.appendfd = -1; server.appendseldb = -1; /* Make sure the first time will not match */ @@ -1941,6 +1943,11 @@ static void loadServerConfig(char *filename) { } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) { zfree(server.appendfilename); server.appendfilename = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite") + && argc == 2) { + if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { if (!strcasecmp(argv[1],"no")) { server.appendfsync = APPENDFSYNC_NO; @@ -4209,7 +4216,7 @@ static int prepareForShutdown() { } if (server.appendonly) { /* Append only file: fsync() the AOF and exit */ - fsync(server.appendfd); + aof_fsync(server.appendfd); if (server.vm_enabled) unlink(server.vm_swap_file); } else { /* Snapshotting. 
Perform a SYNC SAVE and exit */ @@ -8266,6 +8273,11 @@ static void flushAppendOnlyFile(void) { sdsfree(server.aofbuf); server.aofbuf = sdsempty(); + /* Don't Fsync if no-appendfsync-on-rewrite is set to yes and we have + * children performing heavy I/O on disk. */ + if (server.no_appendfsync_on_rewrite && + (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1)) + return; /* Fsync if needed */ now = time(NULL); if (server.appendfsync == APPENDFSYNC_ALWAYS || @@ -8704,7 +8716,7 @@ static int rewriteAppendOnlyFile(char *filename) { /* Make sure data will not remain on the OS's output buffers */ fflush(fp); - fsync(fileno(fp)); + aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only @@ -8821,7 +8833,7 @@ static void aofRemoveTempFile(pid_t childpid) { * at runtime using the CONFIG command. */ static void stopAppendOnly(void) { flushAppendOnlyFile(); - fsync(server.appendfd); + aof_fsync(server.appendfd); close(server.appendfd); server.appendfd = -1; @@ -9990,6 +10002,11 @@ static void configSetCommand(redisClient *c) { } else { goto badfmt; } + } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) { + int yn = yesnotoi(o->ptr); + + if (yn == -1) goto badfmt; + server.no_appendfsync_on_rewrite = yn; } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) { int old = server.appendonly; int new = yesnotoi(o->ptr); @@ -10105,6 +10122,11 @@ static void configGetCommand(redisClient *c) { addReplyBulkCString(c,server.appendonly ? "yes" : "no"); matches++; } + if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) { + addReplyBulkCString(c,"no-appendfsync-on-rewrite"); + addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? 
"yes" : "no"); + matches++; + } if (stringmatch(pattern,"appendfsync",0)) { char *policy; diff --git a/redis.conf b/redis.conf index c48a2fb8..b087417a 100644 --- a/redis.conf +++ b/redis.conf @@ -195,6 +195,26 @@ appendonly no appendfsync everysec # appendfsync no +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync no", which in practical terms means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no", which is the safest pick from the point of view of durability. +no-appendfsync-on-rewrite no + ################################ VIRTUAL MEMORY ############################### # Virtual Memory allows Redis to work with datasets bigger than the actual