From 38db91713c81b58e97496abbe35a9d14d1e423e0 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 May 2010 10:35:23 +0200 Subject: [PATCH 1/3] added new option no-appendfsync-on-rewrite to avoid blocking on fsync() in the main thread while a background process is doing big I/O --- redis.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/redis.c b/redis.c index 639b71df..262e3508 100644 --- a/redis.c +++ b/redis.c @@ -369,6 +369,7 @@ struct redisServer { int daemonize; int appendonly; int appendfsync; + int no_appendfsync_on_rewrite; int shutdown_asap; time_t lastfsync; int appendfd; @@ -1685,6 +1686,7 @@ static void initServerConfig() { server.daemonize = 0; server.appendonly = 0; server.appendfsync = APPENDFSYNC_EVERYSEC; + server.no_appendfsync_on_rewrite = 0; server.lastfsync = time(NULL); server.appendfd = -1; server.appendseldb = -1; /* Make sure the first time will not match */ @@ -1941,6 +1943,11 @@ static void loadServerConfig(char *filename) { } else if (!strcasecmp(argv[0],"appendfilename") && argc == 2) { zfree(server.appendfilename); server.appendfilename = zstrdup(argv[1]); + } else if (!strcasecmp(argv[0],"no-appendfsync-on-rewrite") + && argc == 2) { + if ((server.no_appendfsync_on_rewrite= yesnotoi(argv[1])) == -1) { + err = "argument must be 'yes' or 'no'"; goto loaderr; + } } else if (!strcasecmp(argv[0],"appendfsync") && argc == 2) { if (!strcasecmp(argv[1],"no")) { server.appendfsync = APPENDFSYNC_NO; @@ -8236,6 +8243,11 @@ static void flushAppendOnlyFile(void) { sdsfree(server.aofbuf); server.aofbuf = sdsempty(); + /* Don't fsync if no-appendfsync-on-rewrite is set to yes and we have + * children performing heavy I/O on disk. 
*/ + if (server.no_appendfsync_on_rewrite && + (server.bgrewritechildpid != -1 || server.bgsavechildpid != -1)) + return; /* Fsync if needed */ now = time(NULL); if (server.appendfsync == APPENDFSYNC_ALWAYS || @@ -9960,6 +9972,11 @@ static void configSetCommand(redisClient *c) { } else { goto badfmt; } + } else if (!strcasecmp(c->argv[2]->ptr,"no-appendfsync-on-rewrite")) { + int yn = yesnotoi(o->ptr); + + if (yn == -1) goto badfmt; + server.no_appendfsync_on_rewrite = yn; } else if (!strcasecmp(c->argv[2]->ptr,"appendonly")) { int old = server.appendonly; int new = yesnotoi(o->ptr); @@ -10075,6 +10092,11 @@ static void configGetCommand(redisClient *c) { addReplyBulkCString(c,server.appendonly ? "yes" : "no"); matches++; } + if (stringmatch(pattern,"no-appendfsync-on-rewrite",0)) { + addReplyBulkCString(c,"no-appendfsync-on-rewrite"); + addReplyBulkCString(c,server.no_appendfsync_on_rewrite ? "yes" : "no"); + matches++; + } if (stringmatch(pattern,"appendfsync",0)) { char *policy; From b0bd87f60cdeba2ae215b22978331987e5a96d38 Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 May 2010 10:40:53 +0200 Subject: [PATCH 2/3] don't fsync after a rewrite if appendfsync is set to no. use aof_fsync instead of fsync where appropriate --- redis.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/redis.c b/redis.c index 262e3508..f747c046 100644 --- a/redis.c +++ b/redis.c @@ -1386,7 +1386,7 @@ void backgroundRewriteDoneHandler(int statloc) { /* If append only is actually enabled... 
*/ close(server.appendfd); server.appendfd = fd; - fsync(fd); + if (appendfsync != APPENDFSYNC_NO) aof_fsync(fd); server.appendseldb = -1; /* Make sure it will issue SELECT */ redisLog(REDIS_NOTICE,"The new append only file was selected for future appends."); } else { @@ -4200,7 +4200,7 @@ static int prepareForShutdown() { } if (server.appendonly) { /* Append only file: fsync() the AOF and exit */ - fsync(server.appendfd); + aof_fsync(server.appendfd); if (server.vm_enabled) unlink(server.vm_swap_file); } else { /* Snapshotting. Perform a SYNC SAVE and exit */ @@ -8686,7 +8686,7 @@ static int rewriteAppendOnlyFile(char *filename) { /* Make sure data will not remain on the OS's output buffers */ fflush(fp); - fsync(fileno(fp)); + aof_fsync(fileno(fp)); fclose(fp); /* Use RENAME to make sure the DB file is changed atomically only @@ -8803,7 +8803,7 @@ static void aofRemoveTempFile(pid_t childpid) { * at runtime using the CONFIG command. */ static void stopAppendOnly(void) { flushAppendOnlyFile(); - fsync(server.appendfd); + aof_fsync(server.appendfd); close(server.appendfd); server.appendfd = -1; From d5d23dabd7a72c63ceda017d560c362b4065d85b Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 28 May 2010 10:48:04 +0200 Subject: [PATCH 3/3] redis.conf new features the new option, a minor typo preventing the compilation fixed --- redis.c | 2 +- redis.conf | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/redis.c b/redis.c index f747c046..8c4e3ab2 100644 --- a/redis.c +++ b/redis.c @@ -1386,7 +1386,7 @@ void backgroundRewriteDoneHandler(int statloc) { /* If append only is actually enabled... 
*/ close(server.appendfd); server.appendfd = fd; - if (appendfsync != APPENDFSYNC_NO) aof_fsync(fd); + if (server.appendfsync != APPENDFSYNC_NO) aof_fsync(fd); server.appendseldb = -1; /* Make sure it will issue SELECT */ redisLog(REDIS_NOTICE,"The new append only file was selected for future appends."); } else { diff --git a/redis.conf b/redis.conf index c48a2fb8..b087417a 100644 --- a/redis.conf +++ b/redis.conf @@ -195,6 +195,26 @@ appendonly no appendfsync everysec # appendfsync no +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync no", which in practical terms means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no", which is the safest pick from the point of view of durability. +no-appendfsync-on-rewrite no + ################################ VIRTUAL MEMORY ############################### # Virtual Memory allows Redis to work with datasets bigger than the actual